I'm currently working on a library of temporal integrators, and I'm wondering what is best for computational efficiency: computing the coefficients at every time step, or taking them as parameters from an external module.
The first case would look like this:
Subroutine getRKF45(this, dt, y, z)
Implicit none
Class(diffEc), Intent(InOut) :: this
Real(8), Intent(In) :: dt
Real(8), dimension(:), Intent(InOut) :: y, z
Real(8), dimension(6,this%n) :: k
Call field(this%deriv, this%x, this%t)
k(1,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*(1.d0/5.d0), this%t+dt*(1.d0/5.d0))
k(2,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*(3.d0/40.d0)+k(2,:)*dt*(9.d0/40.d0) &
, this%t + dt*(3.d0/10.d0))
k(3,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*(3.d0/10.d0)-k(2,:)*dt*(9.d0/10.d0) &
+k(3,:)*dt*(6.d0/5.d0), this%t + dt*(3.d0/5.d0))
k(4,:) = this%deriv(:)
Call field(this%deriv, this%x-k(1,:)*dt*(11.d0/54.d0)+k(2,:)*dt*(5.d0/2.d0) &
-k(3,:)*dt*(70.d0/27.d0)+k(4,:)*dt*(35.d0/27.d0), this%t + dt)
k(5,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*(1631.d0/55296.d0)+k(2,:)*dt*(175.d0/512.d0) &
+k(3,:)*dt*(575.d0/13824.d0)+k(4,:)*dt*(44275.d0/110592.d0) &
+k(5,:)*dt*(253.d0/4096.d0), this%t + dt*(7.d0/8.d0))
k(6,:) = this%deriv(:)
y(:) = this%x(:) + dt*(k(1,:)*(37.d0/378.d0)+k(3,:)*(250.d0/621.d0)+k(4,:)*(125.d0/594.d0) &
+k(6,:)*(512.d0/1771.d0))
z(:) = this%x(:) + dt*(k(1,:)*(2825.d0/27648.d0)+k(3,:)*(18575.d0/48384.d0) &
+k(4,:)*(13525.d0/55296.d0)+k(5,:)*(277.d0/14336.d0)+k(6,:)*(1.d0/4.d0))
End Subroutine getRKF45
And this is the version using pre-computed coefficients:
Subroutine getRKF45(this, dt, y, z)
Use IntegratorUtilities, only: rk45Coef
Implicit none
Class(diffEc), Intent(InOut) :: this
Real(8), Intent(In) :: dt
Real(8), dimension(:), Intent(InOut) :: y, z
Real(8), dimension(6,this%n) :: k
Call field(this%deriv, this%x, this%t)
k(1,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*rk45Coef(1), this%t+dt*rk45Coef(1))
k(2,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*rk45Coef(2)+k(2,:)*dt*rk45Coef(3) &
, this%t + dt*rk45Coef(4))
k(3,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*rk45Coef(4)-k(2,:)*dt*rk45Coef(5) &
+k(3,:)*dt*rk45Coef(6), this%t + dt*rk45Coef(7))
k(4,:) = this%deriv(:)
Call field(this%deriv, this%x-k(1,:)*dt*rk45Coef(8)+k(2,:)*dt*rk45Coef(9) &
-k(3,:)*dt*rk45Coef(10)+k(4,:)*dt*rk45Coef(11), this%t + dt)
k(5,:) = this%deriv(:)
Call field(this%deriv, this%x+k(1,:)*dt*rk45Coef(12)+k(2,:)*dt*rk45Coef(13) &
+k(3,:)*dt*rk45Coef(14)+k(4,:)*dt*rk45Coef(15) &
+k(5,:)*dt*rk45Coef(16), this%t + dt*rk45Coef(17))
k(6,:) = this%deriv(:)
y(:) = this%x(:) + dt*(k(1,:)*rk45Coef(18)+k(3,:)*rk45Coef(19)+k(4,:)*rk45Coef(20) &
+k(6,:)*rk45Coef(21))
z(:) = this%x(:) + dt*(k(1,:)*rk45Coef(22)+k(3,:)*rk45Coef(23) &
+k(4,:)*rk45Coef(24)+k(5,:)*rk45Coef(25)+k(6,:)*rk45Coef(26))
End Subroutine getRKF45
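For context, rk45Coef is just a constant array; one way the IntegratorUtilities module could declare it, with the entries in the order the calls above consume them, is the sketch below (an assumption for illustration, not necessarily the actual module):
Module IntegratorUtilities
Implicit none
! embedded RK45 coefficients, in the order the getRKF45 calls consume them
Real(8), dimension(26), parameter :: rk45Coef = [ &
1.d0/5.d0, 3.d0/40.d0, 9.d0/40.d0, 3.d0/10.d0, 9.d0/10.d0, 6.d0/5.d0, 3.d0/5.d0, &
11.d0/54.d0, 5.d0/2.d0, 70.d0/27.d0, 35.d0/27.d0, &
1631.d0/55296.d0, 175.d0/512.d0, 575.d0/13824.d0, 44275.d0/110592.d0, 253.d0/4096.d0, 7.d0/8.d0, &
37.d0/378.d0, 250.d0/621.d0, 125.d0/594.d0, 512.d0/1771.d0, &
2825.d0/27648.d0, 18575.d0/48384.d0, 13525.d0/55296.d0, 277.d0/14336.d0, 1.d0/4.d0 ]
End Module IntegratorUtilities
Since the array is a parameter, the coefficients are compile-time constants in both variants, just like the literal fractions such as 1.d0/5.d0, so one would not expect a measurable run-time difference.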
I've run some tests and the time difference between the two schemes is negligible. Does the compiler pre-compute these coefficient multiplications?
I'm trying to solve this differential equation for the internal energy of a gas in Fortran 90:
du/dt = dT/dt = -λ/ρ
where u is the internal energy and λ is the cooling function (both are functions of the temperature T only), and ρ is the mass density, which we can assume to be constant.
I'm using a second-order Runge-Kutta method (Heun), and I'm sure I wrote the actual solving algorithm correctly, but I'm pretty sure I'm messing up the implementation. I'm also not sure how to efficiently choose an arbitrary energy scale.
I'm implementing the right-hand side with this subroutine:
MODULE RHS
! right hand side
IMPLICIT NONE
CONTAINS
SUBROUTINE dydx(neq, y, f)
INTEGER, INTENT(IN) :: neq
REAL*8, DIMENSION(neq), INTENT(IN) :: y
REAL*8, DIMENSION(neq), INTENT(OUT) :: f
f(1) = -y(1)
END SUBROUTINE dydx
END MODULE RHS
And this is the Heun algorithm I'm using:
SUBROUTINE heun(neq, h, yold, ynew)
INTEGER, INTENT(IN) :: neq
REAL*8, INTENT(IN) :: h
REAL*8, DIMENSION(neq), INTENT(IN) ::yold
REAL*8, DIMENSION(neq), INTENT(OUT) :: ynew
REAL*8, DIMENSION(neq) :: f, ftilde
INTEGER :: i
CALL dydx(neq, yold, f)
DO i=1, neq
ynew(i) = yold(i) + h*f(i)
END DO
CALL dydx(neq, ynew, ftilde)
DO i=1, neq
ynew(i) = yold(i) + 0.5d0*h*(f(i) + ftilde(i))
END DO
END SUBROUTINE heun
Since both lambda and rho are n-element arrays, I'm saving the results in an array called u_tilde, selecting a starting condition at T = 1,000,000 K:
h = 1.d0/n
u_tilde(1) = lambda(n)/density(n) ! lambda(3) is at about T=one million
DO i = 2, n
CALL heun(1, h*i, u_tilde(i-1), u_tilde(i))
ENDDO
This gives me a weird plot of temperature over time.
I would like to start from a temperature of one million kelvin, have it cool down to 10,000 K, and see how long that takes. How do I implement these boundary conditions?
What am I doing wrong in RHS and in setting up the calculation loop in the program?
Your implementation of dydx only assigns the first element.
Also, there is no need to write explicit loops for each step, since Fortran 90 can operate on whole arrays.
For a modular design, I suggest implementing a custom type that holds your model data, like the mass density and the cooling coefficient.
Here is a simple example implementation that holds only a single scalar coefficient, so that y' = -c y:
module mod_diffeq
use, intrinsic :: iso_fortran_env, wp => real64
implicit none
type :: model
real(wp) :: coefficient
end type
contains
pure function dxdy(arg, x, y) result(yp)
type(model), intent(in) :: arg
real(wp), intent(in) :: x, y(:)
real(wp) :: yp(size(y))
yp = -arg%coefficient*y
end function
pure function heun(arg, x0, y0, h) result(y)
type(model), intent(in) :: arg
real(wp), intent(in) :: x0, y0(:), h
real(wp) :: y(size(y0)), k0(size(y0)), k1(size(y0))
k0 = dxdy(arg, x0, y0)
k1 = dxdy(arg, x0+h, y0 + h*k0)
y = y0 + h*(k0+k1)/2
end function
end module
and the above module is used for some cooling simulations with
program FortranCoolingConsole1
use mod_diffeq
implicit none
integer, parameter :: neq = 100
integer, parameter :: nsteps = 256
! Variables
type(model):: gas
real(wp) :: x, y(neq), x_end, h
integer :: i
! Body of Console1
gas%coefficient = 1.0_wp
x = 0.0_wp
x_end = 10.0_wp
do i=1, neq
if(i==1) then
y(i) = 1000.0_wp
else
y(i) = 0.0_wp
end if
end do
print '(1x," ",a22," ",a22)', 'x', 'y(1)'
print '(1x," ",g22.15," ",g22.15)', x, y(1)
! Initial Conditions
h = (x_end - x)/nsteps
! Simulation
do while(x<x_end)
y = heun(gas, x, y, h)   ! take the step from x to x+h before advancing x
x = x + h
print '(1x," ",g22.15," ",g22.15)', x, y(1)
end do
end program
Note that I am only printing the first of the neq components of y.
The sample output shows exponential decay starting from 1000:
x y(1)
0.00000000000000 1000.00000000000
0.390625000000000E-01 961.700439453125
0.781250000000000E-01 924.867735244334
0.117187500000000 889.445707420492
0.156250000000000 855.380327695983
0.195312500000000 822.619637044785
0.234375000000000 791.113666448740
0.273437500000000 760.814360681126
0.312500000000000 731.675505009287
0.351562500000000 703.652654704519
0.390625000000000 676.703067251694
0.429687500000000 650.785637155231
0.468750000000000 625.860833241968
0.507812500000000 601.890638365300
0.546875000000000 578.838491418631
0.585937500000000 556.669231569681
...
Also, if you want a classic 4th-order Runge-Kutta method, you can include the following in the mod_diffeq module:
pure function rk4(arg, x0, y0, h) result(y)
type(model), intent(in) :: arg
real(wp), intent(in) :: x0, y0(:), h
real(wp) :: y(size(y0)), k0(size(y0)), k1(size(y0)), k2(size(y0)), k3(size(y0))
k0 = dxdy(arg, x0, y0)
k1 = dxdy(arg, x0+h/2, y0 + (h/2)*k0)
k2 = dxdy(arg, x0+h/2, y0 + (h/2)*k1)
k3 = dxdy(arg, x0+h, y0 + h*k2)
y = y0 + (h/6)*(k0+2*k1+2*k2+k3)
end function
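It can then be called from the time-stepping loop exactly like heun, e.g. y = rk4(gas, x, y, h).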
I am trying to build a Linux source tree on Windows with Cygwin. When I run make, the following error occurs:
mpif90 -c -O3 -I/usr/include -Iobj -Jobj -o obj/sdf_output_point_ru.o src/io/sdf_output_point_ru.f90
src/io/sdf_output_point_ru.f90:260:39:
MPI_STATUS_IGNORE, errcode)
1
error: There is no specific subroutine for the generic ‘mpi_file_write’ at (1)
src/io/sdf_output_point_ru.f90:265:37:
MPI_STATUS_IGNORE, errcode)
1
error: There is no specific subroutine for the generic ‘mpi_file_write’ at (1)
make: *** [Makefile:166:sdf_output_point_ru.o] error 1
The original source file sdf_output_point_ru.f90 is as follows:
MODULE sdf_output_point_ru
USE mpi
USE sdf_common
USE sdf_output
IMPLICIT NONE
CONTAINS
SUBROUTINE write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units, &
dim_mults)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: dim_labels(:), dim_units(:)
REAL(r8), DIMENSION(:), INTENT(IN), OPTIONAL :: dim_mults
INTEGER :: ndims
TYPE(sdf_block_type), POINTER :: b
INTEGER :: i, errcode
b => h%current_block
b%blocktype = c_blocktype_point_mesh
ndims = b%ndims
b%nelements = b%ndims * b%npoints
! Metadata is
! - mults REAL(r8), DIMENSION(ndims)
! - labels CHARACTER(id_length), DIMENSION(ndims)
! - units CHARACTER(id_length), DIMENSION(ndims)
! - geometry INTEGER(i4)
! - minval REAL(r8), DIMENSION(ndims)
! - maxval REAL(r8), DIMENSION(ndims)
! - npoints INTEGER(i8)
b%info_length = h%block_header_length + soi4 + soi8 &
+ (3 * ndims) * sof8 + 2 * ndims * c_id_length
b%data_length = b%nelements * b%type_size
! Write header
IF (PRESENT(id)) THEN
ALLOCATE(b%dim_labels(ndims), b%dim_units(ndims), b%dim_mults(ndims))
IF (PRESENT(dim_labels)) THEN
DO i = 1,ndims
CALL safe_copy_string(dim_labels(i), b%dim_labels(i))
ENDDO
ELSE
IF (ndims .GE. 1) CALL safe_copy_string('X', b%dim_labels(1))
IF (ndims .GE. 2) CALL safe_copy_string('Y', b%dim_labels(2))
IF (ndims .GE. 3) CALL safe_copy_string('Z', b%dim_labels(3))
ENDIF
IF (PRESENT(dim_units)) THEN
DO i = 1,ndims
CALL safe_copy_string(dim_units(i), b%dim_units(i))
ENDDO
ELSE
DO i = 1,ndims
CALL safe_copy_string('m', b%dim_units(i))
ENDDO
ENDIF
IF (PRESENT(dim_mults)) THEN
DO i = 1,ndims
b%dim_mults(i) = REAL(dim_mults(i),r8)
ENDDO
ELSE
DO i = 1,ndims
b%dim_mults(i) = 1.d0
ENDDO
ENDIF
CALL sdf_write_block_header(h, id, name)
ELSE
CALL write_block_header(h)
ENDIF
IF (h%rank .EQ. h%rank_master) THEN
CALL MPI_FILE_WRITE(h%filehandle, b%dim_mults, ndims, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
DO i = 1,ndims
CALL sdf_safe_write_id(h, b%dim_labels(i))
ENDDO
DO i = 1,ndims
CALL sdf_safe_write_id(h, b%dim_units(i))
ENDDO
CALL MPI_FILE_WRITE(h%filehandle, b%geometry, 1, MPI_INTEGER4, &
MPI_STATUS_IGNORE, errcode)
CALL MPI_FILE_WRITE(h%filehandle, b%extents, 2 * ndims, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
CALL MPI_FILE_WRITE(h%filehandle, b%npoints, 1, MPI_INTEGER8, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%block_start + b%info_length
b%done_info = .TRUE.
END SUBROUTINE write_point_mesh_meta_r8
SUBROUTINE write_point_mesh_meta_r4(h, id, name, dim_labels, dim_units, &
dim_mults)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: dim_labels(:), dim_units(:)
REAL(r4), DIMENSION(:), INTENT(IN), OPTIONAL :: dim_mults
REAL(r8), DIMENSION(c_maxdims) :: dim_mults8
TYPE(sdf_block_type), POINTER :: b
INTEGER :: i
IF (PRESENT(dim_mults)) THEN
b => h%current_block
DO i = 1,b%ndims
dim_mults8(i) = REAL(dim_mults(i),r8)
ENDDO
CALL write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units, &
dim_mults8)
ELSE
CALL write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units)
ENDIF
END SUBROUTINE write_point_mesh_meta_r4
SUBROUTINE write_point_variable_meta_r8(h, id, name, units, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name, units
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
INTEGER :: ndims
TYPE(sdf_block_type), POINTER :: b
INTEGER :: errcode
b => h%current_block
b%blocktype = c_blocktype_point_variable
ndims = b%ndims
b%nelements = b%ndims * b%npoints
! Metadata is
! - mult REAL(r8)
! - units CHARACTER(id_length)
! - meshid CHARACTER(id_length)
! - npoints INTEGER(i8)
b%info_length = h%block_header_length + soi8 + sof8 + 2 * c_id_length
b%data_length = b%nelements * b%type_size
! Write header
IF (PRESENT(id)) THEN
CALL safe_copy_string(units, b%units)
CALL safe_copy_string(mesh_id, b%mesh_id)
IF (PRESENT(mult)) THEN
b%mult = REAL(mult,r8)
ELSE
b%mult = 1.d0
ENDIF
CALL sdf_write_block_header(h, id, name)
ELSE
CALL write_block_header(h)
ENDIF
IF (h%rank .EQ. h%rank_master) THEN
CALL MPI_FILE_WRITE(h%filehandle, b%mult, 1, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
CALL sdf_safe_write_id(h, b%units)
CALL sdf_safe_write_id(h, b%mesh_id)
CALL MPI_FILE_WRITE(h%filehandle, b%npoints, 1, MPI_INTEGER8, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%block_start + b%info_length
b%done_info = .TRUE.
END SUBROUTINE write_point_variable_meta_r8
SUBROUTINE write_point_variable_meta_r4(h, id, name, units, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name, units
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: mesh_id
REAL(r4), INTENT(IN), OPTIONAL :: mult
IF (PRESENT(mult)) THEN
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id, &
REAL(mult,r8))
ELSE
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id)
ENDIF
END SUBROUTINE write_point_variable_meta_r4
SUBROUTINE write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER(i8), INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
INTEGER(i8) :: i, idx, npoint_max, npoint_rem
INTEGER :: errcode
TYPE(sdf_block_type), POINTER :: b
IF (npoint_global .LE. 0) RETURN
CALL sdf_get_next_block(h)
b => h%current_block
b%type_size = INT(h%soi,r4)
b%datatype = h%datatype_integer
b%mpitype = h%mpitype_integer
b%ndims = 1
b%npoints = npoint_global
! Write header
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id, mult)
! Write the real data
IF (h%rank .EQ. h%rank_master) THEN
h%current_location = b%data_location
CALL MPI_FILE_SEEK(h%filehandle, h%current_location, MPI_SEEK_SET, &
errcode)
! This is all a bit messy, but it is necessary because MPI_FILE_WRITE
! accepts an INTEGER count of elements to write, which may not be
! big enough for npoint_global which is an INTEGER*8
npoint_max = HUGE(npoint_max)
npoint_rem = MOD(npoint_global, npoint_max)
idx = 1
DO i = 1, npoint_global / npoint_max
CALL MPI_FILE_WRITE(h%filehandle, array(idx), npoint_max, b%mpitype, &
MPI_STATUS_IGNORE, errcode)
idx = idx + npoint_max
ENDDO
CALL MPI_FILE_WRITE(h%filehandle, array(idx), npoint_rem, b%mpitype, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%data_location + b%data_length
b%done_data = .TRUE.
END SUBROUTINE write_srl_pt_var_int_i8_r8
SUBROUTINE write_srl_pt_var_int_i4_r8(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER, INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
INT(npoint_global,i8), mesh_id, mult)
END SUBROUTINE write_srl_pt_var_int_i4_r8
SUBROUTINE write_srl_pt_var_int_i8_r4(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER(i8), INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r4), INTENT(IN) :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
npoint_global, mesh_id, REAL(mult,r8))
END SUBROUTINE write_srl_pt_var_int_i8_r4
SUBROUTINE write_srl_pt_var_int_i4_r4(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER, INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r4), INTENT(IN) :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
INT(npoint_global,i8), mesh_id, REAL(mult,r8))
END SUBROUTINE write_srl_pt_var_int_i4_r4
END MODULE sdf_output_point_ru
I wrote a matrix product state program in Fortran 90 that uses the Intel MKL library. When the compiler options are:
OPTIMIZE= -parallel -par-threshold90 -ipo -O3 -no-prec-div -fp-model fast=2 -xHost
LinkLine = $(OPTIMIZE) $(DIAGNOSE) -mkl -static-intel,
the memory usage keeps increasing at runtime.
If I change the compiler options to:
OPTIMIZE= -ipo -O3 -no-prec-div -fp-model fast=2 -xHost
LinkLine = $(OPTIMIZE) $(DIAGNOSE) -mkl=sequential -static-intel,
there is no problem, everything is fine.
Why does this happen? Is there any solution to this?
Following Alexander's suggestion, I post one of the subroutines (it performs a singular value decomposition of a matrix) below:
SUBROUTINE SVD(Theta,S1,S2,lambda,Din,Dout)
IMPLICIT NONE
INTEGER, INTENT(IN) :: Din, Dout
COMPLEX(kind=rKind),DIMENSION(Din,Din), INTENT(IN) :: Theta
REAL(kind=rKind), INTENT(OUT) :: lambda(Dout)
COMPLEX(kind=rKind), INTENT(OUT) :: S1(Din,Dout),S2(Din,Dout)
COMPLEX(kind=rKind) :: M(Din,Din)
REAL(kind=rKind) :: temp_lam(Din)
COMPLEX(kind=rKind) :: temp_U(Din,Din), temp_V(Din,Din)
COMPLEX(kind=rKind) :: WORK(10*Din)
REAL(kind=rKind) :: RWORK(5*Din)
REAL(kind=rKind), PARAMETER :: Truncation = 1E-13_rKind
INTEGER :: INFO, i
!----------------------------
M = Theta
CALL ZGESVD('A','A',Din,Din,M,Din,temp_lam,temp_U,Din,temp_V,Din,WORK,10*Din,RWORK,INFO)
lambda = 0.0_rKind
S1 = 0.0_rKind
S2 = 0.0_rKind
DO i = 1,Dout
IF (temp_lam(i)/temp_lam(1)>Truncation) THEN
lambda(i) = temp_lam(i)
S1(:,i) = temp_U(:,i)
S2(:,i) = temp_V(i,:)
END IF
END DO
END SUBROUTINE SVD
And another subroutine is
SUBROUTINE TwoSiteUpdate(A_update,B_update,lambda_2_update,&
& A,B,lambda_1,lambda_2,U,D1,D2_old,D2_new)
! Designed for 1D MPS.
IMPLICIT NONE
INTEGER, INTENT(IN) :: D1, D2_old, D2_new
COMPLEX(kind=rKind), INTENT(IN) :: A(D1,D2_old,localSize), B(D2_old,D1,localSize)
REAL(kind=rKind), INTENT(IN) :: lambda_1(D1), lambda_2(D2_old)
COMPLEX(kind=rKind), INTENT(IN) :: U(localSize,localSize,localSize,localSize)
COMPLEX(kind=rKind), INTENT(OUT) :: A_update(D1,D2_new,localSize), B_update(D2_new,D1,localSize)
REAL(kind=rKind), INTENT(OUT) :: lambda_2_update(D2_new)
COMPLEX(kind=rKind) :: temp_A(D1,D2_old,localSize)
COMPLEX(kind=rKind) :: temp_B(D2_old,D1,localSize)
COMPLEX(kind=rKind) :: Theta(D1,localSize,D1,localSize)
COMPLEX(kind=rKind) :: Theta_S(D1*localSize,D1*localSize)
COMPLEX(kind=rKind) :: S1(D1*localSize,D2_new), S2(D1*localSize,D2_new)
COMPLEX(kind=rKind) :: S1_tmp(D1,localSize,D2_new)
COMPLEX(kind=rKind) :: S2_tmp(D1,localSize,D2_new)
REAL(kind=rKind), PARAMETER :: Truncation=1E-16_rKind
COMPLEX(kind=rKind) :: tmp_ele
INTEGER :: i1,i2,i3,im
INTEGER :: mA1,mB1,mA2,mB2
INTEGER :: i
!------------------- add lambda on A, B ----------------------
temp_A = 0.0_rKind
temp_B = 0.0_rKind
DO im = 1,localSize
DO i2 = 1,D2_old
DO i1 = 1,D1
IF(lambda_1(i1)>Truncation) THEN
temp_A(i1,i2,im) = A(i1,i2,im)*SQRT(lambda_1(i1))
temp_B(i2,i1,im) = B(i2,i1,im)*SQRT(lambda_1(i1))
END IF
END DO
END DO
END DO
!--------------- svd on Theta ----------------
Theta = 0.0_rKind
DO mB2 = 1,localSize
DO i2 = 1,D1
DO mA2 = 1,localSize
DO i1 = 1,D1
tmp_ele = 0.0_rKind
DO mB1 = 1,localSize
DO mA1 = 1,localSize
DO i3 = 1,D2_old
tmp_ele = tmp_ele + U(mA2,mB2,mA1,mB1)*temp_A(i1,i3,mA1)*temp_B(i3,i2,mB1)
END DO
END DO
END DO
Theta(i1,mA2,i2,mB2) = tmp_ele
END DO
END DO
END DO
END DO
Theta_S = RESHAPE(Theta,SHAPE=(/D1*localSize,D1*localSize/))
CALL SVD(Theta_S,S1,S2,lambda_2_update,D1*localSize,D2_new)
lambda_2_update = lambda_2_update/lambda_2_update(1)
S1_tmp = RESHAPE(S1,SHAPE=(/D1,localSize,D2_new/))
S2_tmp = RESHAPE(S2,SHAPE=(/D1,localSize,D2_new/))
!---------------- update A, B ----------------
A_update = 0.0_rKind
B_update = 0.0_rKind
DO im = 1,localSize
DO i2 = 1,D2_new
DO i1 = 1,D1
IF (lambda_1(i1)>Truncation) THEN
A_update(i1,i2,im) = S1_tmp(i1,im,i2)*SQRT(lambda_2_update(i2))/SQRT(lambda_1(i1))
B_update(i2,i1,im) = S2_tmp(i1,im,i2)*SQRT(lambda_2_update(i2))/SQRT(lambda_1(i1))
END IF
END DO
END DO
END DO
END SUBROUTINE TwoSiteUpdate
The two subroutines above are the time-consuming part of the program. Without them, the program is fine and the memory does not increase. With them, the memory does increase when I use the parallel version.
You may check whether you use derived types with allocatable components inside loops. You may also use OpenMP parallel directives together with the -openmp compile option to control manually what gets parallelized, rather than relying on auto-parallelization, as follows:
!$omp parallel do
do i = 1, n
...
end do
!$omp end parallel do
and compile with:
$ ifort -openmp foo.f90
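Applied to this code, the Theta construction in TwoSiteUpdate above is a natural candidate for an explicit parallel region. This is a minimal sketch, assuming it simply replaces the existing loop nest and reuses the declarations already in that subroutine:
!$omp parallel do collapse(2) private(mA2, i1, mB1, mA1, i3, tmp_ele)
DO mB2 = 1,localSize
  DO i2 = 1,D1
    DO mA2 = 1,localSize
      DO i1 = 1,D1
        tmp_ele = 0.0_rKind
        DO mB1 = 1,localSize
          DO mA1 = 1,localSize
            DO i3 = 1,D2_old
              tmp_ele = tmp_ele + U(mA2,mB2,mA1,mB1)*temp_A(i1,i3,mA1)*temp_B(i3,i2,mB1)
            END DO
          END DO
        END DO
        ! each thread writes a disjoint (i2,mB2) slice of Theta, so no race
        Theta(i1,mA2,i2,mB2) = tmp_ele
      END DO
    END DO
  END DO
END DO
!$omp end parallel do
Compile with -openmp (or -qopenmp on newer ifort versions); the iterations over the (mB2, i2) pairs are then distributed across threads.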
I want to apply three different methods, selected by the value of an integer switch. The first method uses two integers, the second a real array and an integer, and the third a 2D real array. In my current implementation I allocate and pass all of the above data as arguments (2 integers + a real array + an integer + a 2D real array). I could also use a module, but it would be similar. I'm looking for a way to pass only the data that the selected method will actually use (e.g., only the matrix for method 3) and nothing else. Any suggestions?
Edit:
I have made a simplified version of what I described above.
A part of the main program:
INTEGER :: m, imeth
REAL*8 :: x, y
REAL*8, DIMENSION(:), ALLOCATABLE :: uu, uu_, wc
REAL*8, DIMENSION(:,:), ALLOCATABLE :: BCH
imeth = 0
m = 64
ALLOCATE(uu(m), uu_(m), wc(m))
ALLOCATE(BCH(m,m))
if (imeth .EQ. 0) then
x = 1.0d0
y = 2.0d0
elseif (imeth .EQ. 1) then
!Assign values to wc
else
!Assign values to BCH
endif
call subr(m,x,y,uu,uu_,imeth,BCH,wc)
STOP
END
and a subroutine
SUBROUTINE subr(n,a,b,u,u_,imeth,DCH,ws)
IMPLICIT NONE
INTEGER, INTENT(IN) :: n, imeth
REAL*8, INTENT(IN) :: a, b
REAL*8, INTENT(IN) :: u(n), DCH(n,n), ws(n)
REAL*8, INTENT(OUT) :: u_(n)
INTEGER :: i
if (imeth .EQ. 0) then
u_ = -u * 0.5d0 / (a+b)
elseif (imeth .EQ. 1) then
u_ = -u / ws
else
u_ = matmul(DCH,u)
endif
RETURN
END SUBROUTINE subr
I want the main program to have a form like
imeth = 0
m = 64
ALLOCATE(uu(m), uu_(m))
if (imeth .EQ. 0) then
a = 1.0d0
b = 2.0d0
elseif (imeth .EQ. 1) then
ALLOCATE(wc(m))
!Assign values to wc
else
ALLOCATE(BCH(m,m))
!Assign values to BCH
endif
if (imeth .EQ. 0) then
call subrA(m,x,y,uu,uu_)
elseif (imeth .EQ. 1) then
call subrB(m,wc,uu,uu_)
else
call subrC(m,BCH,uu,uu_)
endif
EDIT: After the OP added the code, I think that using optional arguments in conjunction with the present() intrinsic might be better suited to this task. The subroutine could then read:
SUBROUTINE subr(n,u_,a,b,u,DCH,ws)
IMPLICIT NONE
INTEGER, INTENT(IN) :: n
REAL*8, INTENT(OUT) :: u_(n)
REAL*8, INTENT(IN),OPTIONAL :: a
REAL*8, INTENT(IN),OPTIONAL :: b
REAL*8, INTENT(IN),OPTIONAL :: u(n)
REAL*8, INTENT(IN),OPTIONAL :: DCH(n,n)
REAL*8, INTENT(IN),OPTIONAL :: ws(n)
INTEGER :: i
if ( present(a) .and. present(b) .and. present(u) ) then
u_ = -u * 0.5d0 / (a+b)
elseif ( present(u) .and. present(ws) ) then
u_ = -u / ws
elseif ( present(DCH) .and. present(u) ) then
u_ = matmul(DCH,u)
else
stop 'invalid combination'
endif
END SUBROUTINE subr
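The caller then supplies only what the chosen method needs, via keyword arguments (optional arguments require an explicit interface, e.g. putting subr in a module). Reusing the variable names from the question's main program, a call site might look like this sketch:
if (imeth .EQ. 0) then
call subr(m, uu_, a=x, b=y, u=uu)
elseif (imeth .EQ. 1) then
call subr(m, uu_, u=uu, ws=wc)
else
call subr(m, uu_, DCH=BCH, u=uu)
endif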
Here is the old answer as it still might be helpful:
Maybe you could try interfaces:
module interface_test
implicit none
interface method
module procedure method1
module procedure method2
module procedure method3
end interface
contains
subroutine method1(int1, int2)
implicit none
integer,intent(in) :: int1
integer,intent(out) :: int2
int2 = 2*int1
end subroutine
subroutine method2(int, realArray)
implicit none
integer,intent(in) :: int
real,intent(out) :: realArray(:)
realArray = real(2*int)
end subroutine
subroutine method3(realArray)
implicit none
real,intent(inout) :: realArray(:,:)
realArray = 2*realArray
end subroutine
end module
program test
use interface_test, only: method
implicit none
integer :: int1, int2
real :: arr1D(10)
real :: arr2D(10,10)
int1 = 1
call method(int1, int2)
print *, int2
call method(int1,arr1D)
print *, arr1D(1)
arr2D = 1.
call method(arr2D)
print *, arr2D(1,1)
end program
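Since method is a generic name, the compiler resolves each call to method1, method2, or method3 at compile time based on the argument types and ranks, so each specific routine declares only the data it actually uses.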
I'm trying to parallelize the following code.
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i
ALLOCATE (lli(size(y)))
lli = 0.0d0
ll = 0.0d0
do i = 1, size(y)
lli(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
ll = sum(lli)
end subroutine log_likelihood
To do this, I'm trying to use MPI_ALLGATHER. This is the code I wrote
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i, size_y, diff
size_y=size(y)
ALLOCATE (lli(size_y))
!Broadcasting
call MPI_BCAST(theta, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
call MPI_BCAST(y, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
! Determine how many points to handle with each proc
points_per_proc = (size_y + numprocs - 1)/numprocs
! Determine start and end index for this proc's points
istart = proc_num * points_per_proc + 1
iend = min((proc_num + 1)*points_per_proc, size_y)
diff = iend-istart+1
allocate(proc_contrib(istart:iend))
do i = istart, iend
proc_contrib(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
call MPI_ALLGATHER(proc_contrib, diff, MPI_DOUBLE_PRECISION, &
lli, diff, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
ll = sum(lli)
end subroutine log_likelihood
When I try to run my program, I get the following error.
$ mpiexec -n 2 ./mle.X
Fatal error in PMPI_Allgather: Internal MPI error!, error stack:
PMPI_Allgather(961)......: MPI_Allgather(sbuf=0x7ff2f251b860, scount=1500000, MPI_DOUBLE_PRECISION, rbuf=0x7ff2f2ad5650, rcount=3000000, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD) failed
MPIR_Allgather_impl(807).:
MPIR_Allgather(766)......:
MPIR_Allgather_intra(560):
MPIR_Localcopy(357)......: memcpy arguments alias each other, dst=0x7ff2f2ad5650 src=0x7ff2f251b860 len=12000000
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= EXIT CODE: 1
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
Can somebody please explain to me what I'm doing wrong?
Thanks!
I was finally able to solve my problem. The serial and parallel versions of my code are available at https://bitbucket.org/ignacio82/bhhh
subroutine log_likelihood(y, n, theta, lli, ll)
integer, intent(in) :: n
doubleprecision, intent(in) :: y(n)
doubleprecision, intent(in) :: theta(2)
doubleprecision, intent(out) :: lli(n)
doubleprecision, intent(out) :: ll
integer :: i
do i = istart, iend
proc_contrib(i-istart+1) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
if ( mod(n,numprocs)==0 ) then
call MPI_ALLGATHER(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, points_per_proc, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
else if (numprocs-1 == proc_num ) then
recvcounts(numprocs) = iend-istart+1
call MPI_ALLGATHERV(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, recvcounts, displs, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
end if
ll = sum(lli)
end subroutine log_likelihood
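The undeclared names above (istart, iend, proc_contrib, points_per_proc, recvcounts, displs, numprocs, proc_num, ierr) are presumably module variables defined elsewhere in the linked repository. Since every rank must take part in the same collective call, here is a minimal sketch of how the ALLGATHERV bookkeeping could be set up on all ranks; the helpers p, istart_p and iend_p are hypothetical names introduced only for this sketch:
integer :: p, istart_p, iend_p                 ! hypothetical local helpers
allocate(recvcounts(numprocs), displs(numprocs))
points_per_proc = (n + numprocs - 1)/numprocs
do p = 1, numprocs
istart_p = (p-1)*points_per_proc + 1
iend_p = min(p*points_per_proc, n)
recvcounts(p) = iend_p - istart_p + 1          ! elements contributed by rank p-1
displs(p) = istart_p - 1                       ! offset of rank p-1's block in lli
end do
call MPI_ALLGATHERV(proc_contrib, recvcounts(proc_num+1), MPI_DOUBLE_PRECISION, &
lli, recvcounts, displs, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)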