Say you have decomposed your domain according to the number of ranks (with mpi_cart_create), so each rank knows the indices of its subdomain. I then want to use gatherv to reassemble a global array in order to write the results. This code seems to work for 2 or 4 processes but not for 8, so I feel like I am missing something. There is no error message.
Edit: each rank holds its own local array, whose elements are all equal to the rank of the process (from 0 to n_procs-1). What I am trying to achieve is to reassemble the global array with gatherv. This is for a finite volume code; my aim here is just to reproduce the domain decomposition and the gatherv step I need. If it worked, there would be no -10 left in the printout, and the blocks would be clearly distinguishable, matching the decomposition done by MPI.
The code works as follows: I decompose the domain, compute the delimitations/indices associated with each subdomain, define the new type that will be received into the global array, then compute the displacements and call gatherv.
program scatter
  use mpi
  implicit none

  integer, parameter :: nrows = 8, ncols = 8
  integer, dimension(8,8) :: global
  integer, dimension(2) :: dims_orig, dims, coo, sizes, subsizes, coo_init
  logical, dimension(2) :: periods
  integer, dimension(:,:), allocatable :: local
  integer, dimension(:), allocatable :: displ, counts
  integer :: info, code, rank, n_procs, comm2d
  integer :: ndi, ndj, nni, nnj, nni_last, nnj_last
  integer :: i1, i2, j1, j2, ii1, ii2, jj1, jj2, row, col
  integer :: type0, type_subarray, sizeofint, i, j
  integer :: imax, jmax, loc_size
  integer(kind=mpi_address_kind) :: start, extend
  logical :: reorder

  ! dimensions of the global array
  imax = nrows
  jmax = ncols
  ! default value of the global array
  global = -10

  call mpi_init(code)
  call mpi_comm_rank(mpi_comm_world,rank,code)
  call mpi_comm_size(mpi_comm_world,n_procs,code)

  dims_orig = 0
  call mpi_dims_create(n_procs,2,dims_orig,code)
  ! number of domains in each direction
  ndi = dims_orig(1)
  ndj = dims_orig(2)
  ! nni/nnj = sizes of each domain
  nni = nrows/ndi
  nnj = ncols/ndj
  nni_last = nrows-(ndi-1)*nrows/ndi
  nnj_last = ncols-(ndj-1)*ncols/ndj

  dims(1) = ndi
  dims(2) = ndj
  periods = .false.
  reorder = .true.
  call mpi_cart_create(mpi_comm_world,2,dims,periods,reorder,comm2d,code)
  call mpi_comm_rank(comm2d,rank,code)
  call mpi_cart_get(comm2d,2,dims,periods,coo,code)

  if (coo(1)==ndi-1) then
    nni = nni_last
    i2 = imax
    i1 = i2-nni+1
  else
    i1 = rank/ndj*nni+1
    i2 = i1+nni-1
  endif
  if (coo(2)==ndj-1) then
    nnj = nnj_last
    j2 = jmax
    j1 = j2-nnj+1
  else
    j1 = mod(rank,ndj)*nnj+1
    j2 = j1+nnj-1
  endif
  print*, rank, "|", i1, i2, j1, j2
  call mpi_barrier(mpi_comm_world,code)

  ! create the new types
  sizes    = [imax, jmax]
  subsizes = [i2-i1+1, j2-j1+1]
  coo_init = [i1-1, j1-1]
  call mpi_type_create_subarray(2,sizes,subsizes,coo_init,&
       mpi_order_fortran,mpi_integer,type0,code)
  call mpi_type_size(mpi_integer,sizeofint,code)
  start = 0
  extend = sizeofint*nnj
  ! call mpi_type_get_extent(type0,start,extend,code)
  call mpi_type_create_resized(type0,start,extend,type_subarray,info)
  !type_subarray = type0
  call mpi_type_commit(type_subarray,code)

  allocate(displ(ndi*ndj))
  ! forall(col=1:ndj,row=1:ndi)
  !   displ(1+(row-1)+(col-1)*ndi) = (row-1)+(col-1)*imax
  !   ! displ(1+(row-1)*ndi+(col-1)) = (row-1)*ndi+(col-1)
  ! endforall
  ! computing the displacements
  do i = 1,ndi
    do j = 1,ndj
      displ(1+(i-1)+(j-1)*ndj) = (i-1) + (j-1)*imax
    enddo
  enddo

  allocate(local(i1:i2,j1:j2))
  allocate(counts(n_procs))
  counts = 1
  local = rank
  loc_size = (i2-i1+1)*(j2-j1+1)
  call mpi_gatherv(local,loc_size,mpi_integer,&
       global,counts,displ,type_subarray,0,&
       mpi_comm_world,code)
  call mpi_barrier(mpi_comm_world,code)

  if (rank==0) then
    do i = 1,nrows
      print*, global(i,:)
    enddo
  endif
  call mpi_finalize(code)
endprogram scatter
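For comparison, the usual receive-side recipe when every block has the same nni x nnj shape (which holds here whenever nrows and ncols are divisible by ndi and ndj) is to build the subarray type with a zero start, resize its extent to a single integer so the displacements count elements, and order the displacements by Cartesian rank. A sketch of that setup, untested against this exact program:

! place the subarray pattern at the origin; block positions come from displ
coo_init = [0, 0]
call mpi_type_create_subarray(2,sizes,subsizes,coo_init,&
     mpi_order_fortran,mpi_integer,type0,code)
start  = 0
extend = sizeofint   ! extent of one element, so displ is in element units
call mpi_type_create_resized(type0,start,extend,type_subarray,code)
call mpi_type_commit(type_subarray,code)
! Cartesian ranks are row-major in dims: rank = (i-1)*ndj + (j-1)
do i = 1,ndi
  do j = 1,ndj
    displ(1+(i-1)*ndj+(j-1)) = (i-1)*nni + (j-1)*nnj*imax
  enddo
enddo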
Related
I'm trying to compute the length of bond chains for the common neighbours using a subroutine, but I get the error
FOR3414: invalid recursive self-reference to BOND detected between CALL and BOND.
Below is the subroutine where I get the error:
SUBROUTINE bond (kk)
  ! local variables
  parameter (natmax=50000,nvoisin_com_max=50,max_chain=100)
  parameter (nvoisin_max=50,ibond_max=100)
  integer ia,ja,kk,jj,nvoisin_com,nb,nc,index
  integer length(max_chain),pvoisin_com(nvoisin_com_max)
  integer visit(nvoisin_com_max)
  integer n_voisin(natmax),pvoisin(natmax,nvoisin_max)
  logical voisin_logic
  ! COMMON variables
  COMMON /e1/ n_voisin
  COMMON /e2/ pvoisin
  COMMON /e3/ nvoisin_com,pvoisin_com
  COMMON /e4/ length,visit
  COMMON /e5/ nb,nc
  index = 0
  visit(kk) = 1
  ! if index = 0 at the end, a new chain is added before returning
  ! (the atom is the end of a chain)
  ia = pvoisin_com(kk)
  do jj = 1, nvoisin_com
    if (kk.eq.jj) cycle
    if (visit(jj).eq.1) cycle
    ja = pvoisin_com(jj)
    call voisin(ia,ja,voisin_logic)
    if (voisin_logic) then
      nb = nb + 1
      index = 1
      call bond(jj)
    endif
  enddo
  if (index.eq.0) then
    nc = nc + 1
    length(nc) = nb
  endif
  nb = nb - 1
  visit(kk) = 0
  RETURN
END
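For what it's worth, one likely cause: standard Fortran only allows a procedure to call itself if it is declared with the RECURSIVE prefix, and FOR3414 is the diagnostic a compiler emits when it rejects a plain self-call. A minimal sketch of the change (note that the COMMON variables would still be shared by every level of the recursion):

RECURSIVE SUBROUTINE bond (kk)
  ! ... declarations, COMMON blocks and body exactly as above ...
END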
I have a Fortran code that computes the solution vector using the Thomas algorithm subroutine.
I want the solution vector to be recomputed in a loop a certain number of times. How do I call this subroutine in the loop?
My subroutine is the Thomas algorithm subroutine. It returns the solution vector u, but I want it to be applied NN times in a loop, so that the old u becomes the new u used by the subroutine. How do I do this?
Below is what I tried:
program thomasalg2
  implicit double precision(A-H,O-Z)
  real*8, dimension(9,1) :: a,b,c,r,u,uold ! the dimension is subject to change depending on the size of the new matrix
  !real*8, dimension(9,50) :: W
  real*8 :: pi
  real*8 :: h,k,lm,l,T,al
  integer :: i,j,NN,n

  l = 1       ! right endpoint on the X-axis
  n = 9       ! number of rows/cols of the coefficient matrix with boundaries included
  T = 0.5     ! maximum value of the time variable
  NN = 50     ! number of time steps
  np = n
  h = l/n
  k = T/NN
  al = 1.0D0  ! alpha (declared real*8 so non-integer values also work)
  pi = dacos(-1.0D0)
  lm = (al**2)*(k/(h**2)) ! lambda

  do i = 1,n
    r(i,1) = sin(pi*i*h) ! this is W_0
  end do
  a(1,1) = 0.0D0
  do i = 2,n
    a(i,1) = -lm
  end do
  do i = 1,n
    b(i,1) = 1 + (2*lm)
  end do
  c(n,1) = 0.0D0
  do i = 1,n-1
    c(i,1) = -lm
  end do

  ! the 3 diagonals and the right-hand side are stored in the
  ! 1st, 2nd, 3rd and 4th files respectively
  open(10, file = 'thom1.txt')
  open(11, file = 'thom2.txt')
  open(12, file = 'thom3.txt')
  open(13, file = 'thom4.txt')
  write(10,*)
  do i = 1,n
    write(10,*) a(i,1)
  end do
  write(11,*)
  do i = 1,n
    write(11,*) b(i,1)
  end do
  write(12,*)
  do i = 1,n
    write(12,*) c(i,1)
  end do
  write(13,*)
  do i = 1,n
    write(13,*) r(i,1)
  end do

  open(14, file = 'tridag2.txt')
  write(14,*)
  do i = 1,n
    write(14,*) a(i,1),b(i,1),c(i,1),r(i,1) ! write the given vectors in the file in the form of column vectors
  end do

  ! solve the system NN times; after each solve the old solution u
  ! becomes the new right-hand side r
  do i = 1,NN
    call tridag(a,b,c,r,u,n)
    !write(15,*) u
    r = u
  end do

  open(15, file = 'tridag2u.txt')
  write(15,*)
  ! write the solution vector in the form of a column vector
  do i = 1,n
    write(15,*) u(i,1)
  end do
  !print *, "Your data has been written in 'tridag2.txt'"
end program thomasalg2
subroutine tridag(a,b,c,r,u,n)
  implicit double precision (A-H, O-Z)
  integer n, NMAX
  real*8 a(n), b(n), c(n), r(n), u(n)
  parameter (NMAX = 500)
  integer j
  real*8 bet, gam(NMAX)
  if (b(1).eq.0.) stop "tridag: rewrite equations"
  bet = b(1)
  u(1) = r(1)/bet
  do j = 2,n
    gam(j) = c(j-1)/bet
    bet = b(j)-a(j)*gam(j)
    if (bet.eq.0.) stop "tridag failed"
    u(j) = (r(j)-a(j)*u(j-1))/bet
  end do
  do j = n-1,1,-1
    u(j) = u(j)-gam(j+1)*u(j+1)
  end do
  !print *, "The solution is", u
  return
end subroutine
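Distilled, the feedback loop the question asks about is just this (a sketch reusing the names above; r must already hold the initial right-hand side before the first call):

do i = 1, NN
  call tridag(a, b, c, r, u, n)   ! solve for the new u
  r = u                           ! old u becomes the next right-hand side
end do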
I want to run a Fortran program which calls a subroutine that I want to parallelize with MPI. I know this sounds complicated, but I want to be able to specify the number of processes for each call. What I would want to use is a structure like this:
program my_program
  implicit none
  ! Define variables
  nprocs = ... ! formula for calculating number of processes
  call my_subroutine(output,nprocs,other input vars)
end program my_program
I want to run my_subroutine with the same effect as this:
mpirun -n nprocs my_subroutine.o
where my_subroutine has been compiled with 'other input vars.'
Is this possible?
Here is a simple example. I try compiling as follows:
$ mpif90 -o my_program WAVE_2D_FP_TUNER_mpi.f90 randgen.f SIMPLE_ROUTINE.f90
I try to run it like this:
$ mpirun -np (1 or 2) my_program
PROGRAM WAVE_2D_FP_TUNER_mpi
  USE MPI
  IMPLICIT NONE
  REAL(KIND=8) :: T,PARAM(1:3),Z,ZBQLU01
  REAL(KIND=8) :: ERRORS,COSTS,CMAX,CMAX_V(1:1000),THRESHOLD,Z_MIN,Z_MAX
  REAL(KIND=8) :: U,S,R(1:6),MATRIX(1:15)
  INTEGER :: EN,INC,I,J,M,P
  INTEGER :: NPROCS,IERR
  !0.8,-0.4,0.4,10,4,4,7 -- [0.003,0.534]
  !0.8,-0.2,0.2,10,4,4,7 -- [0.190,0.588]
  CALL MPI_INIT(IERR)
  CALL MPI_COMM_SIZE(MPI_COMM_WORLD,NPROCS,IERR)
  THRESHOLD = 0.D0
  EN = 81
  INC = 1
  Z_MIN = -2.D-1; Z_MAX = 2.D-1
  T = 1.D0
  PARAM(1) = 10.D0; PARAM(2) = 4.D0; PARAM(3) = 4.D0
  CMAX = 7.D0 !Max that wave speed could possibly be.
  CALL ZBQLINI(0.D0)
  OPEN(UNIT = 1, FILE = "TUNER_F.txt")
  WRITE(1,*) 'Grid Size: '
  WRITE(1,*) T/(EN-1)
  DO P = 1,15
    S = 0
    Z = Z_MIN + (1.d0/(15-1))*dble((P-1))*(Z_MAX - Z_MIN)
    WRITE(1,*) 'Z: ',Z
    DO I = 1,1000
      DO J = 1,6
        R(J) = ZBQLU01(0.D0)
      END DO
      !CALL PDE_WAVE_F_mpi(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
      CALL SIMPLE_ROUTINE(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
      IF (U <= THRESHOLD) THEN
        S = S + 1.D0
      ELSE
        S = S + 0.D0
      END IF
    END DO
    MATRIX(P) = (1.D0/1000)*S
  END DO
  DO I = 1,15
    WRITE(1,*) MATRIX(I)
  END DO
  PRINT *,MINVAL(MATRIX)
  PRINT *,MAXVAL(MATRIX)
  CLOSE(1)
  CALL MPI_FINALIZE(IERR)
END PROGRAM WAVE_2D_FP_TUNER_mpi
Here is the subroutine that I wish to parallelize with MPI:
SUBROUTINE SIMPLE_ROUTINE(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
  ! Outputs scalar U = T*Z*CMAX*INC*SUM(PARAM)*SUM(R)*SUM(Y)
  USE MPI
  IMPLICIT NONE
  REAL(KIND=8), INTENT(IN) :: T,PARAM(1:3),R(1:6),Z,CMAX
  INTEGER, INTENT(IN) :: EN,INC
  INTEGER, INTENT(IN) :: NPROCS
  REAL(KIND=8), INTENT(OUT) :: U
  REAL(KIND=8) :: H,LOCAL_SUM,SUM_OF_X
  REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: X
  INTEGER :: PX,PX_MAX,NXL,REMX,IX_OFF,P_LEFT,P_RIGHT
  INTEGER :: J
  INTEGER :: IERR,MYID
  ! Broadcast nprocs handle to all processes in MPI_COMM_WORLD
  CALL MPI_BCAST(&NPROCS, NPROCS, MPI_INT, 0, MPI_COMM_WORLD,IERR)
  ! Create subcommunicator SUBCOMM (do not know how to define WORLD_GROUP?)
  CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,WORLD_GROUP,SUBCOMM,IERR)
  ! Assign IDs to processes in SUBCOMM
  CALL MPI_COMM_RANK(SUBCOMM,MYID,IERR)
  ! Give NPROCS - 1 to SUBCOMM
  CALL MPI_COMM_SIZE(SUBCOMM,NPROCS-1,IERR)
  H = 2.D0/(EN-1)
  ! LABEL THE PROCESSES FROM 1 TO PX_MAX.
  PX = MYID + 1
  PX_MAX = NPROCS
  ! SPLIT UP THE GRID IN THE X-DIRECTION.
  NXL = EN/PX_MAX        !nxl = 10/3 = 3
  REMX = EN-NXL*PX_MAX   !remx = 10-3*3 = 1
  IF (PX .LE. REMX) THEN !for px = 1, nxl = 3
    NXL = NXL+1          !nxl = 4
    IX_OFF = (PX-1)*NXL  !ix_off = 0
  ELSE
    IX_OFF = REMX*(NXL+1)+(PX-(REMX+1))*NXL !for px = 2 and px = 3: ix_off = 1*(3+1)+(2-(1+1))*3 = 4, ix_off = 1*(3+1)+(3-(1+1))*3 = 7
  END IF
  ! ALLOCATE MEMORY FOR VARIOUS ARRAYS.
  ALLOCATE(X(0:NXL+1))
  X(:) = (/(-1.D0+DBLE(J-1+IX_OFF)*H, J=1,EN)/)
  LOCAL_SUM = SUM(X(1:NXL))
  CALL MPI_REDUCE(LOCAL_SUM,SUM_OF_X,1,&
       MPI_DOUBLE_PRECISION,MPI_SUM,&
       0,MPI_COMM_WORLD,IERR)
  U = T*Z*CMAX*INC*SUM(PARAM)*SUM(R)*SUM_OF_X
  DEALLOCATE(X)
  CALL MPI_COMM_FREE(SUBCOMM,IERR)
  CALL MPI_BARRIER(MPI_COMM_WORLD,IERR)
END SUBROUTINE SIMPLE_ROUTINE
Ultimately, I want to be able to change the number of processors used in the subroutine, with nprocs calculated from the value of EN.
A simple approach is to start the MPI app with the maximum number of processes.
my_subroutine will then first MPI_Bcast(&nprocs, ...) and MPI_Comm_split(MPI_COMM_WORLD, ..., &subcomm) in order to create a sub-communicator subcomm with nprocs tasks (you can use MPI_UNDEFINED so that the "other" communicator will be MPI_COMM_NULL).
The MPI tasks that are part of subcomm then perform the computation.
Finally, call MPI_Comm_free(&subcomm) and MPI_Barrier(MPI_COMM_WORLD).
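A minimal Fortran sketch of that pattern (nprocs and subcomm are the names from the description; myrank, color and the declarations are illustrative):

INTEGER :: NPROCS, MYRANK, COLOR, SUBCOMM, IERR
CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYRANK, IERR)
! rank 0 computed nprocs (e.g. from EN); share it with everyone
CALL MPI_BCAST(NPROCS, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
! ranks below nprocs join SUBCOMM; the rest get MPI_COMM_NULL
IF (MYRANK < NPROCS) THEN
  COLOR = 0
ELSE
  COLOR = MPI_UNDEFINED
END IF
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, COLOR, MYRANK, SUBCOMM, IERR)
IF (SUBCOMM /= MPI_COMM_NULL) THEN
  ! ... perform the computation on nprocs tasks ...
  CALL MPI_COMM_FREE(SUBCOMM, IERR)
END IF
CALL MPI_BARRIER(MPI_COMM_WORLD, IERR)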
From a performance point of view, note that sub-communicator creation can be expensive, but hopefully not significant compared to the computation time.
If it is, you would rather revamp your algorithm so that nprocs tasks do the job and the other ones wait.
Another approach would be to start your app with one MPI task, MPI_Comm_spawn() nprocs-1 tasks, merge the inter-communicator, perform the computation, and terminate the spawned tasks.
The overhead of task creation is much higher, and this might not be fully supported by your resource manager, so I would not advise this option.
I'm trying to compile and run a relatively old code from a PhD thesis; you can find the whole code in Appendices C and D of this document.
Here are the necessary parts of the code.
From wfMath.f90:
subroutine wfmath_gaussian(widthz,pz)
  use progvars
  implicit none
  real*8, intent(in) :: widthz ! the width of the wavepacket
  real*8, intent(in) :: pz     ! momentum
  integer :: nR
  real*8 :: rvalue
  complex*16 :: cvalue
  ! complex*16 :: psi !!! ORIGINAL CODE LINE 23/02/2018. Saba
  complex*16, dimension(1) :: psi !!! psi is originally defined as a scalar, but wfmath_normalize(wf) takes a rank-1 array as
  ! argument, so here I change the declaration of psi from a scalar to a rank-1 array containing only one element. 23/02/2018. Saba
  real*8 :: z2
  z2 = minz + deltaz
  do nR = 1, nz
    rvalue = exp( -((z2-centerz)/widthz)**2 /2 )/ (2*pi*widthz) !!! ORIGINAL CODE LINE
    !rvalue = exp( -((z2-centerz)/1.0d0)**2 /2 )/ (2*pi*1.0d0)
    cvalue = cdexp( cmplx(0.0,1.0)*(pz*z2))
    psi(nR) = rvalue * cvalue
    z2 = z2 + deltaz ! next grid position in x-direction
  enddo
  call wfmath_normalize(psi)
end subroutine wfmath_gaussian
From tdse.f90 (main part):
subroutine init
  use progvars
  use strings
  use wfMath
  use wfPot
  !use tdseMethods
  implicit none
  integer :: nloop
  real*8 :: widthz,pz
  select case (trim(molecule))
  case("H2")
    mass = 917.66d0;      nz = 2048;  deltaz = 0.05d0
  case("D2")
    mass = 1835.241507d0; nz = 1024;  deltaz = 0.05d0
  case("N2")
    mass = 12846.69099d0; nz = 512;   deltaz = 0.01d0
  case("O2")
    mass = 14681.93206d0; nz = 8192;  deltaz = 0.005d0
  case("Ar2")
    mass = 36447.94123d0; nz = 65536; deltaz = 0.002d0
  end select
  maxt = 33072.80d0 !800fs ! maximum time
  deltat = 1.0d0           ! delta time
  widthz = 1.0d0           ! width of the gaussian
  minz = 0.05d0            ! minimum z in a.u.
  maxz = nz * deltaz       ! maximum z in a.u.
  centerz = 2.1d0          ! center of the gaussian
  nt = NINT(maxt/deltat)   ! time steps
  pz = 0.d0                ! not used currently
  !_____________________________FFT Section____________________________________________
  deltafft = 20.d0*deltat !1.0d0*deltat ! time step for FFT
  nfft = NINT(maxt/deltafft)            ! no of steps for FFT
  !_________________________absorber parameters________________________________________
  fadewidth = 10.d0     ! the width of the absorber in a.u.
  fadestrength = 0.01d0 ! the maximum height of the negative imaginary potential
  !_________________________E FIELD section____________________________________________
  Ewidth = 1446.2d0 !35fs   ! width of the envelope
  Eo = 0.053 !E14           ! field amplitude
  Eomega = 0.057d0 !800nm   ! laser frequency
  ! Eomega = 0.033d0 !1400nm ! laser frequency
  Ephi = 0.d0               ! carrier envelope phase
  Eto = 1000.d0             ! center of the Gaussian envelope
  EoPed = 0.0755 !2E14
  EwidthPed = 826.638 !20fs
  EomegaPed = Eomega
  EphiPed = 0.d0
  EtoPed = 1000.d0
  EoPump = 0.053 !1E14 0.00285d0
  EwidthPump = Ewidth
  EomegaPump = 0.057d0
  EphiPump = 0.0d0
  EtoPump = 0.d0
  includeAbsorber = .true.        ! switch for absorber
  includeField = .true. ! .false. ! switch for efield
  includePedestal = .false.       ! switch for pedestal
  includeConstantPump = .true. ! .false. ! switch for efield
  useADK = .false.                ! ADK switch
  calculatePowerSpectra = .true.
  calculateKERPowerSpectra = .true. !.false.
  !_____________________________Printing & Plotting Filters__________________________________
  printFilter = nz
  maxFrequencyFilter = 500
  printInterval = 100 !200
  ! print filter upper boundary check
  if (printFilter > nz) then
    printFilter = nz
  end if
  call allocateArrays()
  do nloop = 1,nz
    Z(nloop) = minz + nloop*deltaz
    P(nloop) = 2*pi*(nloop-(nz/2)-1)/(maxz-minz)
    E(nloop) = 27.2*(P(nloop)**2)/(4.d0*mass)
  end do
  ! call wfmath_gaussian(psiground,widthz,pz) !!! ORIGINAL CODE LINE
  call wfmath_gaussian(psiground,real(widthz),pz) ! Attempt to solve mismatch error. Does not work. Saba 24/02/2018
  ! call wfmath_gaussian(psiground,1.0d0,pz) ! Attempt to solve mismatch error. Does not work. Saba 23/02/2018
  ! call wfmath_gaussian(pz) ! Attempt to solve mismatch error. Does not work. Saba 23/02/2018
  call setabsorber_right(fadewidth, fadestrength)
  call printpsi(psiground,trim(concat(outputFolder,"psi_gausssian.dat")))
  call potentials_init(nz) ! initialize potential arrays
  call read_potential()
end subroutine init
As far as I understand, there should be no mismatch at all: widthz is declared as real*8 in the main part of the code (subroutine init), and subroutine wfmath_gaussian(...) expects widthz to be real*8. I can't see where this mismatch error occurs.
Compiler used: GNU Fortran 6.3.0
Compile line used: $ gfortran tdse.f90
Error message:
tdse.f90:159:118:
call wfmath_gaussian(psiground,real(widthz),pz) ! Attempt to solve mismatch error. Does not work. Saba 24/02/2018
(1)
Error: Type mismatch in argument 'widthz' at (1); passed COMPLEX(8) to REAL(8)
Thanks in advance...
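For reference, the mismatch can be read off the two snippets above: wfmath_gaussian takes two dummy arguments, while the call site passes three, so every actual argument shifts by one position. A sketch of the association, using only the declarations quoted above:

subroutine wfmath_gaussian(widthz,pz)   ! two dummy arguments
  real*8, intent(in) :: widthz          ! first dummy: real(8)
  real*8, intent(in) :: pz

call wfmath_gaussian(psiground, real(widthz), pz)   ! three actual arguments
! psiground (a complex array) is associated with the first dummy, widthz,
! which is exactly the "passed COMPLEX(8) to REAL(8)" the compiler reports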
I want to read data from some .dat files into a Fortran code for postprocessing. As a test case, I am using just one processor for MPI and trying to read a single data file into my code. The content of the data file is as follows:
qout0050.dat : 1 1 1
However, the matrix (Vn in this case) which is supposed to store the content of this data file contains only zero values. The relevant part of the code, which reads from the data file and stores it in the matrix, is as follows:
subroutine postproc()
  use precision_mod
  use mpicomms_mod
  implicit none
  integer(kind=MPI_Offset_kind) :: i, j, igrid, k, l, disp, iproc, info, lwork
  integer :: rst, numvar, ifile, number, num, step, ntot
  integer :: Nx_max, Ny_max, Nz_max
  integer :: Nxp, Nzp, Nyp, Ngrid
  integer :: Ifirst, Ilast, Jfirst, Jlast, Kfirst, Klast
  character*(64) :: fname, buffer, ffname
  integer :: tmp, N1, N2, N3, tmpp
  real(WP), allocatable :: qout(:,:,:,:), phi_xyz(:,:,:,:,:)
  real(WP), allocatable :: Vn(:,:), Vntmp(:,:), TAU(:,:)
  real(WP), allocatable :: Rprime(:,:), Rtmp(:,:), Rend(:,:), Q(:,:), tmpL(:), tmpG(:)
  real(WP), allocatable :: s(:), vt(:,:), u(:,:), utmp(:,:)
  real(WP), allocatable :: tmp1(:,:), tmp2(:,:), tmp3(:,:), phi(:,:)
  integer, dimension(2) :: view
  integer :: view1
  integer, dimension(3) :: lsizes, gsizes, start
  real(WP) :: tmpr
  real(WP), allocatable :: work(:), mu(:), eigY(:,:), wr(:), wi(:), beta(:)
  integer(kind=MPI_Offset_kind) :: SP_MOK, Nx_MOK, Ny_MOK, Nz_MOK, WP_MOK

  open(unit=110,file='postparameters.dat',form="formatted")
  read (110,*) Nx_max
  read (110,*) Ny_max
  read (110,*) Nz_max
  read (110,*) numvar
  read (110,*) step
  close(110)

  ! Define the size of the grid on each processor
  if (mod(Nx_max,px).ne.0) then
    write(*,*) 'Error in postproc: Nx_max is not divisible by px'
    call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
  end if
  Nxp = Nx_max/px
  if (mod(Ny_max,py).ne.0) then
    write(*,*) 'Error in postproc: Ny_max is not divisible by py'
    call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
  end if
  Nyp = Ny_max/py
  if (mod(Nz_max,pz).ne.0) then
    write(*,*) 'Error in postproc: Nz_max is not divisible by pz'
    call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
  end if
  Nzp = Nz_max/pz

  Ifirst = irank*Nxp + 1
  Ilast  = Ifirst + Nxp - 1
  Jfirst = jrank*Nyp + 1
  Jlast  = Jfirst + Nyp - 1
  Kfirst = krank*Nzp + 1
  Klast  = Kfirst + Nzp - 1

  ! Setting the view for phi
  gsizes(1) = Nx_max
  gsizes(2) = Ny_max
  gsizes(3) = Nz_max
  lsizes(1) = Nxp
  lsizes(2) = Nyp
  lsizes(3) = Nzp
  start(1) = Ifirst - 1
  start(2) = Jfirst - 1
  start(3) = Kfirst - 1
  call MPI_TYPE_CREATE_SUBARRAY(3,gsizes,lsizes,start,&
       MPI_ORDER_FORTRAN,MPI_REAL_SP,view,ierr)
  call MPI_TYPE_COMMIT(view,ierr)
  call MPI_TYPE_CREATE_SUBARRAY(3,gsizes,lsizes,start,&
       MPI_ORDER_FORTRAN,MPI_REAL_WP,view1,ierr)
  call MPI_TYPE_COMMIT(view1,ierr)

  WP_MOK = int(8,      MPI_Offset_kind)
  Nx_MOK = int(Nx_max, MPI_Offset_kind)
  Ny_MOK = int(Ny_max, MPI_Offset_kind)
  Nz_MOK = int(Nz_max, MPI_Offset_kind)

  ! Reading the qout file
  ffname = 'qout'
  allocate(qout(Nxp,Nyp,Nzp,numvar))
  allocate(Vn(Nxp*Nyp*Nzp*numvar,step))
  do rst = 1,step
    if (myrank == 0) print*, 'Step = ', 50 + rst - 1
    write(buffer,"(i4.4)") 50 + rst - 1
    fname = trim(ffname)//trim(buffer)
    fname = trim('ufs')//":"//trim(fname)
    fname = trim(adjustl(fname))//'.dat'
    call MPI_FILE_OPEN(MPI_COMM_WORLD,fname,MPI_MODE_RDONLY,MPI_INFO_NULL,ifile,ierr)
    call MPI_FILE_READ(ifile,Ngrid,1,MPI_INTEGER,status,ierr)
    if (1 /= Ngrid) then
      if (myrank == 0) write(*,*) Ngrid
    endif
    call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
    if (tmp /= Nx_max) write(*,*) tmp
    call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
    if (tmp /= Ny_max) write(*,*) tmp
    call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
    if (tmp /= Nz_max) write(*,*) tmp
    call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
    call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
    call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
    call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
    do l = 1,numvar
      disp = 4*4 + 4*WP_MOK + Nx_MOK*Ny_MOK*Nz_MOK*WP_MOK*(l-1)
      call MPI_FILE_SET_VIEW(ifile,disp,MPI_REAL_WP,view1,"native",MPI_INFO_NULL,ierr)
      call MPI_FILE_READ_ALL(ifile,qout(1:Nxp,1:Nyp,1:Nzp,l),Nxp*Nzp*Nyp,MPI_REAL_WP,status,ierr)
    end do
    call MPI_FILE_CLOSE(ifile,ierr)
    !-----------------------------------------------
    ! Building the snapshot matrix Vn --------------
    !-----------------------------------------------
    do i = 1,numvar
      do k = 1,Nzp
        do j = 1,Nyp
          Vn((1 + Nxp*(j-1) + Nxp*Nyp*(k-1) + Nxp*Nyp*Nzp*(i-1)):(Nxp*j + Nxp*Nyp*(k-1) + Nxp*Nyp*Nzp*(i-1)),rst) = qout(1:Nxp,j,k,i)
        end do
      end do
    end do
  end do
  call MPI_BARRIER(MPI_COMM_WORLD,ierr)
  deallocate(qout)
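One way to narrow down why Vn stays at zero (a sketch, not part of the original code; it assumes ierr, status and MPI_REAL_WP are the ones provided by mpicomms_mod) is to check the error code and the actual element count after each read:

integer :: rcount
call MPI_FILE_READ_ALL(ifile,qout(1:Nxp,1:Nyp,1:Nzp,l),Nxp*Nyp*Nzp,MPI_REAL_WP,status,ierr)
if (ierr /= MPI_SUCCESS) write(*,*) 'read failed, ierr = ', ierr
call MPI_GET_COUNT(status,MPI_REAL_WP,rcount,ierr)
if (rcount /= Nxp*Nyp*Nzp) write(*,*) 'short read: got ', rcount, ' elements'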