I am trying to execute this MPI program that I have written in Fortran:
Program CartesianGrid2D
Implicit None
Include 'mpif.h'
!----------------------------------------!
! Setting of the computational grid !
Integer, Parameter :: NDIM = 2 ! number of space dimensions
Integer, Parameter :: IMAX = 200 ! number of grid points in x-direction
Integer, Parameter :: JMAX = 200 ! number of grid points in y-direction
Real, Parameter :: x0=0.0 !min x-coord
Real, Parameter :: x1=1.0 !max x-coord
Real, Parameter :: y0=0.0 !min y-coord
Real, Parameter :: y1=1.0 !max y-coord
!----------------------------------------!
! Local variable declaration
TYPE tMPI
Integer :: myrank
Integer :: nCPU
Integer :: status(MPI_STATUS_SIZE)
Integer :: iStart, iEnd !idx of starting and ending cell in x-dir
Integer :: jStart, jEnd !idx of starting and ending cell in y-dir
Integer :: imax, jmax !number of cells within each rank
Integer, Allocatable :: mycoords(:) !point coords of the subgrid
Integer :: iErr !flag for errors in x-dir
Integer :: x_thread !number of CPUs in x-dir
Integer :: y_thread !number of CPUs in y-dir
End TYPE tMPI
TYPE(tMPI) :: MPI
Logical, Allocatable :: periods(:)
Integer, Allocatable :: dims(:)
Integer :: TCPU, BCPU, RCPU, LCPU !neighbor ranks of myrank
Integer :: i, j, idx, jdx, source
Integer :: COMM_CART !cartesian MPI communicator
Real :: dx, dy
Real, Allocatable :: x(:), y(:) ! grid coordinates
!----------------------------------------!
! 1) MPI initialization
CALL MPI_INIT(MPI%iErr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, MPI%myrank, MPI%iErr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,MPI%nCPU,MPI%iErr)
! check the number of CPUs
If(MOD(MPI%nCPU,2).ne.0) then
print *, 'ERROR. Number of CPU must be even!'
CALL MPI_FINALIZE(MPI%iErr)
Stop
End if
CALL MPI_BARRIER(MPI_COMM_WORLD,MPI%iErr)
! 2) Create a Cartesian topology
! check the number of cells
If(MOD(IMAX,2).ne.0) then
print *, 'ERROR. Number of x-cells must be even!'
CALL MPI_FINALIZE(MPI%iErr)
Stop
End if
If(MOD(JMAX,2).ne.0) then
print *, 'ERROR. Number of y-cells must be even!'
CALL MPI_FINALIZE(MPI%iErr)
Stop
End if
! Domain decomposition
MPI%x_thread = MPI%nCPU/2
MPI%y_thread = MPI%nCPU - MPI%x_thread
Allocate(dims(NDIM), periods(NDIM), MPI%mycoords(NDIM))
dims = (/ MPI%x_thread, MPI%y_thread /)
periods = .FALSE.
CALL MPI_CART_CREATE(MPI_COMM_WORLD,NDIM,dims,periods,.TRUE.,COMM_CART,MPI%iErr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, MPI%myrank, MPI%iErr)
! 2.3) Find CPU neighbors
CALL MPI_CART_SHIFT(COMM_CART,0,1,source,RCPU,MPI%iErr)
CALL MPI_CART_SHIFT(COMM_CART,0,-1,source,LCPU,MPI%iErr)
CALL MPI_CART_SHIFT(COMM_CART,1,0,source,TCPU,MPI%iErr)
CALL MPI_CART_SHIFT(COMM_CART,-1,0,source,BCPU,MPI%iErr)
! coordinates of the subgrid
CALL MPI_CART_COORDS(COMM_CART,MPI%myrank,NDIM,MPI%mycoords,MPI%iErr)
MPI%imax = IMAX/MPI%x_thread
MPI%jmax = JMAX/MPI%y_thread
MPI%iStart = 1 + MPI%mycoords(1)*MPI%imax
MPI%iEnd = MPI%iStart + MPI%imax - 1
MPI%jStart = 1 + MPI%mycoords(2)*MPI%jmax
MPI%jEnd = MPI%jStart + MPI%jmax - 1
! 3) Compute the real mesh
dx = (x1-x0)/Real(IMAX-1)
dy = (y1-y0)/Real(JMAX-1)
Allocate(x(MPI%IMAX))
Allocate(y(MPI%JMAX))
idx = 0
Do i = MPI%iStart, MPI%iEnd
idx = idx + 1
x(idx) = (x0-dx/2.) + (i-1)*dx
End do
jdx = 0
Do i = MPI%jStart, MPI%jEnd
jdx = jdx + 1
y(jdx) = (y0-dy/2.) + (j-1)*dy
End do
! 4) Plot the output and finalize the program
CALL ASCII_Output(x,y,MPI%imax,MPI%jmax,MPI%myrank)
CALL MPI_FINALIZE(MPI%iErr)
End program CartesianGrid2D
Subroutine ASCII_Output(x,y,imax,jmax,myrank)
!-----------------------------------------!
Implicit None
!-----------------------------------------!
Integer :: imax, jmax, myrank
Real :: x(imax), y(jmax)
Integer :: i, j, DataUnit
Character(len=10) :: cmyrank
Character(len=200) :: IOFileName
!-----------------------------------------!
Write(cmyrank,'(I4.4)') myrank
IOFileName = 'CartesianGrid_output-'//TRIM(cmyrank)//'.dat'
DataUnit = 100+myrank
Open(Unit=DataUnit, File=Trim(IOFileName), Status='Unknown', Action='Write')
Write(DataUnit,*) imax
Write(DataUnit,*) jmax
Do i = 1, imax
Write(DataUnit,*) x(i)
End do
Do j = 1, jmax
Write(DataUnit,*) y(j)
End do
Close(DataUnit)
End Subroutine ASCII_Output
However, whenever I try to execute it, I get this list of errors:
Abort(795947788) on node 0 (rank 0 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x60000384c9f0, periods=0x60000384c9e0, reorder=1, comm_cart=0x16f8532a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
Abort(863056652) on node 1 (rank 1 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x600003030760, periods=0x600003030860, reorder=1, comm_cart=0x16ef332a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
Abort(460403468) on node 2 (rank 2 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x6000039647d0, periods=0x6000039647e0, reorder=1, comm_cart=0x16d05f2a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
Abort(191968012) on node 3 (rank 3 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x600000b78470, periods=0x600000b784e0, reorder=1, comm_cart=0x16f75f2a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
Abort(997274380) on node 4 (rank 4 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x6000039843e0, periods=0x6000039843d0, reorder=1, comm_cart=0x16ba532a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
Abort(259076876) on node 5 (rank 5 in comm 0): Fatal error in internal_Cart_create: Invalid argument, error stack:
internal_Cart_create(102): MPI_Cart_create(MPI_COMM_WORLD, ndims=2, dims=0x6000016cab90, periods=0x6000016caaa0, reorder=1, comm_cart=0x16d3232a0) failed
MPIR_Cart_create_impl(43): Size of the communicator (6) is smaller than the size of the Cartesian topology (9)
I first compile with mpif90 -cpp -lmpi NameOfTheProgram.f90 and then execute the resulting a.out with mpirun -np 6 ./a.out.
I am running this on a MacBook Air M1 (whenever I have to run Fortran I usually use the gfortran compiler).
Your computation
MPI%x_thread = MPI%nCPU/2
MPI%y_thread = MPI%nCPU - MPI%x_thread
does not make sense. As the error message indicates, the product of x_thread and y_thread must equal the size of your communicator, but with 6 ranks you get x_thread = 3 and y_thread = 3, i.e. a 3x3 = 9-process topology requested from a communicator of only 6 processes.
Please use MPI_Dims_create to set these parameters.
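For example, a minimal sketch of that change, reusing the dims, periods and MPI variables already declared in the program above (an illustration only, not a drop-in patch):
Allocate(dims(NDIM), periods(NDIM), MPI%mycoords(NDIM))
dims = 0 ! entries left at 0 let MPI_DIMS_CREATE choose the factorization
periods = .FALSE.
CALL MPI_DIMS_CREATE(MPI%nCPU, NDIM, dims, MPI%iErr)
MPI%x_thread = dims(1)
MPI%y_thread = dims(2)
CALL MPI_CART_CREATE(MPI_COMM_WORLD, NDIM, dims, periods, .TRUE., COMM_CART, MPI%iErr)
With 6 ranks this yields a 3x2 grid, whose product matches the communicator size, so MPI_CART_CREATE no longer aborts.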
I am making a module in Fortran 90 to run PARPACK on a given matrix. I have an existing ARPACK code which functions normally as expected. I tried converting it to PARPACK, and it runs into memory errors during cleanup. I am fairly new to coding and Fortran, so please excuse any blunders I've made.
The code:
!ARPACK module
module parpack
implicit none
contains
subroutine parp
! use mpi
include '/usr/lib/x86_64-linux-gnu/openmpi/include/mpif.h'
integer comm, myid, nprocs, rc, nloc, status(MPI_STATUS_SIZE)
integer, parameter :: pres=8
integer nev, ncv, maxn, maxnev, maxncv
parameter (maxn=10**7, maxnev=maxn-1, maxncv=maxn)
! Arrays for SNAUPD
integer iparam(11), ipntr(14)
logical, allocatable :: select(:)
real(kind=pres), allocatable :: workd(:), workl(:), worktmp1(:), worktmp2(:)
! Scalars for SNAUPD
character bmat*1, which*2
integer ido, n, info, ierr, ldv
integer i, j, ishfts, maxitr, mode1, nconv
integer(kind=pres) lworkl
real(kind=pres) tol
! Arrays for SNEUPD
real(kind=pres), allocatable :: d(:,:), resid(:), v(:,:), workev(:), z(:,:)
! Scalars for SNEUPD
logical rvec, first
real sigmar, sigmai
!==============================================
real(kind=pres), allocatable :: mat(:,:)
open (11, file = 'matrix.dat', status = 'old')
read (11,*) n
!=============================================
! Dimension of the problem
nev = n/10
ncv = nev+2
ldv = n
bmat = 'I'
which = 'LM'
! Additional environment variables
ido = 0
tol = 0.0E+0
info = 0
lworkl = 3*ncv**2+6*ncv
! Algorithm Mode specifications:
ishfts = 1
maxitr = 300
mode1 = 1
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
! Distribution to nodes
!=============================================
! Matrix allocation
allocate (mat(n,n))
! PDNAUPD
allocate (workd(5*n))
allocate (workl(lworkl))
allocate (resid(n))
allocate (worktmp1(n))
allocate (worktmp2(n))
! PDNEUPD
allocate (d(n,3))
allocate (v(ldv,ncv))
allocate (workev(3*n))
allocate (z(ldv,ncv))
allocate (select(ncv))
!===========================================
! Read Matrix from the provided file
mat = 0
read(11,*) mat
mat = transpose(mat)
!===========================================
! MPI Calling
call MPI_INIT(ierr)
comm = MPI_COMM_WORLD
call MPI_COMM_RANK(comm, myid, ierr)
call MPI_COMM_SIZE(comm, nprocs, ierr)
nloc = n/nprocs
! if ( mod(n, nprocs) .gt. myid ) nloc = nloc + n
!===============================================
20 continue
call pdnaupd(comm, ido, bmat, nloc, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info) !Top level solver
call MPI_BARRIER(comm,ierr)
print *, ido, info, iparam(5) !for testing
!===============================================
if (ido .eq. -1 .or. ido .eq. 1) then
worktmp1 = 0
if (myid .ne. 0) then !It is slave
call MPI_SEND(workd(ipntr(1)), nloc, MPI_DOUBLE_PRECISION, 0, 0, comm, ierr)
else !It is host
worktmp1(1:nloc) = workd(ipntr(1):ipntr(1)+nloc-1)
i = nprocs
if (i .gt. 1) then
do i=1,nprocs-1
call MPI_RECV(worktmp1(i*nloc+1), nloc, MPI_DOUBLE_PRECISION, i, 0, comm, status, ierr)
end do
endif
endif
call MPI_BARRIER(comm,ierr)
if (myid .eq. 0) then !It is host
! Matrix multiplication
worktmp2 = 0
call matmultiply(n, mat, worktmp1, worktmp2)
workd(ipntr(2):ipntr(2)+nloc-1) = worktmp2(1:nloc)
i = nprocs
if (i .gt. 1) then
do i=1,nprocs-1
call MPI_SEND(worktmp2(i*nloc+1), nloc, MPI_DOUBLE_PRECISION, i, 100*i, comm, ierr)
end do
endif
else !It is slave
call MPI_RECV(workd(ipntr(2)), nloc, MPI_DOUBLE_PRECISION, 0, 100*myid, comm, status, ierr)
endif
go to 20
! call matmultiply(n, mat, workd(ipntr(1):ipntr(1)+n-1), workd(ipntr(2):ipntr(2)+n-1))
! go to 20
endif
! print *, info !for testing
!===============================================================
! Post-processing for eigenvalues
rvec = .true.
if (myid .eq. 0) then
call pdneupd ( comm, rvec, 'A', select, d, d(1,2), z, ldv, sigmar, sigmai, &
workev, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, &
workd, workl, lworkl, info)
endif
! print *, info !for testing
close(11)
call MPI_FINALIZE(ierr)
return
end subroutine
!==============================================================================================
! Additional Function definitions
subroutine matmultiply(n, mat, v, w)
integer n, i, j
integer, parameter :: pres=8
real(kind = pres) mat(n,n), temp(n)
real(kind = pres) v(n), w(n)
temp = 0
do j = 1,n
do i = 1,n
temp(j) = temp(j) + mat(i,j)*v(i)
end do
end do
w = temp
return
end subroutine
end module
I apologize for the ton of redundant lines and comments, I am yet to clean it up for finalization.
When I run the code on a single thread with ./a.out, I get the following output:
Invalid MIT-MAGIC-COOKIE-1 key 1 0 1629760560
1 0 1629760560
1 0 1629760560
1 0 1629760560
.
.
. <A long chain as the code is exhausting all iterations. The first of the numbers is ido,
. which starts with 1 instead of -1 for some reason, the second is info, and the third is
. iparam(5), which is a random number until the final iteration.>
.
99 1 1
munmap_chunk(): invalid pointer
Program received signal SIGABRT: Process abort signal.
Backtrace for this error:
#0 0x7f5a863d0d01 in ???
#1 0x7f5a863cfed5 in ???
#2 0x7f5a8620420f in ???
#3 0x7f5a8620418b in ???
#4 0x7f5a861e3858 in ???
#5 0x7f5a8624e3ed in ???
#6 0x7f5a8625647b in ???
#7 0x7f5a862566cb in ???
#8 0x560f05ac1819 in ???
#9 0x560f05abd7bc in checker
at /home/srivatsank/Desktop/fortran/lap_vs_arp/ptest/ptest.f90:45
#10 0x560f05abd8d9 in main
at /home/srivatsank/Desktop/fortran/lap_vs_arp/ptest/ptest.f90:3
Aborted (core dumped)
Line 45 in ptest is call parp; line 3 in ptest is use parpack (the name of the module).
The main code is as follows:
program checker
use parpack
use arpack
! use lapack
implicit none
!Program to test LAPACK and ARPACK
! 1. Variable definition
integer a,n,i
real, allocatable :: mat(:,:)
real t0, t1
a=2
! Loop
! do 20 a = 1,3
! Open File
open(unit=10, file = 'matrix.dat', status = 'replace')
! 2. Generate Symmetric matrices
n = 10**a
allocate (mat(n,n))
call RANDOM_NUMBER(mat)
! 3. Save symmetric matrices to r.dat
write (10,*) n
do 30 i=1,n
write(10,*) mat(i,:)
30 end do
deallocate(mat)
close(10)
! 4. Test time taken by each of the routines
! call cpu_time(t0)
! call arp
! call cpu_time(t1)
! print *, 'n:', n, 'ARPACK time taken:', t1-t0
call cpu_time(t0)
call parp
call cpu_time(t1)
print *, 'n:', n, 'PARPACK time taken:', t1-t0
!20 end do
end program checker
The memory error occurs at the very end of the subroutine, when the main program tries to exit from the subroutine. I have verified this by adding print statements as the last line of the subroutine.
And on running mpirun -np 4 a.out, the code just enters pdneupd and sits there forever. Could anyone help?
I am trying to write an MPI shared-memory example, but every time I run it I get some weird values.
It's a 1D stencil, just doing the sum of the elements at positions i-1, i and i+1.
I'm running this program on 2 nodes of 32 MPI processes each, and with the domain size nx=64 the domain of each rank has only 1 element.
I do the exchange between nodes with MPI_SENDRECV using ghost cells.
program mpishared
USE MPI_F08
use ISO_C_BINDING
implicit none
integer :: rank, rankNode, rankW, rankE
integer :: nbp, nbNode
integer :: key
TYPE(MPI_Comm) :: commNode ! shared node
integer :: nx ! area global
integer :: sx,ex ! area local
integer :: rsx,rex ! real bound of local array with halo
integer(kind=MPI_ADDRESS_KIND) :: size
TYPE(C_PTR) :: baseptr
TYPE(MPI_Win) :: win
integer, parameter :: dp = kind(1.d0)
real(kind=dp), dimension(:), contiguous, pointer :: ushared
real(kind=dp), dimension(:), allocatable :: u
integer :: iterx,iter,iterp
!! Init MPI
CALL MPI_INIT()
!! Info WORLD
CALL MPI_COMM_RANK(MPI_COMM_WORLD,rank)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,nbp)
! Comm 4 Node
key = 0
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,key,MPI_INFO_NULL,commNode)
CALL MPI_COMM_RANK(commNode, rankNode)
CALL MPI_COMM_SIZE(commNode, nbNode)
! Neighbours
rankW = rank-1
rankE = rank+1
if (rank == 0) rankW=MPI_PROC_NULL
if (rank == nbp-1) rankE=MPI_PROC_NULL
! Size of global domain
nx = 64
! Size of local domain
sx = 1+(rank*nx)/nbp
ex = ((rank+1)*nx)/nbp
rsx = sx ! real size only different for first
rex = ex ! and last rank in node
if (rankNode == 0) rsx = rsx-1
if (rankNode == nbNode-1) rex=rex+1
! Allocate Shared domain
size = (rex-rsx+1)
allocate(u(rex-rsx+1))
CALL MPI_WIN_ALLOCATE_SHARED(size,1,MPI_INFO_NULL,commNode,baseptr,win)
CALL C_F_POINTER(baseptr,ushared)
! Init local domain
do iterx=1,rex-rsx+1
u(iterx) = 0.0_dp
end do
if (rank == nbp-1) then
u(rex-rsx+1) = rex
end if
if (rank == 0) then
u(1) = -1.0_dp
end if
! Main Loop
CALL MPI_WIN_LOCK_ALL(0,win)
do iter=1,10
! Update sharedold
do iterx=1,rex-rsx+1
ushared(iterx)=u(iterx)
end do
! Update bound between node
if (rankNode == 0) then
CALL MPI_SENDRECV(ushared(2),nx,MPI_DOUBLE_PRECISION,rankW,100, &
ushared(1),nx,MPI_DOUBLE_PRECISION,rankW,100,&
MPI_COMM_WORLD,MPI_STATUS_IGNORE)
end if
if (rankNode == nbNode-1) then
CALL MPI_SENDRECV(ushared(ex-rsx+1),nx,MPI_DOUBLE_PRECISION,rankE,100, &
ushared(rex-rsx+1),nx,MPI_DOUBLE_PRECISION,rankE,100,&
MPI_COMM_WORLD,MPI_STATUS_IGNORE)
end if
call MPI_WIN_SYNC(win)
call MPI_BARRIER(MPI_COMM_WORLD)
! Compute
do iterx=sx-rsx+1,ex-rsx+1
u(iterx)=(ushared(iterx-1)+ushared(iterx)+ushared(iterx+1))/3.0_dp
!print *, rank, iterx, u(iterx), ushared(iterx-1), ushared(iterx), ushared(iterx+1)
end do
call MPI_BARRIER(MPI_COMM_WORLD)
end do
call MPI_WIN_UNLOCK_ALL(win)
do iterp=0, nbp-1
if (iterp == rank) then
do iterx=1,rex-rsx+1
print * , iter,"u", rank, iterx, u(iterx)
end do
end if
call MPI_BARRIER(MPI_COMM_WORLD)
end do
CALL MPI_FINALIZE()
end program
After a lot of iterations the values should be equal to the rank.
But when I run it, wrong values start to appear (like -6.018996517484083E+196).
Since I'm new to MPI RMA, I don't know whether it's a bug in the MPI implementation I use or whether I'm doing something wrong.
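For reference, here is a minimal sketch of the usual allocation pattern with mpi_f08 (it assumes the same MPI_F08 / ISO_C_BINDING declarations as in the program above; winsize and disp_unit are local names introduced for the sketch, and this only illustrates the API, it is not a diagnosis of the code): the window size is given in bytes, and C_F_POINTER takes an explicit shape when the target is an array pointer:
integer :: disp_unit
integer(kind=MPI_ADDRESS_KIND) :: winsize
! one double-precision element per local cell; the window size is passed in bytes
disp_unit = STORAGE_SIZE(1.0_dp) / 8
winsize = int(rex - rsx + 1, MPI_ADDRESS_KIND) * disp_unit
CALL MPI_WIN_ALLOCATE_SHARED(winsize, disp_unit, MPI_INFO_NULL, commNode, baseptr, win)
! map the returned C pointer onto a Fortran array pointer with an explicit shape
CALL C_F_POINTER(baseptr, ushared, [rex - rsx + 1])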
I am trying to call the subroutine SPLEV of the FITPACK library through two functions ('wer' and 'qwe'), one nested inside the other (the code is below).
The following message appears when the compiled program is executed:
QWE
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7F3EE4BF3E08
1 0x7F3EE4BF2F90
2 0x7F3EE453A4AF
3 0x4041B6 in splev_
4 0x400BD0 in value.3386 at pr.f90:?
5 0x400A6B in MAIN__ at pr.f90:?
Segmentation fault (core dumped)
If I compile my program with the flags -g -fbacktrace -fsanitize=address,zero,undefined the following output appears:
QWE
0.37051690837706980
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7FAB5F45CE08
1 0x7FAB5F45BF90
2 0x7FAB5EDA34AF
3 0x4075F0 in splev_ at splev.f:73 (discriminator 2)
4 0x400DDE in value.3386 at pr.f90:87
5 0x400FFA in qwe.3406 at pr.f90:43
6 0x400F88 in wer.3403 at pr.f90:48
7 0x400D08 in MAIN__ at pr.f90:38
Segmentation fault (core dumped)
If I compile my program with the flags -g -fbacktrace -Wall -fcheck=all the following output appears:
QWE
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7F2BE6F0FE08
1 0x7F2BE6F0EF90
2 0x7F2BE68564AF
3 0x4075F0 in splev_ at splev.f:73 (discriminator 2)
4 0x400DDE in value.3386 at pr.f90:87
5 0x400C46 in MAIN__ at pr.f90:35
Segmentation fault (core dumped)
If I compile my program with the flags -g -fbacktrace -fsanitize=address the following output appears:
QWE
ASAN:SIGSEGV
=================================================================
==4796==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x000000408f67 bp 0x7ffe7a134440 sp 0x7ffe7a1341e0 T0)
0 0x408f66 in splev_ /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/splev.f:73
1 0x40145d in value.3386 (/home/yurchvlad/Science/Coll_Int/F90/f90DP/1/curfit+0x40145d)
2 0x4011a3 in intcoll /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/pr.f90:35
3 0x401849 in main /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/pr.f90:2
4 0x7fcad9b3282f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
5 0x400d38 in _start (/home/yurchvlad/Science/Coll_Int/F90/f90DP/1/curfit+0x400d38)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/splev.f:73 splev_
==4796==ABORTING
First I will show the code, and then I will give some information about the subroutines CURFIT and SPLEV of the FITPACK library, which play the principal role here.
Here is my code. This is just a test program, i.e. it is not a mistake that I interpolate an array of values of an analytical function there.
PROGRAM IntColl
USE Constants
IMPLICIT NONE
INTEGER :: i, nen ! i = counter
! nen, nmn, ne are service variables, which
! appear on exit of CURFIT and needed on entry
! of SPLEV and SPLINT
REAL(DP) :: foo
REAL(DP) :: MOM1 ! dimensionless neutrino momentum
REAL(DP) :: dmg ( 1 : 2 * NG) ! dimensionless momentum grid
REAL(DP) :: endf( 1 : 2 * NG) ! electron neutrino distribution function
! muon neutrino distribution function
! electron and positron distribution function
REAL(DP) :: ten ( 1 : 2 * NG + k + 1) ! service arrays:
! ten is array arising on exit of working of CURFIT
! and contain knots of the spline (for endf, mndf and edf correspondingly).
REAL(DP) :: cen ( 1 : 2 * NG + k + 1) ! needed on entry of SPLEV and SPLINT
! cen appear on exit of CURFIT, contain coefficients of spline
! (for endf, mndf and edf correspondingly) and needed on entry of SPLEV and SPLINT.
REAL(DP) :: w ( 1 : 2 * NG + k + 1) ! w is array of weights for points on entry of CURFIT.
DO i = 1, 2 * NG
dmg(i) = i / 10.D+00 ! filling arrays to give their
endf(i) = eq_nu_di_fu(dmg(i)) ! on entry into subroutine
w(i) = 1.d+00 ! CURFIT
END DO
MOM1 = .53D+00
PRINT *, 'QWE'
CALL spline(dmg, endf, nen, ten, cen)
foo = value(MOM1, ten, nen, cen)
PRINT *, foo
PRINT *, wer(MOM1)
CONTAINS
REAL(DP) FUNCTION qwe(q) ! qwe and wer is "wrappers" for using
REAL(DP) :: q ! of subroutines spline > curfit
qwe = value(q, ten, nen, cen) ! in main program
END FUNCTION qwe
REAL(DP) FUNCTION wer(q)
REAL(DP) :: q
wer = qwe(q)
END FUNCTION wer
SUBROUTINE spline(x, y, n, t, c) ! spline is "hand-made wrapper" for
IMPLICIT NONE ! more convenient using of subroutine
! CURFIT in main program
INTEGER :: m, nest, n, lwrk, ier
INTEGER, PARAMETER :: iopt = 0
INTEGER :: iwrk( 1 : 10 * NG )
REAL(DP) :: xb, xe, fp
REAL(DP) :: wrk( 1 : 2 * NG * (k + 1) + (2 * NG + k + 1) * (7 + 3 * k) )
REAL(DP) :: x( 1 : 2 * NG), y(1: 2 * NG )
REAL(DP) :: t( 1 : 2 * NG + k + 1 )
REAL(DP) :: c( 1 : 2 * NG + k + 1 )
xb = 0.d+00
xe = x(2 * NG)
m = 2 * NG
nest = m + k + 1
lwrk = 2 * NG * (k + 1) + nest * (7 + 3 * k)
CALL curfit(iopt, m, x, y, w, xb, xe, k, s, nest, n, t, c, fp, wrk, lwrk, iwrk, ier)
END SUBROUTINE spline
REAL(DP) FUNCTION value(q, t, n, c) ! value is "hand-made wrapper" for
IMPLICIT NONE ! more convenient using of subroutine
! SPLEV in main program
INTEGER :: n, ier ! SPLEV should work only after
INTEGER, PARAMETER :: m = 1 ! CURFIT has ended its work
REAL(DP) :: q
REAL(DP) :: t( 1 : 2 * NG + k + 1 )
REAL(DP) :: c( 1 : 2 * NG + k + 1 )
REAL(DP) :: ddmg(1), sddmg(1)
ddmg(1) = q
CALL splev(t, n, c, k, ddmg, sddmg, m, ier)
value = sddmg(1)
END FUNCTION value
REAL(DP) FUNCTION eq_nu_di_fu(y) ! eq_nu_di_fu gives values for the array
IMPLICIT NONE ! to interpolate
REAL(DP) :: y
eq_nu_di_fu = 1 / (EXP(y) + 1)
END FUNCTION eq_nu_di_fu
END PROGRAM IntColl
The module Constants is here:
MODULE CONSTANTS
INTEGER, PARAMETER :: DP = SELECTED_REAL_KIND(15, 307)
INTEGER, PARAMETER :: NG = 200 ! NUMBER OF KNOTS OF GRID
INTEGER , PARAMETER :: K = 3 ! THE ORDER OF SPLINE
REAL(DP), PARAMETER :: S = 0.D+00 ! CUBIC SPLINE SMOOTHING FACTOR
END MODULE
Now, the subroutines CURFIT and SPLEV appearing in the above code, with all their dependencies, are in the following sources:
https://github.com/jbaayen/fitpackpp/tree/master/fitpack
where these subroutines are in double precision
and
http://www.netlib.org/dierckx/
where these subroutines are in single precision.
It is very important to mention that with single precision the above scheme works!
Of course, if I use the single-precision subroutines, I modify the types of all variables correspondingly.
What else I have observed:
Straightforward use of FUNCTION value works.
If the line
PRINT *, 'QWE'
of the main program is commented out, the value 'foo' is also not printed.
I can get data from this code, but I need to collect it in an array for plotting a surface plot. So I am trying to get a 101x101 array based on the code below; the data I want to write is data(i,j).
program test
implicit none
! --- [local entities]
real*8 :: rrr,th,U0,amp,alp,Ndiv
real*8 :: pi,alpR,NR,Rmin,Rmax,z, data(101:101)
integer :: ir, i, j
OPEN(UNIT=10, FORM='unformatted', FILE="data")
do i=1, 101
do j=1, 101
th=atan2(real(i-51,kind(0d0)),real(j-51,kind(0d0)))
pi=atan(1.d0)*4.d0
!
Ndiv= 24.d0 !! Number of circumferential division
alp = 90.d0/180.d0*pi !! phase [rad]
U0 = 11.4d0 !! average velocity
amp = 0.5d0 !! amplitude of velocity
Rmin = 10 !! [m]
Rmax = 50 !! [m]
NR = 6.d0 !! Number of radial division
!
rrr=sqrt(real(i-51,kind(0d0))**2+real(j-51,kind(0d0))**2)
ir=int((rrr-Rmin)/(Rmax-Rmin)*NR)
alpR=2.d0*pi/dble(Ndiv)*dble(mod(ir,2))
data(i,j)=U0*(1.d0+amp*dsin(0.5d0*Ndiv*th+alp+alpR))
write(10) data(i,j)
end do
end do
stop
end program test
but there are some errors, and I couldn't figure out how to solve them.
klogin7$ gfortran test.f90
test.f90:27.10:
data(i,j)=z
1
Error: Rank mismatch in array reference at (1) (2/1)
test.f90:28.20:
write(10) data(i,j)
1
Error: Rank mismatch in array reference at (1) (2/1)
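For what it's worth, the compiler message points at the declaration rather than the formula: data(101:101) declares a rank-1 array (a single element with bounds 101:101), while data(i,j) references it with two subscripts. A minimal self-contained sketch of a rank-2 declaration sized for a 101x101 grid (rank2_demo and the placeholder values are made up for the illustration, they are not the velocity formula above):
program rank2_demo
implicit none
real*8 :: data(101,101) ! two extents, so data(i,j) is a valid rank-2 reference
integer :: i, j
open(unit=10, form='unformatted', file='data')
do i = 1, 101
do j = 1, 101
data(i,j) = dble(i) + dble(j)/1000.d0 ! placeholder values only
end do
end do
write(10) data ! the whole 101x101 array written in one record
close(10)
end program rank2_demo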
I'm trying to break up a 4D array over the third dimension and send the pieces to each node using MPI. Basically, I'm computing derivatives of a matrix, Cpq, with respect to atom positions in each of the three cartesian directions. Cpq is of size nat_sl x nat_sl, so dCpqdR is of size nat_sl x nat_sl x nat x 3. At the end of the day, for every s,i pair, I have to form the matrix product of dCpqdR with the transpose of the eigenvectors of Cpq on the left and the eigenvectors of Cpq on the right, like so:
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(dCpqdR(:, :, s, i), Cpq))
This is fine, but as it turns out, the loop over s and i is now by far the slowest part of my code. Because each can be done independently, I was hoping that I could break up dCpqdR and give each task its own s, i to compute the derivative of. That is, I'd like task 1 to get dCpqdR(:,:,1,1), task 2 to get dCpqdR(:,:,1,2), etc.
I've got this working in some sense by using a buffered send/recv pair of calls. The root node allocates a temporary array, fills it, and sends it to the relevant nodes, and the relevant nodes do their computations as they wish. This is fine, but it can be slow and memory-inefficient. I'd ideally like to break it up in a more memory-efficient way.
The logical thing to do, then, is to use mpi_scatterv, but here is where I start running into trouble, as I'm having trouble figuring out the memory layout for this. I've written this so far:
call mpi_type_create_subarray(4, (/ nat_sl, nat_sl, nat, 3 /), (/nat_sl, nat_sl, n_pairs(me_image+1), 3/),&
(/0, 0, 0, 0/), mpi_order_fortran, mpi_double_precision, subarr_typ, ierr)
call mpi_type_commit(subarr_typ, ierr)
call mpi_scatterv(dCpqdR, n_pairs(me_image+1), f_displs, subarr_typ,&
my_dCpqdR, 3*nat_sl*3*nat_sl*3*n_pairs(me_image+1), subarr_typ,&
root_image, intra_image_comm, ierr)
I've computed n_pairs using this subroutine:
subroutine mbdvdw_para_init_int_forces()
implicit none
integer :: p, s, i, counter, k, cpu_ind
integer :: num_unique_rpq, n_pairs_per_proc, cpu
real(dp) :: Rpq(3), Rpq_norm, current_val
num_pairs = nat
if(.not.allocated(f_cpu_id)) allocate(f_cpu_id(nat, 3))
n_pairs_per_proc = floor(dble(num_pairs)/nproc_image)
cpu = 0
n_pairs = 0
counter = 1
p = 1
do counter = 0, num_pairs-1, 1
n_pairs(modulo(counter, nproc_image)+1) = n_pairs(modulo(counter, nproc_image)+1) + 1
end do
do s = 1, nat, 1
f_cpu_id(s) = cpu
if((counter.lt.num_pairs)) then
if(p.eq.n_pairs(cpu+1)) then
cpu = cpu + 1
p = 0
end if
end if
p = p + 1
end do
call mp_set_displs( n_pairs, f_displs, num_pairs, nproc_image)
f_displs = f_displs*nat_sl*nat_sl*3
end subroutine mbdvdw_para_init_int_forces
and the full method for the matrix multiplication is
subroutine mbdvdw_interacting_energy(energy, forcedR, forcedh, forcedV)
implicit none
real(dp), intent(out) :: energy
real(dp), dimension(nat, 3), intent(out) :: forcedR
real(dp), dimension(3,3), intent(out) :: forcedh
real(dp), dimension(nat), intent(out) :: forcedV
real(dp), dimension(3*nat_sl, 3*nat_sl) :: temp
real(dp), dimension(:,:,:,:), allocatable :: my_dCpqdR
integer :: num_negative, i_atom, s, i, j, counter
integer, parameter :: eigs_check = 200
integer :: subarr_typ, ierr
! lapack work variables
integer :: LWORK, errorflag
real(dp) :: WORK((3*nat_sl)*(3+(3*nat_sl)/2)), eigenvalues(3*nat_sl)
call start_clock('mbd_int_energy')
call mp_sum(Cpq, intra_image_comm)
eigenvalues = 0.0_DP
forcedR = 0.0_DP
energy = 0.0_DP
num_negative = 0
forcedV = 0.0_DP
errorflag=0
LWORK=3*nat_sl*(3+(3*nat_sl)/2)
call DSYEV('V', 'U', 3*nat_sl, Cpq, 3*nat_sl, eigenvalues, WORK, LWORK, errorflag)
if(errorflag.eq.0) then
do i_atom=1, 3*nat_sl, 1
!open (unit=eigs_check, file="eigs.tmp",action="write",status="unknown",position="append")
! write(eigs_check, *) eigenvalues(i_atom)
!close(eigs_check)
if(eigenvalues(i_atom).ge.0.0_DP) then
energy = energy + dsqrt(eigenvalues(i_atom))
else
num_negative = num_negative + 1
end if
end do
if(num_negative.ge.1) then
write(stdout, '(3X," WARNING: Found ", I3, " Negative Eigenvalues.")'), num_negative
end if
else
end if
energy = energy*nat/nat_sl
!!!!!!!!!!!!!!!!!!!!
! Forces below here. There's going to be some long parallelization business.
!!!!!!!!!!!!!!!!!!!!
call start_clock('mbd_int_forces')
if(.not.allocated(my_dCpqdR)) allocate(my_dCpqdR(nat_sl, nat_sl, n_pairs(me_image+1), 3)), my_dCpqdR = 0.0_DP
if(mbd_vdw_forces) then
do s=1,nat,1
if(me_image.eq.(f_cpu_id(s)+1)) then
do i=1,3,1
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(my_dCpqdR(:, :, counter, i), Cpq))
do j=1,3*nat_sl,1
if(eigenvalues(j).ge.0.0_DP) then
forcedR(s, i) = forcedR(s, i) + 1.0_DP/(2.0_DP*dsqrt(eigenvalues(j)))*temp(j,j)
end if
end do
end do
counter = counter + 1
end if
end do
forcedR = forcedR*nat/nat_sl
do s=1,3,1
do i=1,3,1
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(dCpqdh(:, :, s, i), Cpq))
do j=1,3*nat_sl,1
if(eigenvalues(j).ge.0.0_DP) then
forcedh(s, i) = forcedh(s, i) + 1.0_DP/(2.0_DP*dsqrt(eigenvalues(j)))*temp(j,j)
end if
end do
end do
end do
forcedh = forcedh*nat/nat_sl
call mp_sum(forcedR, intra_image_comm)
call mp_sum(forcedh, intra_image_comm)
end if
call stop_clock('mbd_int_forces')
call stop_clock('mbd_int_energy')
return
end subroutine mbdvdw_interacting_energy
But when run, it complains that
[MathBook Pro:58100] *** An error occurred in MPI_Type_create_subarray
[MathBook Pro:58100] *** reported by process [2560884737,2314885530279477248]
[MathBook Pro:58100] *** on communicator MPI_COMM_WORLD
[MathBook Pro:58100] *** MPI_ERR_ARG: invalid argument of some other kind
[MathBook Pro:58100] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[MathBook Pro:58100] *** and potentially your MPI job)
so something is going wrong, but I have no idea what. I know my description is somewhat sparse to start with, so please let me know what information would be necessary to help.