I'm trying to parallelize the following code.
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i
ALLOCATE (lli(size(y)))
lli = 0.0d0
ll = 0.0d0
do i = 1, size(y)
lli(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
ll = sum(lli)
end subroutine log_likelihood
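For reference, each lli(i) computed above is the log-density of a normal distribution with mean theta(1) and variance theta(2), and ll is their sum:

$$\mathrm{lli}_i = -\tfrac{1}{2}\log\theta_2 - \tfrac{1}{2}\log(2\pi) - \frac{(y_i-\theta_1)^2}{2\theta_2}, \qquad \mathrm{ll} = \sum_i \mathrm{lli}_i .$$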
To do this, I'm trying to use MPI_ALLGATHER. This is the code I wrote:
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i, size_y, diff
size_y=size(y)
ALLOCATE (lli(size_y))
!Broadcasting
call MPI_BCAST(theta, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
call MPI_BCAST(y, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
! Determine how many points to handle with each proc
points_per_proc = (size_y + numprocs - 1)/numprocs
! Determine start and end index for this proc's points
istart = proc_num * points_per_proc + 1
iend = min((proc_num + 1)*points_per_proc, size_y)
diff = iend-istart+1
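! For example, with size_y = 10 and numprocs = 4 this gives
! points_per_proc = (10+4-1)/4 = 3, so rank 0 handles indices 1-3,
! rank 1 handles 4-6, rank 2 handles 7-9 and rank 3 handles only 10
! (diff = 1); the last rank may get fewer points than the others.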
allocate(proc_contrib(istart:iend))
do i = istart, iend
proc_contrib(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
call MPI_ALLGATHER(proc_contrib, diff, MPI_DOUBLE_PRECISION, &
lli, diff, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
ll = sum(lli)
end subroutine log_likelihood
When I try to run my program, I get the following error.
$ mpiexec -n 2 ./mle.X
Fatal error in PMPI_Allgather: Internal MPI error!, error stack:
PMPI_Allgather(961)......: MPI_Allgather(sbuf=0x7ff2f251b860, scount=1500000, MPI_DOUBLE_PRECISION, rbuf=0x7ff2f2ad5650, rcount=3000000, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD) failed
MPIR_Allgather_impl(807).:
MPIR_Allgather(766)......:
MPIR_Allgather_intra(560):
MPIR_Localcopy(357)......: memcpy arguments alias each other, dst=0x7ff2f2ad5650 src=0x7ff2f251b860 len=12000000
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= EXIT CODE: 1
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
Can somebody please explain to me what I'm doing wrong?
Thanks!
I was finally able to solve my problem. The serial and parallel versions of my code are available at https://bitbucket.org/ignacio82/bhhh
subroutine log_likelihood(y, n, theta, lli, ll)
integer, intent(in) :: n
doubleprecision, intent(in) :: y(n)
doubleprecision, intent(in) :: theta(2)
doubleprecision, intent(out) :: lli(n)
doubleprecision, intent(out) :: ll
integer :: i
do i = istart, iend
proc_contrib(i-istart+1) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
if ( mod(n,numprocs)==0 ) then
call MPI_ALLGATHER(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, points_per_proc, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
else if (numprocs-1 == proc_num ) then
recvcounts(numprocs) = iend-istart+1
call MPI_ALLGATHERV(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, recvcounts, displs, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
end if
ll = sum(lli)
end subroutine log_likelihood
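For completeness, the recvcounts and displs arrays used by MPI_ALLGATHERV are not shown in the snippet above. Here is a minimal sketch of how they can be filled, with the variable names used above; note that MPI_ALLGATHERV is a collective call, so every rank has to build the arrays and take part in the call (is and ie are local integers):

! sketch: receive counts and displacements for MPI_ALLGATHERV,
! computed identically on every rank
allocate(recvcounts(numprocs), displs(numprocs))
do i = 1, numprocs
   is = (i-1)*points_per_proc + 1        ! first index owned by rank i-1
   ie = min(i*points_per_proc, n)        ! last index owned by rank i-1
   recvcounts(i) = max(0, ie - is + 1)   ! how many elements rank i-1 contributes
   displs(i)     = is - 1                ! 0-based offset of that block in lli
end do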
Related
I have a DNS (Direct Numerical Simulation) code that has to handle large amounts of data. The code can read from a file using MPI_File_read_all, but after a few iterations, when I want to write out the data using MPI_File_write_all, I get a memory error (signal 9). When I profiled the memory with Valgrind, I saw that the master node uses more memory than the rest of the nodes, even though there are no data operations that are exclusive to the master node.
This is the read subroutine:
subroutine read3DField(filename,vars)
! Read binary data file using parallel MPI-IO
use commons
include 'sphere.cmn'
character(len=8), intent(in) :: filename ! File name
integer :: ierr
real vars(nxm:nxp,nym:nyp,nzm:nzp,ns+6)
type(MPI_File) :: fh
type(MPI_Status) :: status
integer(kind=MPI_OFFSET_KIND) :: disp
write (*, *) 'Entered read', cartcoord(1)
call MPI_File_open(MPI_COMM_WORLD, filename, &
MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
write (*, *) 'File opened by ', id
write(*,*) ierr
disp = 0
call MPI_File_set_view(fh, disp, MPI_dp, type_global, &
'native', MPI_INFO_NULL, ierr)
write (*, *) 'view set', id
write(*,*) ierr
call MPI_File_read_all(fh, vars, 1, type_local, status, ierr)
write (*, *) 'read data', id
write(*,*) ierr
call MPI_File_close(fh, ierr)
write(*,*) ierr
end subroutine read3DField
This is the write subroutine:
subroutine write3DField(filename,vars)
! Write binary data file using parallel MPI-IO
use commons
character(len=8), intent(in) :: filename ! File name
integer :: ierr
! Array to write
real vars(nxm:nxp,nym:nyp,nzm:nzp,ns+6)
type(MPI_File) :: fh
type(MPI_Status) :: status
integer(kind=MPI_OFFSET_KIND) :: disp
call MPI_File_open(MPI_COMM_WORLD, filename, &
IOR(MPI_MODE_WRONLY, MPI_MODE_CREATE), MPI_INFO_NULL, fh, ierr)
disp = 0
call MPI_File_set_view(fh, disp, MPI_dp, type_global, &
'native', MPI_INFO_NULL, ierr)
call MPI_File_write_all(fh, vars, 1, type_local, status, ierr)
call MPI_File_close(fh, ierr)
end subroutine write3DField
type_global and type_local are MPI datatypes that are initialized previously with appropriate offsets and committed using MPI_Type_commit.
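To give an idea of what such types usually look like: a file view type like type_global is typically built with MPI_Type_create_subarray and committed with MPI_Type_commit. The sketch below uses made-up sizes and plain MPI_DOUBLE_PRECISION (my code uses MPI_dp as the element type); it is not the actual initialization from my code:

! sketch only: building a subarray filetype for a 3D decomposition
integer :: gsizes(3), lsizes(3), starts(3), type_global, ierr
gsizes = [256, 256, 256]   ! global array size (made-up numbers)
lsizes = [128, 128, 128]   ! size of the block owned by this rank
starts = [0, 0, 0]         ! 0-based offset of that block, different on each rank
call MPI_Type_create_subarray(3, gsizes, lsizes, starts, MPI_ORDER_FORTRAN, &
                              MPI_DOUBLE_PRECISION, type_global, ierr)
call MPI_Type_commit(type_global, ierr)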
My question is: where is this "excess" memory coming from, and how can I remove it?
I'm new to Fortran and MPI, and I'm currently processing a very large matrix row by row on different processors. I gather all the results on all the processors, as they are needed everywhere. The following sample code has the same structure as my real code.
I keep running into SIGBUS problems at the MPI_Allgather call, line 49 of mod_test.f, inside the type-bound function iter. How could I resolve this?
Compiler details:
$ mpifort --version
ifort (IFORT) 19.0.1.144 20181018
Copyright (C) 1985-2018 Intel Corporation. All rights reserved.
The code is compiled as follows:
mpifort mod_test.f main.f -o main -traceback -g -debug
mod_test.f
module TEST
include "mpif.h"
type A
real ,allocatable:: pf(:,:)
integer :: nx=100, ny=10
contains
procedure :: init
procedure :: iter
end type A
type(A) :: A_obj
contains
integer function init(this, x, y)
implicit none
class(A) , intent(inout):: this
integer , intent(in):: x, y
this% nx = x
this% ny = y
allocate( this% pf(x, y) )
this% pf = 0.0
init = 1
return
end function init
integer function iter(this, y_iter)
implicit none
class(A) , intent(inout):: this
integer , intent(in):: y_iter
integer :: i
real ,target :: a(this%nx+1), ar(this%nx+1)
real , dimension(:), pointer :: abuff, arbuff
a = 0.0
ar = 0.0
do i = 1, this% nx
this%pf(i, y_iter) = i * y_iter
enddo
a(1:this%nx) = this% pf(:, y_iter)
a(this%nx+1) = y_iter
call MPI_Allgather(a, this%nx+1, MPI_REAL, ar,
& this%nx+1, MPI_REAL,
& MPI_COMM_WORLD)
write(*,*) "Reached after MPI_Allgather"
do i = 1, this%nx + 1
write(*,*)ar(i)
enddo
this% pf(:, ar(this%nx+1)) = ar(1:this%nx)
write(*,*) "Got the solution from another processor"
iter = 1
end function iter
subroutine INIT_A
integer :: j, rank, ierr, size
! - Allocate
ierr= A_obj% init(100, 10)
! - Iterate
call MPI_COMM_RANK( MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE( MPI_COMM_WORLD, size, ierr)
do j = 1, A_obj % ny
if ( rank == mod(j, size) ) then
ierr = A_obj % iter( j )
endif
enddo
end subroutine INIT_A
end module TEST
main.f
PROGRAM MAIN
use TEST
implicit none
integer :: ierr
call MPI_INIT(ierr)
call INIT_A
write(*,*) "Done with test"
call MPI_FINALIZE(ierr)
end PROGRAM MAIN
I am trying to compile a Linux source file on Windows using Cygwin. When I run make, the following error occurs:
mpif90 -c -O3 -I/usr/include -Iobj -Jobj -o obj/sdf_output_point_ru.o
src/io/sdf_output_point_ru.f90
src/io/sdf_output_point_ru.f90:260:39:
MPI_STATUS_IGNORE, errcode)
1
error: There is no specific subroutine for the generic ‘mpi_file_write’ at (1)
src/io/sdf_output_point_ru.f90:265:37:
MPI_STATUS_IGNORE, errcode)
1
error: There is no specific subroutine for the generic ‘mpi_file_write’ at (1)
make: *** [Makefile:166:sdf_output_point_ru.o] error 1
The source file from which sdf_output_point_ru.o is built is as follows:
MODULE sdf_output_point_ru
USE mpi
USE sdf_common
USE sdf_output
IMPLICIT NONE
CONTAINS
SUBROUTINE write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units, &
dim_mults)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: dim_labels(:), dim_units(:)
REAL(r8), DIMENSION(:), INTENT(IN), OPTIONAL :: dim_mults
INTEGER :: ndims
TYPE(sdf_block_type), POINTER :: b
INTEGER :: i, errcode
b => h%current_block
b%blocktype = c_blocktype_point_mesh
ndims = b%ndims
b%nelements = b%ndims * b%npoints
! Metadata is
! - mults REAL(r8), DIMENSION(ndims)
! - labels CHARACTER(id_length), DIMENSION(ndims)
! - units CHARACTER(id_length), DIMENSION(ndims)
! - geometry INTEGER(i4)
! - minval REAL(r8), DIMENSION(ndims)
! - maxval REAL(r8), DIMENSION(ndims)
! - npoints INTEGER(i8)
b%info_length = h%block_header_length + soi4 + soi8 &
+ (3 * ndims) * sof8 + 2 * ndims * c_id_length
b%data_length = b%nelements * b%type_size
! Write header
IF (PRESENT(id)) THEN
ALLOCATE(b%dim_labels(ndims), b%dim_units(ndims), b%dim_mults(ndims))
IF (PRESENT(dim_labels)) THEN
DO i = 1,ndims
CALL safe_copy_string(dim_labels(i), b%dim_labels(i))
ENDDO
ELSE
IF (ndims .GE. 1) CALL safe_copy_string('X', b%dim_labels(1))
IF (ndims .GE. 2) CALL safe_copy_string('Y', b%dim_labels(2))
IF (ndims .GE. 3) CALL safe_copy_string('Z', b%dim_labels(3))
ENDIF
IF (PRESENT(dim_units)) THEN
DO i = 1,ndims
CALL safe_copy_string(dim_units(i), b%dim_units(i))
ENDDO
ELSE
DO i = 1,ndims
CALL safe_copy_string('m', b%dim_units(i))
ENDDO
ENDIF
IF (PRESENT(dim_mults)) THEN
DO i = 1,ndims
b%dim_mults(i) = REAL(dim_mults(i),r8)
ENDDO
ELSE
DO i = 1,ndims
b%dim_mults(i) = 1.d0
ENDDO
ENDIF
CALL sdf_write_block_header(h, id, name)
ELSE
CALL write_block_header(h)
ENDIF
IF (h%rank .EQ. h%rank_master) THEN
CALL MPI_FILE_WRITE(h%filehandle, b%dim_mults, ndims, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
DO i = 1,ndims
CALL sdf_safe_write_id(h, b%dim_labels(i))
ENDDO
DO i = 1,ndims
CALL sdf_safe_write_id(h, b%dim_units(i))
ENDDO
CALL MPI_FILE_WRITE(h%filehandle, b%geometry, 1, MPI_INTEGER4, &
MPI_STATUS_IGNORE, errcode)
CALL MPI_FILE_WRITE(h%filehandle, b%extents, 2 * ndims, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
CALL MPI_FILE_WRITE(h%filehandle, b%npoints, 1, MPI_INTEGER8, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%block_start + b%info_length
b%done_info = .TRUE.
END SUBROUTINE write_point_mesh_meta_r8
SUBROUTINE write_point_mesh_meta_r4(h, id, name, dim_labels, dim_units, &
dim_mults)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: dim_labels(:), dim_units(:)
REAL(r4), DIMENSION(:), INTENT(IN), OPTIONAL :: dim_mults
REAL(r8), DIMENSION(c_maxdims) :: dim_mults8
TYPE(sdf_block_type), POINTER :: b
INTEGER :: i
IF (PRESENT(dim_mults)) THEN
b => h%current_block
DO i = 1,b%ndims
dim_mults8(i) = REAL(dim_mults(i),r8)
ENDDO
CALL write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units, &
dim_mults8)
ELSE
CALL write_point_mesh_meta_r8(h, id, name, dim_labels, dim_units)
ENDIF
END SUBROUTINE write_point_mesh_meta_r4
SUBROUTINE write_point_variable_meta_r8(h, id, name, units, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name, units
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
INTEGER :: ndims
TYPE(sdf_block_type), POINTER :: b
INTEGER :: errcode
b => h%current_block
b%blocktype = c_blocktype_point_variable
ndims = b%ndims
b%nelements = b%ndims * b%npoints
! Metadata is
! - mult REAL(r8)
! - units CHARACTER(id_length)
! - meshid CHARACTER(id_length)
! - npoints INTEGER(i8)
b%info_length = h%block_header_length + soi8 + sof8 + 2 * c_id_length
b%data_length = b%nelements * b%type_size
! Write header
IF (PRESENT(id)) THEN
CALL safe_copy_string(units, b%units)
CALL safe_copy_string(mesh_id, b%mesh_id)
IF (PRESENT(mult)) THEN
b%mult = REAL(mult,r8)
ELSE
b%mult = 1.d0
ENDIF
CALL sdf_write_block_header(h, id, name)
ELSE
CALL write_block_header(h)
ENDIF
IF (h%rank .EQ. h%rank_master) THEN
CALL MPI_FILE_WRITE(h%filehandle, b%mult, 1, MPI_REAL8, &
MPI_STATUS_IGNORE, errcode)
CALL sdf_safe_write_id(h, b%units)
CALL sdf_safe_write_id(h, b%mesh_id)
CALL MPI_FILE_WRITE(h%filehandle, b%npoints, 1, MPI_INTEGER8, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%block_start + b%info_length
b%done_info = .TRUE.
END SUBROUTINE write_point_variable_meta_r8
SUBROUTINE write_point_variable_meta_r4(h, id, name, units, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: id, name, units
CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: mesh_id
REAL(r4), INTENT(IN), OPTIONAL :: mult
IF (PRESENT(mult)) THEN
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id, &
REAL(mult,r8))
ELSE
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id)
ENDIF
END SUBROUTINE write_point_variable_meta_r4
SUBROUTINE write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER(i8), INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
INTEGER(i8) :: i, idx, npoint_max, npoint_rem
INTEGER :: errcode
TYPE(sdf_block_type), POINTER :: b
IF (npoint_global .LE. 0) RETURN
CALL sdf_get_next_block(h)
b => h%current_block
b%type_size = INT(h%soi,r4)
b%datatype = h%datatype_integer
b%mpitype = h%mpitype_integer
b%ndims = 1
b%npoints = npoint_global
! Write header
CALL write_point_variable_meta_r8(h, id, name, units, mesh_id, mult)
! Write the real data
IF (h%rank .EQ. h%rank_master) THEN
h%current_location = b%data_location
CALL MPI_FILE_SEEK(h%filehandle, h%current_location, MPI_SEEK_SET, &
errcode)
! This is all a bit messy, but it is necessary because MPI_FILE_WRITE
! accepts an INTEGER count of elements to write, which may not be
! big enough for npoint_global which is an INTEGER*8
npoint_max = HUGE(npoint_max)
npoint_rem = MOD(npoint_global, npoint_max)
idx = 1
DO i = 1, npoint_global / npoint_max
CALL MPI_FILE_WRITE(h%filehandle, array(idx), npoint_max, b%mpitype, &
MPI_STATUS_IGNORE, errcode)
idx = idx + npoint_max
ENDDO
CALL MPI_FILE_WRITE(h%filehandle, array(idx), npoint_rem, b%mpitype, &
MPI_STATUS_IGNORE, errcode)
ENDIF
h%current_location = b%data_location + b%data_length
b%done_data = .TRUE.
END SUBROUTINE write_srl_pt_var_int_i8_r8
SUBROUTINE write_srl_pt_var_int_i4_r8(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER, INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r8), INTENT(IN), OPTIONAL :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
INT(npoint_global,i8), mesh_id, mult)
END SUBROUTINE write_srl_pt_var_int_i4_r8
SUBROUTINE write_srl_pt_var_int_i8_r4(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER(i8), INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r4), INTENT(IN) :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
npoint_global, mesh_id, REAL(mult,r8))
END SUBROUTINE write_srl_pt_var_int_i8_r4
SUBROUTINE write_srl_pt_var_int_i4_r4(h, id, name, units, array, &
npoint_global, mesh_id, mult)
TYPE(sdf_file_handle) :: h
CHARACTER(LEN=*), INTENT(IN) :: id, name, units
INTEGER, DIMENSION(:), INTENT(IN) :: array
INTEGER, INTENT(IN) :: npoint_global
CHARACTER(LEN=*), INTENT(IN) :: mesh_id
REAL(r4), INTENT(IN) :: mult
CALL write_srl_pt_var_int_i8_r8(h, id, name, units, array, &
INT(npoint_global,i8), mesh_id, REAL(mult,r8))
END SUBROUTINE write_srl_pt_var_int_i4_r4
END MODULE sdf_output_point_ru
I have seen this question asked many times, but I haven't found an answer that resolves my issue. I want to be able to send a linked list, in Fortran, to another process through MPI. I have done something similar before, where the derived data type in the linked list was as follows:
type a
   integer :: varA
   type(a), pointer :: next => null()
   real :: varB
end type a
The way I did this was to create an MPI datatype which contained all the varA values together, receive it as an array of integers, and then do the same for varB.
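For illustration, the kind of thing I mean is a type built with MPI_Type_create_hindexed over the varA fields of the nodes, sent relative to MPI_BOTTOM and received as a plain integer array. This is only a sketch; head, numNodes and dest are placeholders, not my exact code:

! sketch: send all varA values of a numNodes-long list in one message
integer, allocatable :: blocklens(:), recvA(:)
integer(kind=MPI_ADDRESS_KIND), allocatable :: addrs(:)
integer :: typeA, ierr, i
type(a), pointer :: p

allocate(blocklens(numNodes), addrs(numNodes))
p => head                                        ! head of the linked list
do i = 1, numNodes
   blocklens(i) = 1
   call MPI_GET_ADDRESS(p%varA, addrs(i), ierr)  ! absolute address of each varA
   p => p%next
end do
call MPI_TYPE_CREATE_HINDEXED(numNodes, blocklens, addrs, MPI_INTEGER, typeA, ierr)
call MPI_TYPE_COMMIT(typeA, ierr)

! sender: displacements are absolute addresses, so the buffer is MPI_BOTTOM
call MPI_SSEND(MPI_BOTTOM, 1, typeA, dest, 0, MPI_COMM_WORLD, ierr)

! receiver: the varA values arrive as a contiguous integer array
allocate(recvA(numNodes))
call MPI_RECV(recvA, numNodes, MPI_INTEGER, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)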
What I am trying to do now is to create the linked list and then pack all the varA and varB values together to form the MPI datatype. The code that does this is given below.
PROGRAM TEST
USE MPI
IMPLICIT NONE
TYPE a
INTEGER:: b
REAL :: e
TYPE(a), POINTER :: nextPacketInList => NULL()
END TYPE
TYPE PacketComm
INTEGER :: numPacketsToComm
TYPE(a), POINTER :: PacketListHeadPtr => NULL()
TYPE(a), POINTER :: PacketListTailPtr => NULL()
END TYPE PacketComm
TYPE(PacketComm), DIMENSION(:), ALLOCATABLE :: PacketCommArray
INTEGER :: packPacketDataType !New data type
INTEGER :: ierr, size, rank, dest, ind
integer :: b
real :: e
CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
IF(.NOT. ALLOCATED(PacketCommArray)) THEN
ALLOCATE(PacketCommArray(0:size-1), STAT=ierr)
DO ind=0, size-1
PacketCommArray(ind)%numPacketsToComm = 0
END DO
ENDIF
b = 2
e = 4
dest = 1
CALL addPacketToList(b, e, dest)
b = 3
e = 5
dest = 1
CALL addPacketToList(b, e, dest)
dest = 1
CALL packPacketList(dest)
IF(rank == 0) THEN
dest = 1
CALL sendPacketList(dest)
ELSE
CALL recvPacketList()
ENDIF
CALL MPI_FINALIZE(ierr)
CONTAINS
SUBROUTINE addPacketToList(b, e, rank)
IMPLICIT NONE
INTEGER :: b, rank, ierr
REAL :: e
TYPE(a), POINTER :: head
IF(.NOT. ASSOCIATED(PacketCommArray(rank)%PacketListHeadPtr)) THEN
ALLOCATE(PacketCommArray(rank)%PacketListHeadPtr, STAT=ierr)
PacketCommArray(rank)%PacketListHeadPtr%b = b
PacketCommArray(rank)%PacketListHeadPtr%e = e
PacketCommArray(rank)%PacketListHeadPtr%nextPacketInList => NULL()
PacketCommArray(rank)%PacketListTailPtr => PacketCommArray(rank)%PacketListHeadPtr
PacketCommArray(rank)%numPacketsToComm = PacketCommArray(rank)%numPacketsToComm+1
ELSE
ALLOCATE(PacketCommArray(rank)%PacketListTailPtr%nextPacketInList, STAT=ierr)
PacketCommArray(rank)%PacketListTailPtr => PacketCommArray(rank)%PacketListTailPtr%nextPacketInList
PacketCommArray(rank)%PacketListTailPtr%b = b
PacketCommArray(rank)%PacketListTailPtr%e = e
PacketCommArray(rank)%PacketListTailPtr%nextPacketInList => NULL()
PacketCommArray(rank)%numPacketsToComm = PacketCommArray(rank)%numPacketsToComm+1
ENDIF
END SUBROUTINE addPacketToList
SUBROUTINE packPacketList(rank)
IMPLICIT NONE
INTEGER :: rank
INTEGER :: numListNodes
INTEGER(kind=MPI_ADDRESS_KIND), DIMENSION(:), ALLOCATABLE :: listNodeAddr
INTEGER(kind=MPI_ADDRESS_KIND), DIMENSION(:), ALLOCATABLE :: listNodeDispl
INTEGER, DIMENSION(:), ALLOCATABLE :: listNodeTypes
INTEGER, DIMENSION(:), ALLOCATABLE :: listNodeCount
TYPE(a), POINTER :: head
INTEGER :: numNode
head => PacketCommArray(rank)%PacketListHeadPtr
numListNodes = PacketCommArray(rank)%numPacketsToComm
PRINT *, ' Number of nodes to allocate for rank ', rank , ' is ', numListNodes
ALLOCATE(listNodeTypes(2*numListNodes), stat=ierr)
ALLOCATE(listNodeCount(2*numListNodes), stat=ierr)
DO numNode=1, 2*numListNodes, 2
listNodeTypes(numNode) = MPI_INTEGER
listNodeTypes(numNode+1) = MPI_REAL
END DO
DO numNode=1, 2*numListNodes, 2
listNodeCount(numNode) = 1
listNodeCount(numNode+1) = 1
END DO
ALLOCATE(listNodeAddr(2*numListNodes), stat=ierr)
ALLOCATE(listNodeDispl(2*numListNodes), stat=ierr)
numNode = 1
DO WHILE(ASSOCIATED(head))
CALL MPI_GET_ADDRESS(head%b, listNodeAddr(numNode), ierr)
CALL MPI_GET_ADDRESS(head%e, listNodeAddr(numNode+1), ierr)
numNode = numNode + 2
head => head%nextPacketInList
END DO
DO numNode=1, UBOUND(listNodeAddr,1)
listNodeDispl(numNode) = listNodeAddr(numNode) - listNodeAddr(1)
END DO
CALL MPI_TYPE_CREATE_STRUCT(UBOUND(listNodeAddr,1), listNodeCount, listNodeDispl, listNodeTypes, packPacketDataType, ierr)
CALL MPI_TYPE_COMMIT(packPacketDataType, ierr)
END SUBROUTINE packPacketList
SUBROUTINE sendPacketList(rank)
IMPLICIT NONE
INTEGER :: rank, ierr, numNodes
TYPE(a), POINTER :: head
head => PacketCommArray(rank)%PacketListHeadPtr
numNodes = PacketCommArray(rank)%numPacketsToComm
CALL MPI_SSEND(head%b, 1, packPacketDataType, rank, 0, MPI_COMM_WORLD, ierr)
END SUBROUTINE sendPacketList
SUBROUTINE recvPacketList
IMPLICIT NONE
TYPE(a), POINTER :: head
TYPE(a), DIMENSION(:), ALLOCATABLE :: RecvPacketCommArray
INTEGER, DIMENSION(:), ALLOCATABLE :: recvB
INTEGER :: numNodes, ierr, numNode
INTEGER, DIMENSION(MPI_STATUS_SIZE):: status
head => PacketCommArray(rank)%PacketListHeadPtr
numNodes = PacketCommArray(rank)%numPacketsToComm
ALLOCATE(RecvPacketCommArray(numNodes), stat=ierr)
ALLOCATE(recvB(numNodes), stat=ierr)
CALL MPI_RECV(RecvPacketCommArray, 1, packPacketDataType, 0, 0, MPI_COMM_WORLD, status, ierr)
DO numNode=1, numNodes
PRINT *, ' value in b', RecvPacketCommArray(numNode)%b
PRINT *, ' value in e', RecvPacketCommArray(numNode)%e
END DO
END SUBROUTINE recvPacketList
END PROGRAM TEST
So basically I create a linked list with two nodes containing the following data
Node 1
b = 2, e = 4
Node 2
b = 3, e = 5
When I run this code on two cores, the results I get on core 1 are
value in b 2
value in e 4.000000
value in b 0
value in e 0.0000000E+00
So my code seems to send the data in the first node of the linked list correctly, but not the second one. Could someone please let me know whether what I am trying to do is feasible, and what is wrong with the code? I know I could send the values of b for all nodes together and then the values of e together, but my derived data type will probably contain more variables (including arrays), and I want to be able to send all the data in one go instead of using multiple sends.
Thanks
It's not easy for me to read that code, but it seems like you are expecting the receiving buffer to get contiguous data, which is not the case. The strange type you construct by computing address offsets is not going to match the receiving buffer. To illustrate this, I thought I might present this simple example (it's quickly written, don't take it as an example of good code):
program example
use mpi
integer :: nprocs, myrank
integer :: buf(4)
integer :: n_elements
integer :: len_element(2)
integer(MPI_ADDRESS_KIND) :: disp_element(2)
integer :: type_element(2)
integer :: newtype
integer :: istat(MPI_STATUS_SIZE)
integer :: ierr
call mpi_init(ierr)
call mpi_comm_size(mpi_comm_world, nprocs, ierr)
call mpi_comm_rank(mpi_comm_world, myrank, ierr)
! simple example illustrating mpi_type_create_struct
! take an integer array buf(4):
! on rank 0: [ 7, 2, 6, 4 ]
! on rank 1: [ 1, 1, 1, 1 ]
! and we create a struct to send only elements 1 and 3
! so that on rank 1 we'll get [7, 1, 6, 1]
if (myrank == 0) then
buf = [7, 2, 6, 4]
else
buf = 1
end if
n_elements = 2
len_element = 1
disp_element(1) = 0
disp_element(2) = 8
type_element = MPI_INTEGER
call mpi_type_create_struct(n_elements, len_element, disp_element, type_element, newtype, ierr)
call mpi_type_commit(newtype, ierr)
write(6,'(1x,a,i2,1x,a,4i2)') 'SEND| rank ', myrank, 'buf = ', buf
if (myrank == 0) then
call mpi_send (buf, 1, newtype, 1, 13, MPI_COMM_WORLD, ierr)
else
call mpi_recv (buf, 1, newtype, 0, 13, MPI_COMM_WORLD, istat, ierr)
!the below call does not scatter the received integers, try to see the difference
!call mpi_recv (buf, 2, MPI_INTEGER, 0, 13, MPI_COMM_WORLD, istat, ierr)
end if
write(6,'(1x,a,i2,1x,a,4i2)') 'RECV| rank ', myrank, 'buf = ', buf
end program
I hope this clearly shows that the receiving buffer has to accommodate the offsets of the constructed type, and that the received data is not contiguous.
EDIT: updated the code to illustrate a different receive type that does not scatter the data.
I want to apply one of three different methods, selected with the value of an integer switch. The first method uses two integers, the second a real array and an integer, and the third a real 2D array. In my current implementation, I allocate and pass as arguments all of the above data (2 int + real_array + int + real_2array). I could also use a module, but it would be similar. I'm looking for a way to pass only the data that the selected method will actually use (i.e. only the matrix for method 3) and nothing else. Any suggestions?
Edit:
I have made a simplified version of what I described above.
A part of the main program:
INTEGER :: m, imeth
REAL*8 :: x, y
REAL*8, DIMENSION(:), ALLOCATABLE :: uu, uu_, wc
REAL*8, DIMENSION(:,:), ALLOCATABLE :: BCH
imeth = 0
m = 64
ALLOCATE(uu(m), uu_(m), wc(m))
ALLOCATE(BCH(m,m))
if (imeth .EQ. 0) then
x = 1.0d0
y = 2.0d0
elseif (imeth .EQ. 1) then
!Assign values to wc
else
!Assign values to BCH
endif
call subr(m,x,y,uu,uu_,imeth,BCH,wc)
STOP
END
and a subroutine
SUBROUTINE subr(n,a,b,u,u_,imeth,DCH,ws)
IMPLICIT NONE
INTEGER, INTENT(IN) :: n, imeth
REAL*8, INTENT(IN) :: a, b
REAL*8, INTENT(IN) :: u(n), DCH(n,n), ws(n)
REAL*8, INTENT(OUT) :: u_(n)
INTEGER :: i
if (imeth .EQ. 0) then
u_ = -u_ * 0.5d0 / (a+b)
elseif (imeth .EQ. 1) then
u_ = -u / ws
else
u_ = matmul(DCH,u)
endif
RETURN
END SUBROUTINE subr
I want the main program to have a form like
imeth = 0
m = 64
ALLOCATE(uu(m), uu_(m))
if (imeth .EQ. 0) then
a = 1.0d0
b = 2.0d0
elseif (imeth .EQ. 1) then
ALLOCATE(wc(m))
!Assign values to wc
else
ALLOCATE(BCH(m,m))
!Assign values to BCH
endif
if (imeth .EQ. 0) then
call subrA(m,x,y,uu,uu_)
elseif (imeth .EQ. 1) then
call subrB(m,wc,uu,uu_)
else
call subrC(m,BCH,uu,uu_)
endif
EDIT: After the OP added the code, I think that using optional arguments in conjunction with the present() intrinsic might be better suited for this task. The subroutine could then read:
SUBROUTINE subr(n,u_,a,b,u,DCH,ws)
IMPLICIT NONE
INTEGER, INTENT(IN) :: n
REAL*8, INTENT(OUT) :: u_(n)
REAL*8, INTENT(IN),OPTIONAL :: a(n)
REAL*8, INTENT(IN),OPTIONAL :: b(n)
REAL*8, INTENT(IN),OPTIONAL :: u(n)
REAL*8, INTENT(IN),OPTIONAL :: DCH(n,n)
REAL*8, INTENT(IN),OPTIONAL :: ws(n)
INTEGER :: i
if ( present(a) .and. present(b) ) then
u_ = -u_ * 0.5d0 / (a+b)
elseif ( present(u) .and. present(ws) ) then
u_ = -u / ws
elseif ( present(DCH) .and. present(u) ) then
u_ = matmul(DCH,u)
else
stop 'invalid combination'
endif
END SUBROUTINE subr
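The three cases are then selected simply by which arguments are passed, most naturally with keyword arguments since everything after u_ is optional. A sketch of the calls (xa and yb stand for length-m arrays here, since a and b are declared as arrays in this version, whereas the original x and y were scalars):

if (imeth .EQ. 0) then
   call subr(m, uu_, a=xa, b=yb)      ! only a and b present
elseif (imeth .EQ. 1) then
   call subr(m, uu_, u=uu, ws=wc)     ! only u and ws present
else
   call subr(m, uu_, u=uu, DCH=BCH)   ! only u and DCH present
endif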
Here is the old answer as it still might be helpful:
Maybe you could try interfaces:
module interface_test
implicit none
interface method
module procedure method1
module procedure method2
module procedure method3
end interface
contains
subroutine method1(int1, int2)
implicit none
integer,intent(in) :: int1
integer,intent(out) :: int2
int2 = 2*int1
end subroutine
subroutine method2(int, realArray)
implicit none
integer,intent(in) :: int
real,intent(out) :: realArray(:)
realArray = real(2*int)
end subroutine
subroutine method3(realArray)
implicit none
real,intent(inout) :: realArray(:,:)
realArray = 2*realArray
end subroutine
end module
program test
use interface_test, only: method
implicit none
integer :: int1, int2
real :: arr1D(10)
real :: arr2D(10,10)
int1 = 1
call method(int1, int2)
print *, int2
call method(int1,arr1D)
print *, arr1D(1)
arr2D = 1.
call method(arr2D)
print *, arr2D(1,1)
end program