No specific subroutine for ‘mpi_scatter’ - fortran

I'm starting with MPI in Fortran and I'm trying to scatter an array. This is the code:
program test_scatter
use mpi
implicit none
integer :: ierr, rank, size, comm, i, j
integer, parameter :: dim = 5, dim_nodos = 4
real, dimension(dim, dim) :: panel_pos
real, dimension(dim_nodos) :: nodos
real :: rev_buf
forall(i = 1:dim_nodos) nodos(i) = i
comm = MPI_COMM_WORLD
call MPI_INIT(ierr)
call MPI_COMM_SIZE(comm, size, ierr)
call MPI_COMM_RANK(comm, rank, ierr)
call MPI_Bcast(panel_pos, dim*dim, MPI_REAL, 1, comm, ierr)
call MPI_Scatter(nodos, 1, MPI_REAL, rev_buf, 1, 1, comm, ierr)
print *, panel_pos, 'from rank', rank
! Finalize MPI
call MPI_FINALIZE(ierr)
end program test_scatter
I try to compile using:
mpif90 test_scatter.F90 -o test_scatter.e
But I get this error:
call MPI_Scatter(nodos, 1, MPI_REAL, rev_buf, 1, 1, comm, ierr)
1
Error: There is no specific subroutine for the generic ‘mpi_scatter’ at (1)
I'm on Debian testing and the system is up to date. I installed OpenMPI with:
$ sudo apt-get install openmpi-bin openmpi-common openmpi-doc libopenmpi-dev
What did I do wrong?

As noted in the comments, you additionally need to specify the MPI_Datatype of the receive buffer:
call MPI_Scatter(nodos, 1, MPI_REAL, rev_buf, 1, MPI_REAL, 1, comm, ierr)
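For completeness, here is a sketch of the whole program with that argument added; panel_pos is initialized before the broadcast and the print now shows the scattered value. Since rank 1 is the root of both collectives and nodos has only four elements, run it with two to four processes:
program test_scatter
  use mpi
  implicit none
  integer :: ierr, rank, size, comm, i
  integer, parameter :: dim = 5, dim_nodos = 4
  real, dimension(dim, dim) :: panel_pos
  real, dimension(dim_nodos) :: nodos
  real :: rev_buf
  forall(i = 1:dim_nodos) nodos(i) = i
  comm = MPI_COMM_WORLD
  call MPI_INIT(ierr)
  call MPI_COMM_SIZE(comm, size, ierr)
  call MPI_COMM_RANK(comm, rank, ierr)
  panel_pos = 0.0   ! initialize so the broadcast sends defined values
  call MPI_Bcast(panel_pos, dim*dim, MPI_REAL, 1, comm, ierr)
  ! The receive datatype (MPI_REAL) goes between the receive count and the root
  call MPI_Scatter(nodos, 1, MPI_REAL, rev_buf, 1, MPI_REAL, 1, comm, ierr)
  print *, rev_buf, 'from rank', rank
  ! Finalize MPI
  call MPI_FINALIZE(ierr)
end program test_scatter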

Related

MPI Write taking too much memory at the master node

I have a DNS (Direct Numerical Simulation) code that has to handle large amounts of data. The code is able to read from a file using MPI_File_read_all, but after a few iterations, when I want to write out the data using MPI_File_write_all, I get a memory error (signal 9). When I profiled the memory with Valgrind, I saw that the master node uses more memory than the rest of the nodes, even though there are no data operations that are exclusive to the master node.
This is the read subroutine:
subroutine read3DField(filename,vars)
! Read binary data file using parallel MPI-IO
use commons
include 'sphere.cmn'
character(len=8), intent(in) :: filename ! File name
integer :: ierr
real vars(nxm:nxp,nym:nyp,nzm:nzp,ns+6)
type(MPI_File) :: fh
type(MPI_Status) :: status
integer(kind=MPI_OFFSET_KIND) :: disp
write (*, *) 'Entered read', cartcoord(1)
call MPI_File_open(MPI_COMM_WORLD, filename, &
MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
write (*, *) 'File opened by ', id
write(*,*) ierr
disp = 0
call MPI_File_set_view(fh, disp, MPI_dp, type_global, &
'native', MPI_INFO_NULL, ierr)
write (*, *) 'view set', id
write(*,*) ierr
call MPI_File_read_all(fh, vars, 1, type_local, status, ierr)
write (*, *) 'read data', id
write(*,*) ierr
call MPI_File_close(fh, ierr)
write(*,*) ierr
end subroutine read3DField
This is the write subroutine:
subroutine write3DField(filename,vars)
! Write binary data file using parallel MPI-IO
use commons
character(len=8), intent(in) :: filename ! File name
integer :: ierr
! Array to write
real vars(nxm:nxp,nym:nyp,nzm:nzp,ns+6)
type(MPI_File) :: fh
type(MPI_Status) :: status
integer(kind=MPI_OFFSET_KIND) :: disp
call MPI_File_open(MPI_COMM_WORLD, filename, &
IOR(MPI_MODE_WRONLY, MPI_MODE_CREATE), MPI_INFO_NULL, fh, ierr)
disp = 0
call MPI_File_set_view(fh, disp, MPI_dp, type_global, &
'native', MPI_INFO_NULL, ierr)
call MPI_File_write_all(fh, vars, 1, type_local, status, ierr)
call MPI_File_close(fh, ierr)
end subroutine write3DField
type_global and type_local are MPI derived datatypes that were set up earlier with the appropriate offsets and committed with MPI_Type_commit.
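(For reference, datatypes like type_local and type_global are usually built with MPI_Type_create_subarray and MPI_Type_commit; the following is a minimal sketch with illustrative arguments, not the author's actual code:)
! Sketch only: how a subarray datatype for MPI-IO is typically created.
! gsizes, lsizes and starts (global shape, this rank's block shape and its
! 0-based offset) are placeholders, not values from the code above.
subroutine build_subarray_type(gsizes, lsizes, starts, newtype)
  use mpi
  implicit none
  integer, dimension(3), intent(in) :: gsizes, lsizes, starts
  integer, intent(out) :: newtype
  integer :: ierr
  call MPI_Type_create_subarray(3, gsizes, lsizes, starts, &
                                MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, &
                                newtype, ierr)
  call MPI_Type_commit(newtype, ierr)
end subroutine build_subarray_type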
My question is: where is this "excess" memory coming from, and how can I remove it?

Use MPI_Send and MPI_Recv between Parent and Child process created with MPI_Comm_spawn

I am trying to use MPI_Send() and MPI_Recv() to communicate between a child and its parent process, created by using MPI_Comm_spawn as can be seen below:
Parent.f90
program master
use mpi
implicit none
integer :: ierr, num_procs, my_id, intercomm, i, array(10), tag
CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_id, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr)
if (.not. (ierr .eq. 0)) then
print*, "S.Unable to initilaize!"
stop
endif
if (my_id .eq. 0) then
call MPI_Comm_spawn("./child.out", MPI_ARGV_NULL, 1, MPI_INFO_NULL, my_id, &
& MPI_COMM_WORLD, intercomm, MPI_ERRCODES_IGNORE, ierr)
call MPI_Send(array, 255, MPI_INTEGER, my_id, tag, intercomm, ierr)
endif
call MPI_Finalize(ierr)
end program master
Child.f90
program name
use mpi
implicit none
! type declaration statements
integer :: ierr, parent, my_id, n_procs, i, array(10), tag, intercomm
logical :: flag, high
! executable statements
call MPI_Init(ierr)
call MPI_Initialized(flag, ierr)
call MPI_Comm_get_parent(parent, ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, my_id, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, n_procs, ierr)
print *, "Initilaized? ", flag
print *, "My mommy is: ", parent
print *, "My rank is:", my_id
tag = 1
call MPI_Recv(array, 255, MPI_INTEGER, MPI_ANY_SOURCE, tag, parent, MPI_STATUS_IGNORE, ierr)
print *, "Client received array."
call MPI_Finalize(ierr)
end program name
When the above program is run, the Parent seems to run through fine, but the Child never prints "Client received array.", leading me to believe that I have messed something up with the send/recv.
If it is not clear what I am trying to achieve: I want the parent to spawn a child, send an array to that child, the child to process the array, and the child to send the array back to the parent (the last two steps are yet to be written; I want to get this basic communication working first).
At the moment, when I run: mpiexec -np 1 parent.out, the child prints:
Initilaized? T
My mommy is: 3
My rank is: 0
but not "Client received array."
I was able to solve my issue. In the following code the parent sends an array of size 1000000 to its spawned child, the child squares the array and sends it back to its parent. Compared with the original attempt, the send and receive counts now match the buffer size, the message tag is set explicitly on both sides, and the child receives from rank 0 of the intercommunicator instead of MPI_ANY_SOURCE.
Parent.f90
program master
use mpi
implicit none
integer :: ierr, num_procs, my_id, intercomm, i, array(1000000), s_tag, s_dest, siffra
CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, my_id, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr)
!print *, "S.Rank =", my_id
!print *, "S.Size =", num_procs
if (.not. (ierr .eq. 0)) then
print*, "S.Unable to initilaize bös!"
stop
endif
do i=1,size(array)
array(i) = 2
enddo
if (my_id .eq. 0) then
call MPI_Comm_spawn("./client2.out", MPI_ARGV_NULL, 1, MPI_INFO_NULL, my_id, &
& MPI_COMM_WORLD, intercomm, MPI_ERRCODES_IGNORE, ierr)
s_dest = 0 !rank of destination (integer)
s_tag = 1 !message tag (integer)
call MPI_Send(array(1), 1000000, MPI_INTEGER, s_dest, s_tag, intercomm, ierr)
call MPI_Recv(array(1), 1000000, MPI_INTEGER, s_dest, s_tag, intercomm, MPI_STATUS_IGNORE, ierr)
!do i=1,10
! print *, "S.Array(",i,"): ", array(i)
!enddo
endif
call MPI_Finalize(ierr)
end program master
Child.f90
program name
use mpi
implicit none
! type declaration statements
integer :: ierr, parent, my_id, n_procs, i, array(1000000), ctag, csource, intercomm, siffra
logical :: flag
! executable statements
call MPI_Init(ierr)
call MPI_Initialized(flag, ierr)
call MPI_Comm_get_parent(parent, ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, my_id, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, n_procs, ierr)
csource = 0 !rank of source
ctag = 1 !message tag
call MPI_Recv(array(1), 1000000, MPI_INTEGER, csource, ctag, parent, MPI_STATUS_IGNORE, ierr)
!do i=1,10
! print *, "C.Array(",i,"): ", array(i)
!enddo
do i=1,size(array)
array(i) = array(i)**2
enddo
!do i=1,10
! print *, "C.Array(",i,"): ", array(i)
!enddo
call MPI_Send(array(1), 1000000, MPI_INTEGER, csource, ctag, parent, ierr)
call MPI_Finalize(ierr)
end program name
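(Assuming the sources are named parent.f90 and child.f90, and matching the "./client2.out" name used in MPI_Comm_spawn above, this can be built and run roughly like:)
mpif90 parent.f90 -o parent.out
mpif90 child.f90 -o client2.out
mpiexec -np 1 ./parent.out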

Creating HDF5 File and datasets with OpenMPI

I need to write my HDF5 datasets to one HDF5 file in parallel, and I want to create the file with just one thread. To do this I can use an if statement like:
if( currentThread == 0)
{
createHDF5File( );
}
But I don't know which thread will get there first. For example, if thread 1 arrives first, it tries to write a dataset to a non-existent file. Is there any way to select the first thread? Or is there a better way to do this?
It sounds like you really should be using parallel IO with HDF5. HDF5 is capable of using MPI-IO (under the hood), if you built it with parallel support.
Here's a sample program (in Fortran).
! Program to use MPI_Cart and Parallel HDF5
!
program hdf_pwrite
use, intrinsic :: iso_c_binding, only: c_double
use mpi
use hdf5
use kinds, only : r_dp
implicit none
! external interface
interface
subroutine get_walltime(t) &
bind(c, name="get_walltime")
import :: c_double
real(kind=c_double), intent(out) :: t
end subroutine get_walltime
end interface
! Local 4000x4000 with a 1x1 halo
integer, parameter :: ndims = 2
integer, parameter :: N = 4000
integer, parameter :: halo = 1
integer :: argc ! Command line args
integer :: ierr ! Error status
integer :: id ! My rank/ID
integer :: np ! Number of processors
integer :: iunit ! File descriptor
integer :: i,j ! Loop indexers
integer :: total ! Total dimension size
integer :: lcount ! Lustre stripe count
integer :: lsize ! Lustre stripe size
character(len=1024) :: clcount, clsize ! Strings of LFS
integer :: info ! MPI IO Info
integer :: m_dims(ndims) ! MPI cart dims
integer :: coords(ndims) ! Co-ords of procs in the grid
logical :: is_periodic(ndims) ! Periodic boundary conditions
logical :: reorder ! Reorder the MPI structure
integer :: MPI_COMM_2D ! New communicator
integer(KIND=MPI_OFFSET_KIND) :: offset
character(len=1024) :: filename
integer(kind=hid_t) :: p_id, f_id, x_id, d_id
integer(kind=hid_t) :: memspace, filespace
! Local hyper slab info
integer(kind=hsize_t) :: d_size(ndims), s_size(ndims), h_size(ndims),&
stride(ndims), block(ndims)
! Global hyper slab info
integer(kind=hsize_t) :: g_size(ndims), g_start(ndims)
real(kind=r_dp), allocatable :: ld(:,:)
! Timing vars
real(kind=r_dp) :: s, e, dt, mdt
argc = 0
ierr = 0
offset = 0
m_dims = (/ 0, 0/)
is_periodic = .false. ! Non-periodic
reorder = .false. ! Not allowed to reorder
call mpi_init(ierr)
! Set up the MPI cartesian topology
call mpi_comm_size(MPI_COMM_WORLD, np, ierr)
call mpi_dims_create(np, ndims, m_dims, ierr)
call mpi_cart_create(MPI_COMM_WORLD, ndims, m_dims, is_periodic, &
reorder, MPI_COMM_2D, ierr)
call mpi_comm_rank(MPI_COMM_2D, id, ierr)
call mpi_cart_coords(MPI_COMM_2D, id, ndims, coords, ierr)
if (id .eq. 0) then
if (mod(N,np) .ne. 0) then
write(0,*) 'N must be divisible by the number of procs.'
call mpi_abort(MPI_COMM_WORLD, 1, ierr)
endif
! get the filename
argc = command_argument_count()
if (argc .lt. 1 ) then
write(0, *) 'Must supply a filename'
call exit(1)
endif
call get_command_argument(1, filename)
endif
! Broadcast the filename
call mpi_bcast(filename, len(filename), MPI_CHARACTER, 0, &
MPI_COMM_WORLD, ierr)
! Init the HDF5 library
call h5open_f(ierr)
! Set a stripe count of 4 and a stripe size of 4MB
lcount = 4
lsize = 4 * 1024 * 1024
write(clcount, '(I4)') lcount
write(clsize, '(I8)') lsize
call mpi_info_create(info, ierr)
call mpi_info_set(info, "striping_factor", trim(clcount), ierr)
call mpi_info_set(info, "striping_unit", trim(clsize), ierr)
! Set up the access properties
call h5pcreate_f(H5P_FILE_ACCESS_F, p_id, ierr)
call h5pset_fapl_mpio_f(p_id, MPI_COMM_2D, info, ierr)
! Open the file
call h5fcreate_f(filename, H5F_ACC_TRUNC_F, f_id, ierr, &
access_prp = p_id)
if (ierr .ne. 0) then
write(0,*) 'Unable to open: ', trim(filename), ': ', ierr
call mpi_abort(MPI_COMM_WORLD, 1, ierr)
endif
! Generate our 4000x4000 matrix with a 1x1 halo
total = N + 2 * halo
allocate(ld(0:total-1, 0:total-1))
ld = -99.99
! init the local data
do j = 1, N
do i = 1, N
ld(i,j) = (i - 1 + (j-1)*N)
enddo
enddo
! Create the local memory space and hyperslab
do i = 1, ndims
d_size(i) = total
s_size(i) = N
h_size(i) = halo
stride(i) = 1
block(i) = 1
enddo
call h5screate_simple_f(ndims, d_size, memspace, ierr)
call h5sselect_hyperslab_f(memspace, H5S_SELECT_SET_F, &
h_size, s_size, ierr, &
stride, block)
! Create the global file space and hyperslab
do i = 1, ndims
g_size(i) = N * m_dims(i)
g_start(i) = N * coords(i)
enddo
call h5screate_simple_f(ndims, g_size, filespace, ierr)
call h5sselect_hyperslab_f(filespace, H5S_SELECT_SET_F, &
g_start, s_size, ierr, &
stride, block)
! Create a data transfer property
call h5pcreate_f(H5P_DATASET_XFER_F, x_id, ierr)
call h5pset_dxpl_mpio_f(x_id, H5FD_MPIO_COLLECTIVE_F, ierr)
! Create the dataset id
call h5dcreate_f(f_id, "/data", H5T_IEEE_F64LE, filespace, d_id, &
ierr)
! Write the data
call get_walltime(s)
call h5dwrite_f(d_id, H5T_NATIVE_DOUBLE, ld, s_size, ierr, &
file_space_id=filespace, mem_space_id=memspace, &
xfer_prp=x_id)
call get_walltime(e)
dt = e - s
call mpi_reduce(dt, mdt, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_2D, ierr)
if (id .eq. 0) then
write(6,*) mdt / np
endif
if (allocated(ld)) then
deallocate(ld)
endif
! Close everything and exit
call h5dclose_f(d_id, ierr)
call h5sclose_f(filespace, ierr)
call h5sclose_f(memspace, ierr)
call h5pclose_f(x_id, ierr)
call h5pclose_f(p_id, ierr)
call h5fclose_f(f_id, ierr)
call h5close_f(ierr)
call mpi_finalize(ierr)
end program hdf_pwrite
Please note this is my teaching example that I interactively get the class to play with, so there are a few different things in it:
I introduce iso_c_binding because we have a wrapper around a C timing routine (gettimeofday).
I use MPI topologies.
The root rank is the only one that processes the filename to write, and we then broadcast it to all ranks.
We set a stripe count and stripe size for the Lustre file system.
We use hyperslabs for the data placement.
We use collective MPI-IO calls.
Hope this helps.
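(To build and run it, assuming a parallel HDF5 build that provides the h5pfc wrapper, and that the kinds module and the C get_walltime wrapper are in kinds.f90 and get_walltime.c, something along these lines should work:)
mpicc -c get_walltime.c
h5pfc kinds.f90 hdf_pwrite.f90 get_walltime.o -o hdf_pwrite
mpiexec -np 4 ./hdf_pwrite output.h5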
Do you calculate the data you want to write in parallel? If so, you want to make sure that all workers have finished their processing before you write, so that your data is in fact complete.
In other words,
// Collect all the data using some form of MPI_Gather, MPI_Reduce
// or whatevs. I'll just put this here for proof-of-concept
MPI_Barrier(MPI_COMM_WORLD);
// Now, all the threads have "joined", so you can write from 0 without worrying
// that some other thread got here way before
if (currentThread == 0) { createHDF5File(); }
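In Fortran, the same rank-0-creates pattern would look roughly like the sketch below; create_hdf5_file is a placeholder for whatever serial HDF5 creation routine you use:
! Sketch of the "rank 0 creates the file" pattern from the pseudocode above.
program create_from_rank0
  use mpi
  implicit none
  integer :: ierr, rank
  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
  ! ... every rank computes its part of the data here ...
  ! Make sure all ranks have finished before the file is created
  call MPI_Barrier(MPI_COMM_WORLD, ierr)
  if (rank == 0) then
     ! call create_hdf5_file()   ! placeholder for the serial HDF5 creation
  end if
  ! Nobody writes datasets until the file exists
  call MPI_Barrier(MPI_COMM_WORLD, ierr)
  call MPI_Finalize(ierr)
end program create_from_rank0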
If not, I assume you want to write data from each thread. Why not just write it to different files?
// Calculate stuff on each thread
// Then write to different files depending on thread num
createHDF5File(currentThread); // Chooses file name that includes the thread num

Block execution until children called via MPI_Comm_spawn have finished, no access to children

I would like to spawn child.exe from the main.f90 code, and I don't have access to modify child.f90. A simplified version of the code looks like:
main.f90:
program main
implicit none
include 'mpif.h'
integer, parameter:: root = 0
integer, dimension(:), allocatable:: errCodes, status
integer:: numprocs, request, ierr, taskid, INTERCOMM
character*(*), PARAMETER:: cmd = "./child.exe"
!
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, taskid, ierr)
call mpi_comm_size(mpi_comm_world, numprocs, ierr )
call mpi_barrier(mpi_comm_world, ierr)
!
allocate( errCodes(numprocs), status(numprocs))
print*, "from parent, CPU: ", taskid
call MPI_COMM_SPAWN(cmd, MPI_ARGV_NULL, 2, MPI_INFO_NULL, root, MPI_COMM_WORLD,INTERCOMM, errCodes, ierr)
!
call mpi_barrier(mpi_comm_world, ierr)
!
print*, "After children, from parent CPU", taskid
call mpi_finalize(ierr)
end program main
child.f90:
program child
implicit none
include 'mpif.h'
integer:: numprocs, temp, ierr, arg_count, taskid, maxproces, errCodes(2)
!
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, taskid, ierr)
call mpi_comm_size(mpi_comm_world, numprocs, ierr )
!
call sleep(1)
!
print*, "from child, CPU: ", taskid
call mpi_finalize(ierr)
end program child
My question is similar to this one in C++, but I would appreciate it if someone could clarify how to do this when I cannot modify the children. I have tried to use mpi_barrier, but it does not help.
For me, Hristo's advice from the linked answer works quite well; just follow it closely. Pay attention to the communicators (INTERCOMM, parent): the parent's and the children's MPI_COMM_WORLD are separate, so the synchronization has to go through the intercommunicator. There is still some problem at the end, and I have never worked with child processes before, but it should point you in the right direction.
program main
use mpi
implicit none
integer, parameter:: root = 0
integer, dimension(:), allocatable:: errCodes, status
integer:: numprocs, request, ierr, taskid, INTERCOMM
character*(*), PARAMETER:: cmd = "./child.exe"
!
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, taskid, ierr)
call mpi_comm_size(mpi_comm_world, numprocs, ierr )
call mpi_barrier(mpi_comm_world, ierr)
!
allocate( errCodes(numprocs), status(numprocs))
print*, "from parent, CPU: ", taskid
call MPI_COMM_SPAWN(cmd, MPI_ARGV_NULL, 1, MPI_INFO_NULL, root, MPI_COMM_WORLD, INTERCOMM, errCodes, ierr)
!
call mpi_barrier(INTERCOMM, ierr)
!
call mpi_barrier(INTERCOMM, ierr)
print*, "After children, from parent CPU", taskid
call mpi_finalize(ierr)
end program main
program child
use mpi
implicit none
integer:: numprocs, temp, ierr, arg_count, taskid, maxproces, errCodes(2), parent
!
call mpi_init(ierr)
call mpi_Comm_get_parent(parent, ierr)
call mpi_comm_rank(parent, taskid, ierr)
call mpi_comm_size(parent, numprocs, ierr )
!
call sleep(5)
call mpi_barrier(parent, ierr)
!
print*, "from child, CPU: ", taskid
call sleep(1)
call mpi_barrier(parent, ierr)
! call sleep(2)
call mpi_finalize(ierr)
end program child

MPI_ALLGATHER error parallelizing code

I'm trying to parallelize the following code.
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i
ALLOCATE (lli(size(y)))
lli = 0.0d0
ll = 0.0d0
do i = 1, size(y)
lli(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
ll = sum(lli)
end subroutine log_likelihood
To do this, I'm trying to use MPI_ALLGATHER. This is the code I wrote
subroutine log_likelihood(y, theta, lli, ll)
doubleprecision, allocatable, intent(in) :: y(:)
doubleprecision, intent(in) :: theta(2)
doubleprecision, allocatable, intent(out) :: lli(:)
doubleprecision, intent(out) :: ll
integer :: i, size_y, diff
size_y=size(y)
ALLOCATE (lli(size_y))
!Broadcasting
call MPI_BCAST(theta, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
call MPI_BCAST(y, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
! Determine how many points to handle with each proc
points_per_proc = (size_y + numprocs - 1)/numprocs
! Determine start and end index for this proc's points
istart = proc_num * points_per_proc + 1
iend = min((proc_num + 1)*points_per_proc, size_y)
diff = iend-istart+1
allocate(proc_contrib(istart:iend))
do i = istart, iend
proc_contrib(i) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
call MPI_ALLGATHER(proc_contrib, diff, MPI_DOUBLE_PRECISION, &
lli, diff, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
ll = sum(lli)
end subroutine log_likelihood
When I try to run my program, I get the following error.
$ mpiexec -n 2 ./mle.X
Fatal error in PMPI_Allgather: Internal MPI error!, error stack:
PMPI_Allgather(961)......: MPI_Allgather(sbuf=0x7ff2f251b860, scount=1500000, MPI_DOUBLE_PRECISION, rbuf=0x7ff2f2ad5650, rcount=3000000, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD) failed
MPIR_Allgather_impl(807).:
MPIR_Allgather(766)......:
MPIR_Allgather_intra(560):
MPIR_Localcopy(357)......: memcpy arguments alias each other, dst=0x7ff2f2ad5650 src=0x7ff2f251b860 len=12000000
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= EXIT CODE: 1
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
Can somebody please explain to me what I'm doing wrong?
Thanks!
I was finally able to solve my problem. The serial and parallel versions of my code are available at https://bitbucket.org/ignacio82/bhhh. Compared with the failing version, proc_contrib is now indexed starting from 1, and when the number of points does not divide evenly across processes MPI_ALLGATHERV with recvcounts and displs is used instead of MPI_ALLGATHER:
subroutine log_likelihood(y, n, theta, lli, ll)
integer, intent(in) :: n
doubleprecision, intent(in) :: y(n)
doubleprecision, intent(in) :: theta(2)
doubleprecision, intent(out) :: lli(n)
doubleprecision, intent(out) :: ll
integer :: i
do i = istart, iend
proc_contrib(i-istart+1) = -log(sqrt(theta(2))) - 0.5*log(2.0d0*pi) &
- (1.0d0/(2.0d0*theta(2)))*((y(i)-theta(1))**2)
end do
if ( mod(n,numprocs)==0 ) then
call MPI_ALLGATHER(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, points_per_proc, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
else if (numprocs-1 == proc_num ) then
recvcounts(numprocs) = iend-istart+1
call MPI_ALLGATHERV(proc_contrib, points_per_proc, MPI_DOUBLE_PRECISION, &
lli, recvcounts, displs, MPI_DOUBLE_PRECISION, &
MPI_COMM_WORLD, ierr)
end if
ll = sum(lli)
end subroutine log_likelihood
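For reference, the snippet above assumes that recvcounts, displs, points_per_proc, istart, iend and proc_contrib are set up elsewhere in the module. With the block distribution used in the question (points_per_proc = (n + numprocs - 1)/numprocs, remainder on the last rank), recvcounts and displs would typically be filled roughly like the sketch below; this is an assumption, not code from the linked repository. Note also that MPI_ALLGATHERV is a collective, so in the full program every rank has to call it, each with its own send count.
! Sketch: recvcounts/displs for MPI_ALLGATHERV with the block
! distribution used above (0-based displacements into lli).
integer :: p
allocate(recvcounts(numprocs), displs(numprocs))
do p = 1, numprocs
   displs(p)     = (p - 1) * points_per_proc
   recvcounts(p) = min(p * points_per_proc, n) - displs(p)
end do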