The code below tries to find the maximum element of an array using MPI. However, I keep getting the following error:
Rank 2 in job 47 caused collective abort of all ranks.
Exit status of rank 2 : killed by signal 9
Can anyone please tell me what's wrong?
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc , char * argv[])
{
int myRank , numOfProcesses;
int source , destination;
int tag = 0;
int i = 0, j = 0, k = 0;
int masterArray[] = {5,6,8,10,12,3,9,-1,3,7};
int max , globalMax = -100000;
int flag = 0;
MPI_Init(&argc, &argv);
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD , &myRank);
MPI_Comm_size(MPI_COMM_WORLD , &numOfProcesses);
printf("Process : %d \n" , myRank);
int masterSize = sizeof(masterArray)/sizeof(int);
//printf("%d \n" , masterSize);
int slaveSize = masterSize/(numOfProcesses-1);
//printf("%d \n" , slaveSize);
int slaveArray[slaveSize];
if (myRank == 0){
for (i=1; i<numOfProcesses; i++){
for (j=0; j<slaveSize; j++){
slaveArray[j] = masterArray[k];
// printf("%d \n" , masterArray[k]);
k++;
}
MPI_Send(slaveArray, slaveSize, MPI_INT, i, tag, MPI_COMM_WORLD);
}
for (i=1; i<numOfProcesses; i++){
MPI_Recv(max , 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
if (globalMax < max)
max = globalMax;
}
printf("Global Maximum %d \n" , globalMax);
}
else{
MPI_Recv(slaveArray , slaveSize, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
max = slaveArray[0];
for (i=0; i<slaveSize; i++){
if (slaveArray[i] > max)
max = slaveArray[i];
}
printf("Max in %d %d \n" , myRank, max);
MPI_Send(max , 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Sending and receiving messages in MPI always works through addresses. In the following:
MPI_Recv(max , 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
...
MPI_Send(max , 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
You use the value. You must add & to take the address.
You should also learn to use the appropriate collective operations: MPI_Scatter and MPI_Reduce.
By the way, this line is also in the wrong order:
max = globalMax;
Please also learn to listen to your compiler! Any reasonable compiler at reasonable settings will warn you about passing an integer as an address.
Related
I am new to MPI. I have an array of 256 integers and I want to divide each number by 16. I tried to scatter 32 elements to each process, but I couldn't gather them, as each return value contains an array of 32.
// Scatter 32 histogram bins to each rank, divide each bin by 16, and gather
// the results on rank 0. 256 / 32 = 8, so this layout fits exactly 8 ranks.
int globalhistogram[256];
float globalprob[256];  // was declared as globalProb, which did not match the name used in MPI_Gather
float* localprob = new float[32];  // NOTE(review): no delete[] is visible in this fragment
int localpixel[32];
// Arrays already decay to a pointer to their first element, so no '&' is needed.
MPI_Scatter(globalhistogram, 32, MPI_INT, localpixel, 32, MPI_INT, 0, MPI_COMM_WORLD);
for (int i = 0; i < 32; i++)
{
    localprob[i] = (float)localpixel[i] / 16;
}
MPI_Gather(localprob, 32, MPI_FLOAT, globalprob, 32, MPI_FLOAT, 0, MPI_COMM_WORLD);
I don't understand the issue - the code appears to run correctly after I correct what I assume is a typo float globalProb[256] -> float globalprob[256].
I agree with @victor-eijkhout about the &globalprob issue, but it doesn't appear to make a difference.
If I compile and run the appended code I get the expected answer:
dsh@laptop$ mpicxx -o play play.cpp
dsh@laptop$ mpirun -n 8 ./play
rank 0: globalprob[0] = 0.000000
...
rank 0: globalprob[31] = 31.000000
rank 0: globalprob[32] = 64.000000
...
rank 0: globalprob[255] = 2040.000000
Here's the full code:
#include <stdio.h>
#include <mpi.h>
int main(void)
{
int rank, size, i;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int globalhistogram[256];
float globalprob[256];
float* localprob = new float[32];
int localpixel[32];
for (i=0; i < 256; i++)
{
globalhistogram[i] = i;
}
MPI_Scatter(&globalhistogram, 32, MPI_INT, localpixel, 32, MPI_INT, 0, MPI_COMM_WORLD);
for (int i = 0; i < 32; i++)
{
localprob[i] = (float)localpixel[i] *(rank+1);
}
MPI_Gather(localprob, 32, MPI_FLOAT, &globalprob, 32, MPI_FLOAT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
for (i=0; i < 256; i++)
{
printf("rank %d: globalprob[%d] = %f\n", rank, i, globalprob[i]);
}
}
MPI_Finalize();
}
I am writing code with mpi in c++, and I have done this:
#include <stdio.h>
#include "mpi.h"
#define NMAX 100
#define NTIMES 10
/*
 * Ping-pong benchmark between rank 0 and rank 1.
 *
 * For message lengths n = 0, 1, 2, 4, ... doubles (while n <= NMAX), rank 0
 * sends the buffer to rank 1 and waits for it to come back, NTIMES times.
 * The averaged one-way time gives the latency (n == 0) or the bandwidth,
 * which rank 0 prints along with the best bandwidth seen.
 *
 * Needs at least two processes; otherwise it reports that and returns 1.
 */
int main(int argc, char **argv)
{
    /* Distinct tags per direction; sender and receiver must use the same tag. */
    enum { TAG_PING = 0, TAG_PONG = 1 };
    int rank, size, i, n, lmax;
    double time_start, elapsed, bandwidth, max;
    double a[NMAX] = {0};  /* zeroed so no indeterminate values are sent */
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (size < 2) {
        /* Rank 1 does not exist, so sending to it would be an invalid-rank error. */
        if (rank == 0) {
            fprintf(stderr, "this benchmark needs at least 2 processes\n");
        }
        MPI_Finalize();
        return 1;
    }

    n = 0;
    max = 0.0;
    lmax = 0;
    while (n <= NMAX) {
        time_start = MPI_Wtime();
        for (i = 0; i < NTIMES; i++) {
            if (rank == 0) {
                MPI_Send(a, n, MPI_DOUBLE, 1, TAG_PING, MPI_COMM_WORLD);
                MPI_Recv(a, n, MPI_DOUBLE, 1, TAG_PONG, MPI_COMM_WORLD, &status);
            } else if (rank == 1) {
                /* The original waited for tag 1 here while rank 0 sent tag 0,
                 * so neither side could ever make progress. */
                MPI_Recv(a, n, MPI_DOUBLE, 0, TAG_PING, MPI_COMM_WORLD, &status);
                MPI_Send(a, n, MPI_DOUBLE, 0, TAG_PONG, MPI_COMM_WORLD);
            }
        }
        elapsed = (MPI_Wtime() - time_start) / (2 * NTIMES);

        /* Guard against a zero reading from a coarse timer. */
        if (elapsed > 0.0) {
            bandwidth = (sizeof(double) * n * 1.0) / (1024 * 1024 * elapsed);
        } else {
            bandwidth = 0.0;
        }

        if (max < bandwidth) {
            max = bandwidth;
            lmax = (int)(sizeof(double) * n);
        }

        if (rank == 0) {
            if (n == 0) {
                printf("latency = %lf seconds\n", elapsed);
            } else {
                printf("%d bytes, bandwidth = %lf Mb/s\n", (int)sizeof(double)*n, bandwidth);
            }
        }

        if (n == 0) {
            n = 1;
        } else {
            n = 2 * n;
        }
    }

    if (rank == 0) {
        printf("max bandwidth = %lf Mb/s length = %d bytes\n", max, lmax);
    }
    MPI_Finalize();
    return 0;
}
It compiles without errors, but when I try to run the code, this is what I get:
[1]: https://i.stack.imgur.com/tl0HF.png
Does anyone know how I can fix it?
I am learning MPI and I am trying to convert my MPI program from Point to Point Communication to MPI Collectives ..
Below is a fragment of my code for Matrix Multiplication using MPI Point to Point communication ...
/* Step 1: rank 0 tells every other rank the matrix dimensions. */
int i;
if (rank != 0) {
    MPI_Recv(&rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(&columns, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
} else {
    for (i = 1; i < size; i++) {
        MPI_Send(&rows, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
        MPI_Send(&columns, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
    }
}

/* Step 2: each rank owns rows / size rows of the matrix and as many
 * entries of the right-hand side. */
int local_block_size = rows / size;
int process, column_pivot;
const int block_elems = local_block_size * rows;  /* matrix entries per rank */

if (rank != 0) {
    MPI_Recv(matrix_local_block, block_elems, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(rhs_local_block, local_block_size, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
} else {
    /* Ship block i of the matrix and of the right-hand side to rank i. */
    for (i = 1; i < size; i++) {
        MPI_Send(&matrix_1D_mapped[i * block_elems], block_elems, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
        MPI_Send(&rhs[i * local_block_size], local_block_size, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
    }
    /* Rank 0 keeps block 0 for itself: copy it into the local buffers. */
    for (i = 0; i < block_elems; i++) {
        matrix_local_block[i] = matrix_1D_mapped[i];
    }
    for (i = 0; i < local_block_size; i++) {
        rhs_local_block[i] = rhs[i];
    }
}
I am thinking about replacing MPI_Send with MPI_Bcast ... will that be the correct approach ?
For the first communication, the data sent to all receivers is in fact identical, so MPI_Bcast is the correct approach. The second communication distributes different chunks of a larger array to the recipients; this is done as a collective with MPI_Scatter. Note that scatter includes the root rank in the communication, so you can omit the manual local copy.
I was trying to install and run MS-MPI following this tutorial. I have installed MS-MPI and all my system variables are set correctly, see:
I have set all links in VS
Having these linked to the project, I would expect MPI to work. In the IDE no syntax errors are shown and MPI functions are recognized, just as in the next picture. However, compiling a C++ source file with MPI functions produces "undeclared identifier" errors. What am I doing wrong?
Here is my code if it matters
/*
* Transmit a message in a 3-process system.
*/
#include <mpi.h>
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#define BUFSIZE 10
/*
 * Master/worker demo for exactly three processes.
 *
 * Rank 0 (master) sends the same BUFSIZE integers to ranks 1 and 2.
 * Rank 1 returns the minimum (tag 11) and the maximum (tag 12).
 * Rank 2 returns the sum (tag 21) and the average (tag 22).
 * The master prints the four results.
 *
 * With any other process count the program reports the problem on rank 0
 * and does no work. Returns 0 in all cases.
 */
int main(int argc, char *argv[])
{
    int size, rank;
    int slave;
    int buf[BUFSIZE];
    int n, value;
    float rval;
    MPI_Status status;

    /* Initialize MPI */
    MPI_Init(&argc, &argv);

    /* Determine size and my rank in the world group. */
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (size != 3) {
        /* Previously a wrong process count exited without any message. */
        if (rank == 0) {
            fprintf(stderr, "this program must be run with exactly 3 processes (got %d)\n", size);
        }
    }
    else if (rank == 0) { /* Master */
        buf[0] = 5; buf[1] = 1; buf[2] = 8; buf[3] = 7; buf[4] = 6;
        buf[5] = 5; buf[6] = 4; buf[7] = 2; buf[8] = 3; buf[9] = 1;
        printf("\n Sending the values {5,1,8,7,6,5,4,2,3,1}");
        printf("\n -----------------------------");
        for (slave = 1; slave < size; slave++) {
            printf("\n from master %d to slave %d", rank, slave);
            MPI_Send(buf, BUFSIZE, MPI_INT, slave, 1, MPI_COMM_WORLD);
        }
        printf("\n\n Receiving the results from slaves");
        printf("\n ---------------------------------");
        MPI_Recv(&value, 1, MPI_INT, 1, 11, MPI_COMM_WORLD, &status);
        printf("\n Minimum %4d from slave 1", value);
        MPI_Recv(&value, 1, MPI_INT, 2, 21, MPI_COMM_WORLD, &status);
        printf("\n Sum %4d from slave 2", value);
        MPI_Recv(&value, 1, MPI_INT, 1, 12, MPI_COMM_WORLD, &status);
        printf("\n Maximum %4d from slave 1", value);
        MPI_Recv(&rval, 1, MPI_FLOAT, 2, 22, MPI_COMM_WORLD, &status);
        printf("\n Average %4.2f from slave 2\n", rval);
    }
    else if (rank == 1) { /* minmax slave */
        MPI_Recv(buf, BUFSIZE, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);

        /* Seed from the data itself: fixed seeds (100 for min, 0 for max)
         * give wrong answers for values above 100 or all-negative input. */
        value = buf[0];
        for (n = 1; n < BUFSIZE; n++) {
            if (value > buf[n]) { value = buf[n]; }
        }
        MPI_Send(&value, 1, MPI_INT, 0, 11, MPI_COMM_WORLD);

        value = buf[0];
        for (n = 1; n < BUFSIZE; n++) {
            if (value < buf[n]) { value = buf[n]; }
        }
        MPI_Send(&value, 1, MPI_INT, 0, 12, MPI_COMM_WORLD);
    }
    else { /* sumave slave */
        MPI_Recv(buf, BUFSIZE, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
        value = 0;
        for (n = 0; n < BUFSIZE; n++) {
            value = value + buf[n];
        }
        MPI_Send(&value, 1, MPI_INT, 0, 21, MPI_COMM_WORLD);
        rval = (float)value / BUFSIZE;
        MPI_Send(&rval, 1, MPI_FLOAT, 0, 22, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return(0);
}
After some time I found where the bug was. Everything is fine with my settings; the problem is only with the .cpp extension. Changing it to a .c project helped, and everything works as expected.
If I want to compile it as C++, #include "stdafx.h" must come before #include <mpi.h>.
I am trying to simply sum up all variables called "train_hr" and "test_hr" from all 10 processors and store and print the sum on processor 0. I checked to make sure the individual sums are NOT 0 (they are not, they are all in the 1000s). The sum it keeps reporting is 0. I have no idea why. I have looked at many examples of this, and I have done it exactly as instructed. Any help would be appreciated.
double train_hr = 0, test_hr = 0;
double train_hr_global = 0, test_hr_global = 0;
//Master processor
if (my_rank == 0) {
// sends a task to each processor
int curr_task = 0;
for(i = 0; i < num_procs; i++) {
if (curr_task < nsamples_all) {
MPI_Send(&curr_task, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
curr_task++;
}
}
int r;
MPI_Status status;
//keeps sending tasks to processors until there are no more tasks
while (curr_task < nsamples_all) {
MPI_Recv(&r, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
MPI_Send(&curr_task, 1, MPI_INT, status.MPI_SOURCE, 1, MPI_COMM_WORLD);
curr_task++;
}
//tell all processors to stop receiving
int a = -1;
for (i = 0; i < num_procs; i++) {
MPI_Send(&a, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
}
}
//Helper processors
else {
int stop = 1;
while(stop != 0){
int i;
//Receives task OR stop alert from master
MPI_Status status;
MPI_Recv(&i, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
if (i == -1) {
stop = 0;
}
//computations
else{
float r;
//unimportant computations here
train_hr += r;
test_hr += r;
//Tells master processor it is done
MPI_Send(&i, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
}
}
}
//At this point I checked the current values of train_hr and test_hr on each helper processor. They are all non-zero.
MPI_Reduce(&train_hr, &train_hr_global, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&test_hr, &test_hr_global, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
//at this point, the vales of train_hr_global and test_hr_global on the master processor (processor 0) are 0 when they should be the sum of all the processors values.
}