This is my first time working with MPI. I am working on a piece of code which does matrix multiplication using vectors and MPI. My program compiles successfully, but when I run it I keep getting errors. Can anyone help me understand why I am getting these errors? TIA
An error occurred in MPI_Recv
reported by process [139967236014081,139964394242049]
on communicator MPI_COMM_WORLD
MPI_ERR_RANK: invalid rank
MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
and potentially your MPI job)
/**********************************************************************
* MPI-based matrix multiplication AxB=C
*********************************************************************/
#include<ctime>
#include<iostream>
#include<time.h>
#include <vector>
#include <cstdlib>
#include "mpi.h"
#define N 10 /* number of rows and columns in matrix */
using namespace std;
MPI_Status status;
//double a[N][N],b[N][N],c[N][N];
int main(int argc, char *argv[])
{
int numtasks,taskid,numworkers,source,dest,rows,offset,i,j,k;
int averow, extra;
int loop = 1000;
double randomNum;
vector<vector<double> > a;
vector<vector<double> > b;
vector<vector<double> > c;
vector<vector<double> > avg_Matrix;
a.resize(N);
for (int i = 0; i < N; ++i)
a[i].resize(N);
b.resize(N);
for (int i = 0; i < N; ++i)
b[i].resize(N);
c.resize(N);
for (int i = 0; i < N; ++i)
c[i].resize(N);
avg_Matrix.resize(N);
for (int i = 0; i < N; ++i)
avg_Matrix[i].resize(N);
srand ( time(NULL) ); //Initializing random seed.
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
//cout<<"Number of works"<<numworkers;
//cout<<"taskid"<<taskid;
numworkers = numtasks-1;
/*---------------------------- master ----------------------------*/
if (taskid == 0) {
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
randomNum = rand() % 400 + (-199.999); // generate random numbers between -199.999 and 199.999 for the first matrix.
a[i][j] = randomNum;
}
}
cout<<"Printing First Matrix"<<endl;
for(i=0; i< N; i++)
{
cout<<endl;
for(j=0; j< N; j++)
{
cout<<a[i][j];
cout<<"\t";
}
}
/* second matrix. */
for(i=0; i< N; i++)
{
for(j=0; j< N; j++)
{
randomNum = rand() % 400 + (-199.999); // generate random numbers between -199.999 and 199.999 for the second matrix.
b[i][j] = randomNum;
}
}
cout<<endl;
cout<<endl;
/* Printing the second Matrix*/
cout<<"Printing Second Matrix"<<endl;
for(i=0; i< N; i++)
{
cout<<endl;
for(j=0; j< N; j++)
{
cout<<b[i][j];
cout<<"\t";
}
}
/* send matrix data to the worker tasks */
averow = N/numworkers;
extra = N%numworkers;
offset = 0;
for (dest=1; dest<=numworkers; dest++)
{
rows = (dest <= extra) ? averow+1 : averow;
MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*N, MPI_DOUBLE,dest,1, MPI_COMM_WORLD);
MPI_Send(&b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
offset = offset + rows;
}
/* wait for results from all worker tasks */
if(taskid == 0) {
for (i=1; i<=numworkers; i++)
{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
MPI_Recv(&c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
}
}
cout<<"Here is the result matrix:"<<endl;
for (i=0; i<N; i++) {
for (j=0; j<N; j++)
cout<<"\t"<<c[i][j];
cout<<endl;
}
}
/*---------------------------- worker----------------------------*/
if (taskid > 0) {
source = 0;
MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
/* Matrix multiplication */
for (k=0; k<N; k++){
for (i=0; i<rows; i++) {
for (j=0; j<N; j++){
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}
}
}
MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
MPI_Send(&c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
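For reference, here is a minimal sketch (separate from the program above) of sending a block of rows when a matrix is kept in a single flat std::vector<double> of size N*N, with element (i, j) stored at index i*N + j so that consecutive rows are contiguous in memory. The helper name send_row_block is just a placeholder for illustration:
#include <vector>
#include "mpi.h"
// Sketch only: the matrix lives in ONE flat buffer of size n*n, indexed as
// flat[i*n + j], so a block of consecutive rows is contiguous in memory and
// can be handed to MPI_Send directly.
void send_row_block(std::vector<double> &flat, int n,
                    int offset, int rows, int dest)
{
    MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
    MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
    // rows*n doubles, starting at the first element of row `offset`
    MPI_Send(&flat[offset * n], rows * n, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
}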
Related
I have written a C program to run on MPI with 4 nodes.
The program takes in an int, N, the number of elements. This value is then broadcast (MPI_Bcast) to the various nodes, where it is used to dynamically create arrays of size N on each node.
I have tried running this program with inputs from 64 up to 1 million, and it works fine.
When I try inputting 10 million or higher, MPI crashes and occasionally gives the following error:
Fatal error in MPI_Bcast: Other MPI error, error stack:
MPI_Bcast(buf=0x000000000067FD74, count=1, MPI_INT, root=0, MPI_COMM_WORLD) failed
failed to attach to a bootstrap queue - 6664:280
10 million is well within the limits of an int, so I'm not sure why this is occurring.
The code is below:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include <time.h>
int main(int argc, char *argv[]){
int process_Rank, size_Of_Cluster;
int number_of_elements;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size_Of_Cluster);
MPI_Comm_rank(MPI_COMM_WORLD, &process_Rank);
if(process_Rank == 0){
printf("Enter the number of elements:\n");
fflush(stdout);
scanf("%d", &number_of_elements);
}
MPI_Bcast(&number_of_elements,1,MPI_INT, 0, MPI_COMM_WORLD);
int *outputs = (int*)malloc(number_of_elements * sizeof(int));
unsigned long long chunk_size = number_of_elements/ size_Of_Cluster;
int my_input[chunk_size], my_output[chunk_size];
for(int i = 0; i < number_of_elements; i++){
outputs[i] = i+1;
}
MPI_Barrier(MPI_COMM_WORLD);
clock_t begin = clock();
MPI_Scatter(outputs, chunk_size, MPI_INT, &my_input, chunk_size, MPI_INT, 0, MPI_COMM_WORLD);
for(int i = 0; i <= chunk_size; i++){
my_output[i] = my_input[i];
}
MPI_Gather(&my_output, chunk_size, MPI_INT, outputs, chunk_size, MPI_INT, 0, MPI_COMM_WORLD);
int iterate_terms[5] = {2,4,8,4,2};
int starting_terms[5] = {1,3,7,3,1};
int subtract_terms[5] = {1,2,4,0,0};
int adding_terms[5] = {0,0,0,2,1};
for(int j = 0; j < 5; j++){
MPI_Scatter(outputs, chunk_size, MPI_INT, &my_input, chunk_size, MPI_INT, 0, MPI_COMM_WORLD);
for(int i = starting_terms[j]; i <= chunk_size; i+= iterate_terms[j]){
my_output[i+adding_terms[j]] += my_input[i-subtract_terms[j]];
}
MPI_Gather(&my_output, chunk_size, MPI_INT, outputs, chunk_size, MPI_INT, 0, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
if(process_Rank == 0){
for(int i = chunk_size-1; i < number_of_elements; i+=chunk_size){
outputs[i+1] += outputs[i];
outputs[i+2] += outputs[i];
outputs[i+3] += outputs[i];
}
clock_t end = clock();
double time_spent = (double)(end-begin) / CLOCKS_PER_SEC;
for(int i = 0; i < number_of_elements; i++){
printf("%d \n", outputs[i]);
fflush(stdout);
}
printf("took %f", time_spent);
fflush(stdout);
} else {
clock_t end = clock();
}
MPI_Finalize();
return 0;
}
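For reference, a minimal sketch of the pattern described above, with N broadcast from rank 0 and the per-rank chunk buffers allocated on the heap rather than on the stack; the names chunk_in, chunk_out, and all are placeholders for illustration:
/* Sketch: broadcast N, then give each rank a heap-allocated chunk buffer. */
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc, char *argv[]) {
    int rank, size, n = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (rank == 0) {
        printf("Enter the number of elements:\n");
        fflush(stdout);
        if (scanf("%d", &n) != 1) n = 0;
    }
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    int chunk = n / size;
    /* Heap buffers: their size is not limited by the stack. */
    int *chunk_in  = (int *)malloc(chunk * sizeof(int));
    int *chunk_out = (int *)malloc(chunk * sizeof(int));
    int *all = NULL;
    if (rank == 0) {
        all = (int *)malloc((size_t)n * sizeof(int));
        for (int i = 0; i < n; i++) all[i] = i + 1;
    }
    MPI_Scatter(all, chunk, MPI_INT, chunk_in, chunk, MPI_INT, 0, MPI_COMM_WORLD);
    for (int i = 0; i < chunk; i++) chunk_out[i] = chunk_in[i];
    MPI_Gather(chunk_out, chunk, MPI_INT, all, chunk, MPI_INT, 0, MPI_COMM_WORLD);
    free(chunk_in);
    free(chunk_out);
    free(all);
    MPI_Finalize();
    return 0;
}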
I was trying to compute the element-wise product of two matrices, but I got this error and don't know what to do.
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 16855 RUNNING AT kevlinsky-PC
= EXIT CODE: 139
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions
The task was to split the work between processes, compute the result, and return it to process zero.
Code example:
#include <iostream>
#include <math.h>
#include "mpi.h"
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
const int n = 4;
int arrayA[n][n];
int arrayB[n][n];
for (int i = 0; i < n; i++){
for (int j = 0; j < n; i++) {
arrayA[i][j] = (rand() % 1000) - 500;
}
for (int j = 0; j < n; i++) {
arrayB[i][j] = (rand() % 1000) - 500;
}
}
int getbufA[n];
int getbufB[n];
int arrayC[n][n];
int bufC[n];
MPI_Scatter(&arrayA, n, MPI_INT, &getbufA, n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(&arrayB, n, MPI_INT, &getbufB, n, MPI_INT, 0, MPI_COMM_WORLD);
for (int i = 0; i < n; i++) {
bufC[i] = getbufA[i] * getbufB[i];
}
MPI_Gather(&bufC, n, MPI_INT, &arrayC, n, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0) {
printf("MATRIX C \n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
printf("%d ", arrayC[i][j]);
}
printf("\n");
}
}
MPI_Finalize();
}
Can someone help with this?
I think this is your error:
for (int j = 0; j < n; i++) {
arrayA[i][j] = (rand() % 1000) - 500;
}
You need j++ in this loop. And you have this error in two places. j is never incremented and stays 0, and i is incremented indefinitely (because the condition for the loop is based on j), so very soon you go out of bounds for the array, hence the segmentation fault.
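With the increments fixed, the two initialization loops look like this (nothing else changed):
for (int i = 0; i < n; i++){
    for (int j = 0; j < n; j++) {   // j++, not i++
        arrayA[i][j] = (rand() % 1000) - 500;
    }
    for (int j = 0; j < n; j++) {   // same fix here
        arrayB[i][j] = (rand() % 1000) - 500;
    }
}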
I am getting a syntax error in an MPI_Send call. I wanted to send some rows and the respective columns with it. The error is reported in this line, MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD), at ch_col. I can't understand why I am getting this error.
int tot_processes;
int process_id;
MPI_Comm_size(MPI_COMM_WORLD, &tot_processes);
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
if (process_id == 0) {
int row, col;
cout << "Enter rows and columns: ";
cin >> row >> col;
int *array = new int[row*col];
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
array[i][j] = 1;
}
}
int ch_size = row / tot_processes;
for (int p = 1; p < tot_processes; p++) {
int ch_row = ch_size * (p - 1);
int ch_col = ch_size * col;
MPI_Send(&ch_size, 1, MPI_INT, p, 0, MPI_COMM_WORLD);
MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD);
}
}
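For completeness, a sketch of the layout I am trying to send (not my actual code): array is one new[]'d buffer of row*col ints, so element (i, j) sits at array[i*col + j], and a chunk of ch_size rows starting at row ch_row begins at array[ch_row*col]:
// Sketch: flat indexing into the single new[]'d buffer of row*col ints.
for (int i = 0; i < row; i++) {
    for (int j = 0; j < col; j++) {
        array[i * col + j] = 1;                 // element (i, j)
    }
}
for (int p = 1; p < tot_processes; p++) {
    int ch_row = ch_size * (p - 1);             // first row of this chunk
    MPI_Send(&ch_size, 1, MPI_INT, p, 0, MPI_COMM_WORLD);
    // ch_size whole rows are contiguous in the flat buffer
    MPI_Send(&array[ch_row * col], ch_size * col, MPI_INT, p, 1, MPI_COMM_WORLD);
}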
I am new to MPI. I wrote a simple program to display a matrix using multiple processes. For example, if I have an 8x8 matrix and launch the MPI program with 4 processes, the first 2 rows are printed by the 1st process, the next 2 rows by the 2nd process, and so on, dividing the matrix equally.
#define S 8
MPI_Status status;
int main(int argc, char *argv[])
{
int numtasks, taskid;
int i, j, k = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
int rows, offset, remainPart, orginalRows, height, width;
int **a;
// int a[S][S];
if(taskid == 0)
{
cout<<taskid<<endl;
height = width = S;
a = (int **)malloc(height*sizeof(int *));
for(i=0; i<height; i++)
a[i] = (int *)malloc(width*sizeof(int));
for(i=0; i<S; i++)
for(j=0; j<S; j++)
a[i][j] = ++k;
rows = S/numtasks;
offset = rows;
remainPart = S%numtasks;
cout<<"Num Rows : "<<rows<<endl;
for(i=1; i<numtasks; i++)
if(remainPart > 0)
{
orginalRows = rows;
rows++;
remainPart--;
MPI_Send(&offset, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&width, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
offset += rows;
rows = orginalRows;
}
else
{
MPI_Send(&offset, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&width, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
offset += rows;
}
//Processing
rows = S/numtasks;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
}else
{
cout<<taskid<<endl;
MPI_Recv(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&width, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
a = (int **)malloc(rows*sizeof(int *));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
MPI_Recv(&a, rows*width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
cout<<"Offset : "<<offset<<"\nRows : "<<rows<<"\nWidth : "<<width<<endl;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
}
getch();
MPI_Finalize();
return 0;
}
This is my complete code. Here I have allocated the memory for 'a' dynamically; while printing a[i][j] under the else part, I get a runtime error. If I change the dynamic memory allocation to static, i.e. change int **a to int a[S][S] and remove
a = (int **)malloc(rows*sizeof(int));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
it works perfectly.
There are at least two ways to dynamically allocate a 2D array.
The first one is the approach of #HRoid: each row is allocated one at a time. Look here for a scheme.
The second one is suggested by #Claris, and it ensures that the data is contiguous in memory. This is required by many MPI operations... it is also required by libraries like FFTW (2D fast Fourier transform) or LAPACK (dense matrices for linear algebra). Your program may fail at
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
If S>1, this program will try to send items that are past the end of row number offset... That may trigger a segmentation fault or undefined behavior.
You may allocate your array this way:
a = malloc(rows * sizeof(int *));
if(a==NULL){fprintf(stderr,"out of memory...i will fail\n");}
int *t = malloc(rows * width * sizeof(int));
if(t==NULL){fprintf(stderr,"out of memory...i will fail\n");}
for(i = 0; i < rows; ++i)
a[i] = &t[i * width];
Watch out: malloc does not initialize memory to 0!
It seems that you want to spread a 2D array over many processes. Look at MPI_Scatterv() here. Look at this question too.
If you want to know more about 2D arrays and MPI, look here.
You may find a basic example of MPI_Scatterv here.
I changed #define S 8 to #define SQUARE_SIZE 42. It's always better to give descriptive names.
And here is a working code using MPI_Scatterv() !
#include <mpi.h>
#include <iostream>
#include <cstdlib>
using namespace std;
#define SQUARE_SIZE 42
MPI_Status status;
int main(int argc, char *argv[])
{
int numtasks, taskid;
int i, j, k = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
int rows, offset, remainPart, orginalRows, height, width;
int **a;
height = width = SQUARE_SIZE;
//on rank 0, let's build a big mat of int
if(taskid == 0){
a=new int*[height];
int *t =new int[height * width];
for(i = 0; i < height; ++i)
a[i] = &t[i * width];
for(i=0; i<height; i++)
for(j=0; j<width; j++)
a[i][j] = ++k;
}
//For every rank, compute the number of rows, the number of ints and the displacement. Only rank 0 will use these arrays, but it's a practical way to get `rows`
int nbrows[numtasks];
int sendcounts[numtasks];
int displs[numtasks];
displs[0]=0;
for(i=0;i<numtasks;i++){
nbrows[i]=height/numtasks;
if(i<height%numtasks){
nbrows[i]=nbrows[i]+1;
}
sendcounts[i]=nbrows[i]*width;
if(i>0){
displs[i]=displs[i-1]+sendcounts[i-1];
}
}
rows=nbrows[taskid];
//scattering operation.
//The root is a special case: its own block is already in place, so no communication is needed. Hence, the flag MPI_IN_PLACE is used.
if(taskid==0){
MPI_Scatterv(&a[0][0],sendcounts,displs,MPI_INT,MPI_IN_PLACE,0,MPI_INT,0,MPI_COMM_WORLD);
}else{
//allocation of memory for the piece of mat on the other nodes.
a=new int*[rows];
int *t =new int[rows * width];
for(i = 0; i < rows; ++i)
a[i] = &t[i * width];
MPI_Scatterv(NULL,sendcounts,displs,MPI_INT,&a[0][0],rows*width,MPI_INT,0,MPI_COMM_WORLD);
}
//printing, one proc at a time
if(taskid>0){
MPI_Status status;
MPI_Recv(NULL,0,MPI_INT,taskid-1,0,MPI_COMM_WORLD,&status);
}
cout<<"rank"<< taskid<<" Rows : "<<rows<<" Width : "<<width<<endl;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
if(taskid<numtasks-1){
MPI_Send(NULL,0,MPI_INT,taskid+1,0,MPI_COMM_WORLD);
}
//freeing the memory !
delete[] a[0];
delete[] a;
MPI_Finalize();
return 0;
}
To compile: mpiCC main.cpp -o main
To run: mpiexec -np 3 main
This code looks awfully suspect.
a = (int **)malloc(rows*sizeof(int));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
MPI_Recv(&a, rows*width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
You're creating an array of int* (so a is an int**) and allocating it correctly, but then you don't pass the individual row pointers; you pass &a itself. MPI_Recv expects a pointer to the receive buffer (an int* here), right?
Note that when you declare an int[][] array, the allocated memory is contiguous. When you malloc each row separately, you should expect non-contiguous blocks of memory.
An easy solution may be to just do a single big malloc, and then index into that large memory allocation.
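A minimal sketch of that, reusing the names from the question:
// One big contiguous block; a[i] are just row pointers into it.
int *block = (int *) malloc(rows * width * sizeof(int));
int **a    = (int **) malloc(rows * sizeof(int *));
for (int i = 0; i < rows; i++)
    a[i] = block + i * width;
// Receive straight into the contiguous block (not into &a).
MPI_Recv(block, rows * width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);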
In my parallel programming book, I came across this code; the book says the slaves generate the data set, but I think the master actually generates it.
This line in particular is why I believe that master generates the data set.
for (i=0; i < ARRAY_SIZE; i++)
numbers[i] = i;
Can someone confirm whether the master or the slaves generate the data set?
#include "mpi.h"
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define TRIALS 20
#define ARRAY_SIZE 1000000
int main(int argc, char *argv[])
{
int myid, numprocs;
double startwtime, endwtime;
int namelen;
int* numbers = new int[ARRAY_SIZE];
int i, j, sum, part_sum;
int s, s0, startIndex, endIndex;
double totalTime;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Get_processor_name(processor_name,&namelen);
fprintf(stderr,"Process %d on %s\n", myid, processor_name);
fflush(stderr);
for (i=0; i < ARRAY_SIZE; i++)
numbers[i] = i;
if (myid == 0)
{
s = (int) floor(ARRAY_SIZE/numprocs);
s0 = s + ARRAY_SIZE%numprocs;
//printf("s=%d , s0= %d\n", s, s0);
}
MPI_Bcast(&s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&s0, 1, MPI_INT, 0, MPI_COMM_WORLD);
startIndex = s0 + (myid - 1)*s;
endIndex = startIndex + s;
totalTime = 0;
for (j = 1; j <= TRIALS; j++)
{
if (myid == 0)
{
startwtime = MPI_Wtime();
}
sum = 0;
part_sum = 0;
if (myid == 0) // master
{
// compute sum of master's numbers
for (i = 0; i < s0; i++)
{
part_sum += numbers[i];
}
}
else
{
for (i = startIndex; i < endIndex; i++)
{
part_sum += numbers[i];
}
}
MPI_Reduce(&part_sum, &sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0)
{
double runTime;
endwtime = MPI_Wtime();
runTime = endwtime - startwtime;
printf("Trial %d : Execution time (sec) = %f\n", j, runTime);
printf("Sum = %d \n", sum);
totalTime += runTime;
}
} // end for
if (myid == 0)
printf("Average time for %d trials = %f", TRIALS, totalTime/TRIALS);
MPI_Finalize();
}
Both the master and the slaves generate the entire array. You have to remember that your program runs on all nodes and the part of the code in question doesn't distinguish between master/slave. So the wording of your book isn't wrong, but it could be clarified. :)
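If the intent were for only the master to generate the data, the code would have to say so explicitly and then distribute the array to the other ranks; a minimal sketch, using the names from the program above:
// Only rank 0 fills the array; every other rank receives a copy via broadcast.
if (myid == 0)
{
    for (i = 0; i < ARRAY_SIZE; i++)
        numbers[i] = i;
}
MPI_Bcast(numbers, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);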