Here is my code. I want to multiply a 2D array by a vector:
#include<iostream>
#include<mpi.h>
using namespace std;
int v_array[10] ;
int ch_size, start, close;
int res ;
int rows, cols;
int main(int argc, char *argv[])
{
int pro_id, tot_pros;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &pro_id);
MPI_Comm_size(MPI_COMM_WORLD, &tot_pros);
if (pro_id == 0) {
cout << "Enter rows and columns: ";
cin >> rows >> cols;
int **array = new int*[rows];
int size1 = rows * cols;
array[0] = new int[size1];
for (int j = 1; j < rows; j++) {
array[j] = &array[0][j*cols];
}
for (int i = 0; i < rows; i++) {
v_array[i] = 1;
for (int j = 0; j < cols; j++) {
array[i][j] = 1;
}
}
for (int i = 1; i < tot_pros; i++) {
ch_size = (rows / (tot_pros - 1));
start = (i - 1) * ch_size;
if (((i + 1) == tot_pros) && ((rows % (tot_pros - 1)) != 0)) {
close = rows;
}
else {
close = start + ch_size;
}
MPI_Send(&start, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&close, 1, MPI_INT, i, 2, MPI_COMM_WORLD);
MPI_Send(&cols, 1, MPI_INT, i, 4, MPI_COMM_WORLD);
MPI_Send(&array[start][0], ch_size *cols, MPI_INT, i, 3, MPI_COMM_WORLD);
}
}
else
{
int cols;
MPI_Recv(&start, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&close, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&cols, 1, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
int **array = new int*[(close - start)*cols];
MPI_Recv(array, (close - start) *cols , MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
for (int i = start; i < close; i++) {
for (int j = 0; j < cols; j++) {
cout << array[i]<<array[j];
res += array[i][j] * v_array[i];
cout << res;
}
}
}
MPI_Finalize();
return 0;
}
This same program works fine when I use a static array, but with dynamic allocation I get this error.
E:\MS(CS)\2nd Semester\Parallel Programing\programs\arr_multi\Debug>mpiexec -n 4 arr_multi.exe
Enter rows and columns: 3 2
job aborted: [ranks] message
[0-1] terminated
[2] process exited without calling finalize
[3] terminated
---- error analysis -----
[2] on RAMISHA-PC arr_multi.exe ended prematurely and may have
crashed. exit code 0xc0000005
---- error analysis -----
I declared the array in a contiguous memory block and my rows are divided correctly among the processes. I think the problem is with my data structure; I have tried many solutions, but in vain.
First and foremost, there are ways to debug an MPI application, and getting that working should really be your top priority. A general approach for multi-process applications is to pause at the beginning of your application, e.g. with a getchar(), and then attach to each process with a debugger as described here:
compile, link and start running your MPI program (you may wish to put a read statement early on to hold the program while you do the next steps)
attach to one of the currently running MPI processes: Debug - Attach to Process brings up a dialogue box which lists Available Processes. You should see NUM instances (where NUM is the value passed to mpiexec -n NUM) of your executable. Select all of these and click on Attach. You can now debug by adding breakpoints etc. To move between MPI processes use the Process drop-down menu just above the code listing.
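If you go the getchar() route, here is a minimal sketch of the pause-then-attach idea (the guard below is just one way to do it; nothing here is specific to your program):
#include <mpi.h>
#include <cstdio>
int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // Hold rank 0 until a key is pressed, so you have time to attach
    // a debugger to every process before any real work starts.
    if (rank == 0) {
        std::printf("Attach debuggers now, then press Enter...\n");
        std::getchar();
    }
    // Everyone else waits here until rank 0 has been released.
    MPI_Barrier(MPI_COMM_WORLD);
    // ... rest of the program ...
    MPI_Finalize();
    return 0;
}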
Having said that, at least one of the problems is with this part: int **array = new int*[(close - start)*cols]; (in the receive part of the application). You allocate the first dimension but not the second, so all pointers in the first dimension are uninitialized.
Change it to something like:
int *array = new int[(close - start) * cols];
MPI_Recv(array, (close - start) *cols, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
for (int i = start; i < close; i++) {
for (int j = 0; j < cols; j++) {
cout << array[(i - start) * cols + j];
res += array[(i - start) * cols + j] * v_array[i];
cout << res;
}
}
delete[] array;
Or if you really want to use a 2D array, copy the initialization code from the sending part:
int rows = close - start;
int **array = new int*[rows];
int size1 = rows * cols;
array[0] = new int[size1];
for (int j = 1; j < rows; j++) {
array[j] = &array[0][j*cols];
}
The second problem is that v_array, being a global, is only initialized on rank 0. Remember that in MPI each process is an independent program, so you should initialize v_array on every rank, regardless of pro_id. Note that the receiver processes do not know rows, so loop over the fixed size of the global array (10 here):
for (int i = 0; i < 10; i++) {
v_array[i] = 1;
}
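Putting both fixes together, here is a sketch of what the whole worker (else) branch could look like, using the flat buffer and initializing v_array on every rank (variable names and tags follow the question; this is a sketch, not a complete tested program):
else
{
    int cols;
    // initialize the vector on this rank too (10 is the declared size of v_array)
    for (int i = 0; i < 10; i++)
        v_array[i] = 1;
    MPI_Recv(&start, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(&close, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(&cols, 1, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    // one contiguous buffer holding the rows assigned to this rank
    int *array = new int[(close - start) * cols];
    MPI_Recv(array, (close - start) * cols, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    for (int i = start; i < close; i++)
        for (int j = 0; j < cols; j++)
            res += array[(i - start) * cols + j] * v_array[i];
    delete[] array;
}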
Related
I was trying to calculate the elementwise product of two matrices, but I got this error and don't know what to do.
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 16855 RUNNING AT kevlinsky-PC
= EXIT CODE: 139
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault (signal 11)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions
The task was to split it between processes, calculate the result, and return it to process 0.
Code example:
#include <iostream>
#include <math.h>
#include "mpi.h"
int main(int argc, char *argv[]){
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
const int n = 4;
int arrayA[n][n];
int arrayB[n][n];
for (int i = 0; i < n; i++){
for (int j = 0; j < n; i++) {
arrayA[i][j] = (rand() % 1000) - 500;
}
for (int j = 0; j < n; i++) {
arrayB[i][j] = (rand() % 1000) - 500;
}
}
int getbufA[n];
int getbufB[n];
int arrayC[n][n];
int bufC[n];
MPI_Scatter(&arrayA, n, MPI_INT, &getbufA, n, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(&arrayB, n, MPI_INT, &getbufB, n, MPI_INT, 0, MPI_COMM_WORLD);
for (int i = 0; i < n; i++) {
bufC[i] = getbufA[i] * getbufB[i];
}
MPI_Gather(&bufC, n, MPI_INT, &arrayC, n, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0) {
printf("MATRIX C \n");
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
printf("%d ", arrayC[i][j]);
}
printf("\n");
}
}
MPI_Finalize();
}
Can someone help with this?
I think this is your error:
for (int j = 0; j < n; i++) {
arrayA[i][j] = (rand() % 1000) - 500;
}
You need j++ in this loop, and you have this error in two places. As written, j is never incremented and stays 0, while i is incremented indefinitely (because the loop condition is based on j), so very soon you go out of bounds of the array, hence the segmentation fault.
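For reference, the corrected initialization loops (only the loop headers change; everything else stays as in the question):
for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {   // was: j < n; i++
        arrayA[i][j] = (rand() % 1000) - 500;
    }
    for (int j = 0; j < n; j++) {   // was: j < n; i++
        arrayB[i][j] = (rand() % 1000) - 500;
    }
}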
I am trying to create three matrices a, b, c where c = a*b, using MPI. I take the side length of these matrices as N (common to all), since I have to create square matrices. However, whenever I enter the value of N at runtime I get a segmentation fault, and if I hard-code the value of N in the program it works fine.
I have tried this with scatter and gather as given here: matrix multiplication using Mpi_Scatter and Mpi_Gather.
Now I have to do it dynamically, so that I can check how long the program takes to execute. Just to mention, I have already done this with OpenMP, which was great, but I want to compare which one really performs better, i.e. OpenMP or MPI.
#include <iostream>
#include <math.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stddef.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
int i, j, k, rank, size, tag = 99, blksz, sum = 0,N=0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int aa[N],cc[N];
if(rank ==0)
{
std::cout << "input value of N" << '\n';
std::cin >> N;
}
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
int **a = new int*[N];
for (int i = 0; i < N; i++)
a[i] = new int[N];
int **b = new int*[N];
for (int i = 0; i < N; i++)
b[i] = new int[N];
int **c = new int*[N];
for (int i = 0; i < N; i++)
c[i] = new int[N];
if (rank == 0)
{
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
a[i][j] =rand() % 10;
std::cout << a[i][j];
}
std::cout << '\n';
}
std::cout << '\n';
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
b[i][j] =rand() % 10;
std::cout << b[i][j];
}
std::cout << '\n';
}
}
MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
sum = sum + aa[j] * b[j][i]; //MISTAKE_WAS_HERE
}
cc[i] = sum;
sum = 0;
}
MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
if (rank == 0) //I_ADDED_THIS
{
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++)
{
std::cout << a[i][j]<< '\n';
}
std::cout << '\n';
}
std::cout << '\n' << '\n';
}
delete *a;
delete *b;
delete *c;
}
The error which I am getting is:
mpirun noticed that process rank 3 with PID 3580 on node localhost exited on signal 11 (Segmentation fault).
All I want here is for the matrix multiplication to be done.
Declaring the array like this
int **a = new int*[N];
for (int i = 0; i < N; i++)
a[i] = new int[N];
will not allocate it in one contiguous memory block. Replacing the above declaration with one of the following will make the application work.
int a[N][N]; // or
int (*a)[N] = (int (*)[N])malloc(N * N * sizeof(int)); // contiguous, still indexed as a[i][j]
MPI_Scatter, MPI_Gather, etc. work on arrays stored in contiguous memory.
#include <iostream>
#include <math.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stddef.h>
#include "mpi.h"
int main(int argc, char *argv[])
{
int i, j, k, rank, size, tag = 99, blksz, sum = 0,N=0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank ==0)
{
std::cout << "input value of N" << '\n';
std::cin >> N;
}
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
int size_array=(N*N)/size;
int aa[size_array],cc[size_array]; // Declare arrays here since value of N is 0 otherwise
int a[N][N];
int b[N][N];
int c[N][N];
if (rank == 0)
{
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
a[i][j] =rand() % 10;
std::cout << a[i][j];
}
std::cout << '\n';
}
std::cout << '\n';
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
b[i][j] =rand() % 10;
std::cout << b[i][j];
}
std::cout << '\n';
}
}
MPI_Scatter(a, N*N/size, MPI_INT, aa, N*N/size, MPI_INT,0,MPI_COMM_WORLD);
//broadcast second matrix to all processes
MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
//perform vector multiplication by all processes
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
sum = sum + aa[j] * b[j][i]; //MISTAKE_WAS_HERE
}
cc[i] = sum;
sum = 0;
}
MPI_Gather(cc, N*N/size, MPI_INT, c, N*N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
if (rank == 0) //I_ADDED_THIS
{
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++)
{
std::cout << a[i][j]<< '\n';
}
std::cout << '\n';
}
std::cout << '\n' << '\n';
}
}
Also, declare the arrays int aa[N], cc[N]; only after the value of N has been read in.
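If you would rather keep the int** style from the question and still meet the contiguity requirement, a common pattern (a sketch, not part of the fix above) is to allocate one block of N*N ints and point the row pointers into it, the same trick the very first example above uses on its sending side:
// one contiguous block plus an array of row pointers into it;
// a[i][j] still works and the data is contiguous in memory
int **a = new int*[N];
a[0] = new int[N * N];
for (int i = 1; i < N; i++)
    a[i] = a[0] + i * N;
// ... fill and use a[i][j] ...
delete[] a[0];
delete[] a;
Even then, remember to pass a[0] (the data block), not a (the pointer array), to MPI_Scatter and MPI_Gather.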
I got a syntax error in the MPI send command. I wanted to send some rows and their respective columns with it. I get an error on this line, MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD), at ch_col. I can't understand why I am getting this error.
int tot_processes;
int process_id;
MPI_Comm_size(MPI_COMM_WORLD, &tot_processes);
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
if (process_id == 0) {
int row, col;
cout << "Enter rows and columns: ";
cin >> row >> col;
int *array = new int[row*col];
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
array[i][j] = 1;
}
}
int ch_size = row / tot_processes;
for (int p = 1; p < tot_processes; p++) {
int ch_row = ch_size * (p - 1);
int ch_col = ch_size * col;
MPI_Send(&ch_size, 1, MPI_INT, p, 0, MPI_COMM_WORLD);
MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD);
}
}
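The compiler complains at ch_col because array is declared as int*, so array[ch_row][ch_col] applies a second subscript to a plain int. Here is a sketch of how the flat buffer could be filled and a chunk of rows sent instead (keeping the question's variable names; this is an assumed layout, not a fix confirmed by the original author):
// array is a flat buffer of row*col ints; element (i, j) lives at array[i*col + j]
for (int i = 0; i < row; i++)
    for (int j = 0; j < col; j++)
        array[i * col + j] = 1;          // instead of array[i][j] = 1;
// send ch_size complete rows starting at row ch_row
MPI_Send(&array[ch_row * col], ch_size * col, MPI_INT, p, 1, MPI_COMM_WORLD);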
I want to scatter a 2D array in other 2D arrays (one for each process) using this specific way of allocating memory.
int (*matrix)[cols] = malloc(sizeof *matrix* rows);
I keep getting this error:
One of the processes started by mpirun has exited with a nonzero exit
code. This typically indicates that the process finished in error.
If your process did not finish in error, be sure to include a "return
0" or "exit(0)" in your C code before exiting the application.
PID 7035 failed on node n0 (127.0.0.1) due to signal 11.
I think the problem is in the scatter, but I am new to parallel programming, so if anyone knows what the issue is please help me.
Thanks in advance.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
int main(int argc, char** argv) {
int my_rank;
int p;
int root;
int rows = 0;
int cols = 0;
int **matrix;
int i, j;
int local_rows;
int answer = 0;
int broke = 0;
MPI_Init(& argc, & argv);
MPI_Comm_rank(MPI_COMM_WORLD, & my_rank);
MPI_Comm_size(MPI_COMM_WORLD, & p);
if (my_rank == 0) {
do {
printf("Enter Dimensions NxN\n");
scanf("%d", & rows);
scanf("%d", & cols);
if (cols != rows) {
printf("Columns must be the same as rows,enter dimensions again.\n");
}
} while (rows != cols);
int (*matrix)[cols] = malloc(sizeof *matrix* rows);
printf("Fill array %dx%d\n", rows, cols);
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
scanf("%d",&matrix[i][j]);
}
}
printf("\n");
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
printf("%d ",matrix[i][j]);
}
printf("\n");
}
}
root = 0;
MPI_Bcast(&rows, 1, MPI_INT, root, MPI_COMM_WORLD);
MPI_Bcast(&cols, 1, MPI_INT, root, MPI_COMM_WORLD);
local_rows = rows / p;
int (*local_matrix)[rows] = malloc(sizeof *local_matrix* local_rows);
MPI_Scatter(matrix, local_rows*rows, MPI_INT,local_matrix, local_rows*rows, MPI_INT, 0, MPI_COMM_WORLD);
printf("\nLocal matrix fo the process %d is :\n", my_rank);
for (i = 0; i < local_rows; i++) {
for (j = 0; j < cols; j++) {
printf("%d ", local_matrix[i][j]);
}
printf("\n");
}
if (my_rank==0){
free(matrix);
free(local_matrix);
}
MPI_Finalize();
}
The problem with your code is that you declared two variables with the name matrix:
int **matrix;
and
int (*matrix)[cols] = malloc(sizeof *matrix* rows);
and since the latter was declared inside the if (my_rank == 0) {..} block, the variable being used in the scatter, MPI_Scatter(matrix, local_rows*rows, MPI_INT, local_matrix, local_rows*rows, MPI_INT, 0, MPI_COMM_WORLD);
is the first one, the unallocated one, and not the one you allocated space for. That is why you are getting the error.
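This is plain scoping (shadowing), independent of MPI; a minimal illustration of the same trap:
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
    int *buf = NULL;                                 /* outer variable */
    {
        int *buf = (int *)malloc(4 * sizeof(int));   /* shadows the outer buf */
        buf[0] = 42;                                 /* touches the inner, allocated buffer */
        free(buf);
    }
    /* from here on, buf means the OUTER variable again: still NULL */
    printf("%p\n", (void *)buf);
    return 0;
}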
Try this:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
int main(int argc, char** argv) {
int my_rank;
int p;
int root;
int rows = 0;
int cols = 0;
int i, j;
int local_rows;
int answer = 0;
int broke = 0;
MPI_Init(& argc, & argv);
MPI_Comm_rank(MPI_COMM_WORLD, & my_rank);
MPI_Comm_size(MPI_COMM_WORLD, & p);
int (*matrix)[cols];
if (my_rank == 0) {
do {
printf("Enter Dimensions NxN\n");
scanf("%d", & rows);
scanf("%d", & cols);
if (cols != rows) {
printf("Columns must be the same as rows,enter dimensions again.\n");
}
} while (rows != cols);
matrix = malloc(sizeof *matrix * rows);
printf("Fill array %dx%d\n", rows, cols);
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
scanf("%d",&matrix[i][j]);
}
}
printf("\n");
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
printf("%d ",matrix[i][j]);
}
printf("\n");
}
}
root = 0;
MPI_Bcast(&rows, 1, MPI_INT, root, MPI_COMM_WORLD);
MPI_Bcast(&cols, 1, MPI_INT, root, MPI_COMM_WORLD);
local_rows = rows / p;
// Changed from the original
int (*local_matrix)[cols] = malloc(sizeof *local_matrix* local_rows);
printf("R = (%d, %d, %d) \n",my_rank, local_rows, cols);
if(my_rank == 0)
{
printf("\n");
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
printf("%d ",matrix[i][j]);
}
printf("\n");
}
}
MPI_Scatter(matrix, local_rows*cols, MPI_INT,local_matrix,
local_rows*cols, MPI_INT, 0, MPI_COMM_WORLD);
...
Btw I think you meant:
int (*local_matrix)[cols] = malloc(sizeof *local_matrix* local_rows);
and not
int (*local_matrix)[rows] = malloc(sizeof *local_matrix* local_rows);
Also, do not forget to free local_matrix on the slave processes as well.
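In other words, move the cleanup out of the rank-0 branch so that every rank frees the buffer it allocated (a sketch; only rank 0 also owns the full matrix):
/* every rank allocated local_matrix, so every rank frees it */
free(local_matrix);
/* only rank 0 allocated the full matrix */
if (my_rank == 0) {
    free(matrix);
}
MPI_Finalize();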
I am new to MPI. I wrote a simple code to display a matrix using multiple processes. Say I have an 8x8 matrix and launch the MPI program with 4 processes: the first 2 rows are printed by the 1st process, the next 2 rows by the 2nd process, and so on, dividing the matrix equally.
#define S 8
MPI_Status status;
int main(int argc, char *argv[])
{
int numtasks, taskid;
int i, j, k = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
int rows, offset, remainPart, orginalRows, height, width;
int **a;
// int a[S][S];
if(taskid == 0)
{
cout<<taskid<<endl;
height = width = S;
a = (int **)malloc(height*sizeof(int *));
for(i=0; i<height; i++)
a[i] = (int *)malloc(width*sizeof(int));
for(i=0; i<S; i++)
for(j=0; j<S; j++)
a[i][j] = ++k;
rows = S/numtasks;
offset = rows;
remainPart = S%numtasks;
cout<<"Num Rows : "<<rows<<endl;
for(i=1; i<numtasks; i++)
if(remainPart > 0)
{
orginalRows = rows;
rows++;
remainPart--;
MPI_Send(&offset, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&width, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
offset += rows;
rows = orginalRows;
}
else
{
MPI_Send(&offset, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&width, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
offset += rows;
}
//Processing
rows = S/numtasks;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
}else
{
cout<<taskid<<endl;
MPI_Recv(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&width, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
a = (int **)malloc(rows*sizeof(int *));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
MPI_Recv(&a, rows*width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
cout<<"Offset : "<<offset<<"\nRows : "<<rows<<"\nWidth : "<<width<<endl;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
}
getch();
MPI_Finalize();
return 0;
}
This is my complete code. Here I have allocated the memory for 'a' dynamically, and while printing a[i][j] in the else part I get a runtime error. If I change the dynamic memory allocation to static, i.e. change int **a to int a[S][S] and remove
a = (int **)malloc(rows*sizeof(int));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
it works perfectly.
There are at least two ways to dynamically allocate a 2D array.
The first one is the one from #HRoid: each row is allocated one at a time. Look here for a scheme.
The second one is suggested by #Claris, and it ensures that the data is contiguous in memory. This is required by many MPI operations; it is also required by libraries like FFTW (2D fast Fourier transform) or LAPACK (dense matrices for linear algebra). Your program may fail at
MPI_Send(&a[offset][0], rows*S, MPI_INT,i,1, MPI_COMM_WORLD);
if S > 1, this program will try to send items that lie past the end of row number offset... That may trigger a segmentation fault or undefined behavior.
You may allocate your array this way :
a = malloc(rows * sizeof(int *));
if(a==NULL){fprintf(stderr,"out of memory...i will fail\n");}
int *t = malloc(rows * width * sizeof(int));
if(t==NULL){fprintf(stderr,"out of memory...i will fail\n");}
for(i = 0; i < rows; ++i)
a[i] = &t[i * width];
Watch out: malloc does not initialize memory to 0!
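If you need the block zero-initialized, one option (a side note, not required by the allocation above) is calloc, which zero-fills the memory it returns:
/* calloc zero-fills the block it returns, unlike malloc */
int *t = (int *)calloc(rows * width, sizeof(int));
if(t==NULL){fprintf(stderr,"out of memory...i will fail\n");}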
It seems that you want to spread a 2D array over many processes. Look at MPI_Scatterv() here. Look at this question too.
If you want to know more about 2D arrays and MPI, look here.
You may find a basic example of MPI_Scatterv here.
I changed #define S 8 to #define SQUARE_SIZE 42. It's always better to give descriptive names.
And here is working code using MPI_Scatterv()!
#include <mpi.h>
#include <iostream>
#include <cstdlib>
using namespace std;
#define SQUARE_SIZE 42
MPI_Status status;
int main(int argc, char *argv[])
{
int numtasks, taskid;
int i, j, k = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
int rows, offset, remainPart, orginalRows, height, width;
int **a;
height = width = SQUARE_SIZE;
//on rank 0, let's build a big mat of int
if(taskid == 0){
a=new int*[height];
int *t =new int[height * width];
for(i = 0; i < height; ++i)
a[i] = &t[i * width];
for(i=0; i<height; i++)
for(j=0; j<width; j++)
a[i][j] = ++k;
}
//for everyone, lets compute numbers of rows, numbers of int and displacements for everyone. Only 0 will use these arrays, but it's a practical way to get `rows`
int nbrows[numtasks];
int sendcounts[numtasks];
int displs[numtasks];
displs[0]=0;
for(i=0;i<numtasks;i++){
nbrows[i]=height/numtasks;
if(i<height%numtasks){
nbrows[i]=nbrows[i]+1;
}
sendcounts[i]=nbrows[i]*width;
if(i>0){
displs[i]=displs[i-1]+sendcounts[i-1];
}
}
rows=nbrows[taskid];
//scattering operation.
//The case of the root is particular, since the communication is not to be done...Hence, the flag MPI_IN_PLACE is used.
if(taskid==0){
MPI_Scatterv(&a[0][0],sendcounts,displs,MPI_INT,MPI_IN_PLACE,0,MPI_INT,0,MPI_COMM_WORLD);
}else{
//allocation of memory for the piece of mat on the other nodes.
a=new int*[rows];
int *t =new int[rows * width];
for(i = 0; i < rows; ++i)
a[i] = &t[i * width];
MPI_Scatterv(NULL,sendcounts,displs,MPI_INT,&a[0][0],rows*width,MPI_INT,0,MPI_COMM_WORLD);
}
//printing, one proc at a time
if(taskid>0){
MPI_Status status;
MPI_Recv(NULL,0,MPI_INT,taskid-1,0,MPI_COMM_WORLD,&status);
}
cout<<"rank"<< taskid<<" Rows : "<<rows<<" Width : "<<width<<endl;
for(i=0; i<rows; i++)
{
for(j=0; j<width; j++)
cout<<a[i][j]<<"\t";
cout<<endl;
}
if(taskid<numtasks-1){
MPI_Send(NULL,0,MPI_INT,taskid+1,0,MPI_COMM_WORLD);
}
//freeing the memory !
delete[] a[0];
delete[] a;
MPI_Finalize();
return 0;
}
To compile : mpiCC main.cpp -o main
To run : mpiexec -np 3 main
This code looks awfully suspect.
a = (int **)malloc(rows*sizeof(int));
for(i=0; i<rows; i++)
a[i] = (int *)malloc(width*sizeof(int));
MPI_Recv(&a, rows*width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
You're creating an array of int** and allocating it correctly, but then you don't pass the individual pointers. MPI_Recv expects an int* (one contiguous buffer) as an argument, right?
Note that when you use an int[][], the allocated memory will be contiguous. When you malloc row by row, you should expect non-contiguous blocks of memory.
An easy solution may be to just do a = (int*) malloc(big), and then index into that large memory allocation.
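A sketch of that approach on the receiving side (one flat buffer and manual index arithmetic; variable names follow the question, and this MPI_Recv replaces the broken one that passed &a):
// receive the chunk of rows into one contiguous block
int *a = (int *)malloc(rows * width * sizeof(int));
MPI_Recv(a, rows * width, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
// element (i, j) of the chunk lives at a[i*width + j]
for(i=0; i<rows; i++)
{
    for(j=0; j<width; j++)
        cout<<a[i*width + j]<<"\t";
    cout<<endl;
}
free(a);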