MPI_Scatter: Invalid buffer pointer - C++

Can anybody tell me what I am doing wrong that causes this error?
Code:
#include<stdio.h>
#include<mpi.h>
void transpose(int ** p, int row, int col)
{
int ** tempVar;
tempVar = (int *)malloc(sizeof(int *)* row);
int i = 0;
for (; i < row; i++)
{
tempVar[i] = (int *)malloc(sizeof (int *)* col);
int j = 0;
while (j < col)
{
tempVar[i][j] = p[j][i];
j++;
}
}
p = tempVar;
}
void main(int argc, char * argv[])
{
int rank, size;
MPI_Init(argc, argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int d[] = { 1000, 1000, 1000, 1000, 1000, 1000 };
int vt[6] = { 1000, 1000, 1000, 1000, 1000, 1000 };
int ** p;
p = (int *)malloc(sizeof(int *)* 6);
int i = 0;
int row = 6;
int col = 6;
while (i < 6)
{
p[i] = (int *)malloc(sizeof(int *)* 6);
/*int j = 0;
if (rank == 0)
{
while (j < 6)
{
scanf("%d", p[i][j]);
j++;
}
}*/
i++;
}
p[0][0] = 0; p[0][1] =2 ; p[0][2] =3 ; p[0][3] =1 ; p[0][4] =1000 ; p[0][5] =1000 ;
p[1][0] = 2; p[1][1] = 0; p[1][2] = 1000; p[1][3] = 1000; p[1][4] = 5; p[1][5] = 1000;
p[2][0] = 3; p[2][1] = 1000; p[2][2] = 0; p[2][3] = 1000; p[2][4] = 1000; p[2][5] = 1;
p[3][0] = 1; p[3][1] = 1000; p[3][2] = 1000; p[3][3] = 0; p[3][4] = 4; p[3][5] = 3;
p[4][0] = 1000; p[4][1] = 5; p[4][2] = 1000; p[4][3] = 4; p[4][4] = 0; p[4][5] = 2;
p[5][0] = 1000; p[5][1] = 1000; p[5][2] = 1; p[5][3] = 3; p[5][4] = 2; p[5][5] = 0;
int smallest;
if (rank == 0)
{
//transpose(&p , row , col);
smallest = 0;
vt[smallest] = smallest;
//MPI_Bcast();
}
int vt1, d1;
vt1 = d1 = 0;
int roww[6];
MPI_Scatter(vt, 6, MPI_INT, vt1, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(d, 6, MPI_INT, d1, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(p, row *row, MPI_INT,roww, 6, MPI_INT, 0, MPI_COMM_WORLD);
i = 0;
while (i < (row*row)/size)
{
MPI_Bcast(smallest, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (vt1 != rank)
{
if (roww[smallest] != 1000)
{
if (d1 > roww[smallest])
d1 = roww[smallest];
}
}
MPI_Gather(d1, 1, MPI_INT, d, row, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
smallest = d[0];
int k = 1;
int index = 0;
while (k < 6)
{
if (d[k] < smallest)
{
smallest = d[k];
index = k;
}
k++;
}
vt[k] = index;
}
MPI_Scatter(vt, 6, MPI_INT, vt1, (row) / size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(d, 6, MPI_INT, d1, (row) / size, MPI_INT, 0, MPI_COMM_WORLD);
i++;
}
MPI_Finalize();
}
The error that I am getting is:
Fatal Error: fatal error in MPI_Scatter: Invalid buffer pointer, error stack:
MPI_Scatter(760): MPI_Scatter(sbuf=0x0085f7ac , scount , MPI_INT , rbuf =0x0000000 , rcount =1, MPI_INT , root= 0 , MPI_COMM_WORLD) failed

The code you provided compiles with lots of warnings that should not be ignored, such as:
passing argument 2 of ‘MPI_Init’ from incompatible pointer type
Look carefully at the function prototypes: int* fun(int* b); is likely to fail if you call it as int d; fun(d);. If the function needs a pointer to the data, fun(&d) is what it expects. This problem occurs many times in your code where MPI functions are called.
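For example, here is a minimal standalone sketch (made-up buffer names, assuming it is run with at most four processes) of passing addresses where MPI expects pointers:
#include <mpi.h>
/* Sketch only: MPI functions that fill in a value need the address of the
   variable, not the variable itself. */
int main(int argc, char *argv[])
{
    int rank;
    MPI_Init(&argc, &argv);               /* &argc and &argv, not argc and argv */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* &rank: the call writes into rank   */
    int sendbuf[4] = { 10, 20, 30, 40 };
    int recvbuf;                          /* one element per process            */
    /* The receive buffer must be a pointer, so pass &recvbuf, not recvbuf. */
    MPI_Scatter(sendbuf, 1, MPI_INT, &recvbuf, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}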
More: the function transpose(int **p, ...) tries to modify p by doing p = tempVar. As @WhozCraig pointed out, with int **p; ...; transpose(p, ...); only the copy of p local to transpose() is modified, not the caller's p. Hence, the right prototype for this function is transpose(int ***p, ...) and the right way to call it is int **p; ...; transpose(&p, ...);
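Here is a tiny standalone sketch of that pattern (made-up names, not your transpose; error checking omitted):
#include <stdlib.h>
/* Sketch: to let a function replace the caller's pointer, pass the address
   of that pointer and assign through it. */
static void replace_buffer(int **pp, int n)
{
    *pp = malloc(n * sizeof(int));  /* the caller's pointer now points here */
}
int main(void)
{
    int *buf = NULL;
    replace_buffer(&buf, 10);       /* pass &buf so buf itself can be changed */
    free(buf);
    return 0;
}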
Regarding memory allocation: you found a way to allocate a 2D array! But the data is not contiguous in memory, since the rows are allocated one at a time. If you plan to use MPI functions such as MPI_Scatter(), allocating a contiguous 2D array is the right way to go (more).
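The usual idiom looks like this (a sketch with arbitrary dimensions; error checking omitted):
#include <stdlib.h>
int main(void)
{
    int rows = 6, cols = 6;
    /* One block of rows*cols ints plus an array of row pointers into it,
       so p[i][j] works and &p[0][0] is a valid contiguous buffer for MPI. */
    int **p = malloc(rows * sizeof(int *));
    p[0] = malloc(rows * cols * sizeof(int));
    for (int i = 1; i < rows; i++)
        p[i] = &p[0][i * cols];
    p[2][3] = 42;                   /* use it like a normal 2D array */
    free(p[0]);
    free(p);
    return 0;
}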
Additional advice: call free() at the right time to release the memory and avoid memory leaks, and do not cast the return value of malloc().
Here is a piece of code that should compile cleanly with mpicc main.c -o main -Wall. The option -Wall enables all warnings. It seems to run fine, though I did not check whether the result is correct.
#include<stdio.h>
#include<mpi.h>
#include<stdlib.h>
void transpose(int *** p, int row, int col)
{
int ** tempVar;
tempVar = malloc(sizeof(int *)* row);
if (tempVar==NULL){printf("malloc failed\n"); exit (1);}
tempVar[0] = malloc(sizeof (int )* col*row);
if (tempVar[0]==NULL){printf("malloc failed\n"); exit (1);}
int i = 0;
for (i=0; i < row; i++)
{
tempVar[i] = &tempVar[0][col*i];
int j = 0;
while (j < col)
{
tempVar[i][j] = (*p)[j][i];
j++;
}
}
free((*p)[0]);
free(*p);
*p = tempVar;
}
int main(int argc, char * argv[])
{
int rank, size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int d[] = { 1000, 1000, 1000, 1000, 1000, 1000 };
int vt[6] = { 1000, 1000, 1000, 1000, 1000, 1000 };
int ** p;
int i = 0;
int row = 6;
int col = 6;
p = malloc(sizeof(int *)* row);
if (p==NULL){printf("malloc failed\n"); exit (1);}
p[0] = malloc(sizeof(int )* row*col);
if (p[0]==NULL) {printf("malloc failed\n"); exit (1);}
while (i < row)
{
p[i] = &p[0][i*col];
/*int j = 0;
if (rank == 0)
{
while (j < 6)
{
scanf("%d", p[i][j]);
j++;
}
}*/
i++;
}
p[0][0] = 0; p[0][1] =2 ; p[0][2] =3 ; p[0][3] =1 ; p[0][4] =1000 ; p[0][5] =1000 ;
p[1][0] = 2; p[1][1] = 0; p[1][2] = 1000; p[1][3] = 1000; p[1][4] = 5; p[1][5] = 1000;
p[2][0] = 3; p[2][1] = 1000; p[2][2] = 0; p[2][3] = 1000; p[2][4] = 1000; p[2][5] = 1;
p[3][0] = 1; p[3][1] = 1000; p[3][2] = 1000; p[3][3] = 0; p[3][4] = 4; p[3][5] = 3;
p[4][0] = 1000; p[4][1] = 5; p[4][2] = 1000; p[4][3] = 4; p[4][4] = 0; p[4][5] = 2;
p[5][0] = 1000; p[5][1] = 1000; p[5][2] = 1; p[5][3] = 3; p[5][4] = 2; p[5][5] = 0;
int smallest;
if (rank == 0)
{
//transpose(&p , row , col);
smallest = 0;
vt[smallest] = smallest;
//MPI_Bcast();
}
int vt1, d1;
vt1 = d1 = 0;
int roww[col];
MPI_Scatter(vt, 1, MPI_INT, &vt1, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(d, 1, MPI_INT, &d1, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(&p[0][0], col, MPI_INT,roww, col, MPI_INT, 0, MPI_COMM_WORLD);
i = 0;
while (i < (row*row)/size)
{
MPI_Bcast(&smallest, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (vt1 != rank)
{
if (roww[smallest] != 1000)
{
if (d1 > roww[smallest])
d1 = roww[smallest];
}
}
MPI_Gather(&d1, 1, MPI_INT, d, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
smallest = d[0];
int k = 1;
int index = 0;
while (k < 6)
{
if (d[k] < smallest)
{
smallest = d[k];
index = k;
}
k++;
}
vt[k] = index;
}
MPI_Scatter(vt, 1, MPI_INT, &vt1, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Scatter(d, 1, MPI_INT, &d1, 1, MPI_INT, 0, MPI_COMM_WORLD);
i++;
}
free(p[0]);
free(p);
MPI_Finalize();
return 0;
}
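It can then be run with, for example, mpirun -np 6 ./main (the exact launcher command depends on your MPI installation; the hard-coded 6x6 problem assumes at most 6 processes).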

Related

Debug Assertion Failed For MPI program for matrix multiplication with some threads

Good day. I have some issues with running an MPI program that multiplies matrices.
This is the code (it is not my code); I got it from http://dkl.cs.arizona.edu/teaching/csc522-fall16/examples/hybrid-openmp-mm.c
I will be very grateful if you help me.
I have also looked for similar problems and solutions, but they didn't solve my problem.
#include <omp.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define TAG 13
int main(int argc, char* argv[]) {
double** A, ** B, ** C, * tmp;
double startTime, endTime;
int numElements, offset, stripSize, myrank, numnodes, N, i, j, k;
int numThreads, chunkSize = 10;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &numnodes);
N = atoi(argv[1]);
numThreads = atoi(argv[2]); // difference from MPI: how many threads/rank?
omp_set_num_threads(numThreads); // OpenMP call to set threads per rank
// allocate A, B, and C --- note that you want these to be
// contiguously allocated. Workers need less memory allocated.
if (myrank == 0) {
tmp = (double*)malloc(sizeof(double) * N * N);
A = (double**)malloc(sizeof(double*) * N);
for (i = 0; i < N; i++)
A[i] = &tmp[i * N];
}
else {
tmp = (double*)malloc(sizeof(double) * N * N / numnodes);
A = (double**)malloc(sizeof(double*) * N / numnodes);
for (i = 0; i < N / numnodes; i++)
A[i] = &tmp[i * N];
}
tmp = (double*)malloc(sizeof(double) * N * N);
B = (double**)malloc(sizeof(double*) * N);
for (i = 0; i < N; i++)
B[i] = &tmp[i * N];
if (myrank == 0) {
tmp = (double*)malloc(sizeof(double) * N * N);
C = (double**)malloc(sizeof(double*) * N);
for (i = 0; i < N; i++)
C[i] = &tmp[i * N];
}
else {
tmp = (double*)malloc(sizeof(double) * N * N / numnodes);
C = (double**)malloc(sizeof(double*) * N / numnodes);
for (i = 0; i < N / numnodes; i++)
C[i] = &tmp[i * N];
}
if (myrank == 0) {
// initialize A and B
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
A[i][j] = 1.0;
B[i][j] = 1.0;
}
}
}
// start timer
if (myrank == 0) {
startTime = MPI_Wtime();
}
stripSize = N / numnodes;
// send each node its piece of A -- note could be done via MPI_Scatter
if (myrank == 0) {
offset = stripSize;
numElements = stripSize * N;
for (i = 1; i < numnodes; i++) {
MPI_Send(A[offset], numElements, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD);
offset += stripSize;
}
}
else { // receive my part of A
MPI_Recv(A[0], stripSize * N, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
// everyone gets B
MPI_Bcast(B[0], N * N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Let each process initialize C to zero
for (i = 0; i < stripSize; i++) {
for (j = 0; j < N; j++) {
C[i][j] = 0.0;
}
}
// do the work---this is the primary difference from the pure MPI program
#pragma omp parallel for shared(A,B,C,numThreads) private(i,j,k) schedule (static, chunkSize)
for (i = 0; i < stripSize; i++) {
for (j = 0; j < N; j++) {
for (k = 0; k < N; k++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
// master receives from workers -- note could be done via MPI_Gather
if (myrank == 0) {
offset = stripSize;
numElements = stripSize * N;
for (i = 1; i < numnodes; i++) {
MPI_Recv(C[offset], numElements, MPI_DOUBLE, i, TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
offset += stripSize;
}
}
else { // send my contribution to C
MPI_Send(C[0], stripSize * N, MPI_DOUBLE, 0, TAG, MPI_COMM_WORLD);
}
// stop timer
if (myrank == 0) {
endTime = MPI_Wtime();
printf("Time is %f\n", endTime - startTime);
}
// print out matrix here, if I'm the master
if (myrank == 0 && N < 10) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%f ", C[i][j]);
}
printf("\n");
}
}
MPI_Finalize();
return 0;
}
And this is my issue:
You are doing an MPI_Bcast on B as if it's a contiguous block of N*N elements. However, it's not: it's an array of pointers to N separate arrays of length N. So either you need to allocate B contiguously, or you need to do N broadcasts.
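Sketches of both options, reusing the variable names from the program above (illustration only, not a verified drop-in fix):
/* Option 1 (sketch): back B with one contiguous block, so a single
   broadcast of N*N doubles starting at B[0] is valid. */
tmp = (double*)malloc(sizeof(double) * N * N);
B = (double**)malloc(sizeof(double*) * N);
for (i = 0; i < N; i++)
    B[i] = &tmp[i * N];
MPI_Bcast(B[0], N * N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
/* Option 2 (sketch): if each row were a separate allocation, broadcast
   the rows one at a time instead. */
for (i = 0; i < N; i++)
    MPI_Bcast(B[i], N, MPI_DOUBLE, 0, MPI_COMM_WORLD);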

How to send and receive data in mpi c++

I am a beginner in MPI programming and I am trying to do a matrix-vector multiplication (Ax=b).
Let's say matrix A is as follows:
|3 2 5|
matrix A= |4 3 1|
|2 4 2|
I divided matrix A into two matrices, A1 and A2, as follows:
|1 2 3|
matrix A1= |3 2 1|
|1 2 0|
|2 0 2|
matrix A2= |1 1 0|
|1 2 2|
The vector x is:
| 2 |
vector x= | 1 |
| 3 |
I need to calculate Ax=b in such a way that process 1 does the A1 * x multiplication and gives C1, process 2 does the A2 * x multiplication and gives C2, and at the end the sum of C1 and C2 is wrapped up in C. When I run the code through cmd it stops working and I don't know what the problem is. I would be really grateful if you could help me find out what is wrong in the code.
Here is my code:
#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<fstream>
#include<vector>
#include<iterator>
#include<sstream>
#include<string>
#include<cstdlib>
#include<cmath>
#include<stdio.h>
#include<conio.h>
#include<algorithm>
#include<ctime>
#include<iomanip>
#include<mpi.h>
#include<time.h>
#include<assert.h>
using namespace std;
void Initialise(int **res, int rows, int cols);
void Multiply(int **res, int **A, int **B, int aRows, int aCols, int bRows, int bCols);
void timestamp();
//**********************************************************
/* |3 2 5|
matrix A= |4 3 1|
|2 4 2|
matrix A is divided into two matrix A1,A2
|1 2 3|
matrix A1= |3 2 1|
|1 2 0|
|2 0 2|
matrix A2= |1 1 0|
|1 2 2|
| 2 |
vector x= | 1 |
| 3 |
| 23 |
C = | 14 |
| 14 |
//*********************************************************
*/
int main(int argc,char **argv)
{
int id, p;
MPI_Status status;
// p -> no. of processes
// id -> process id
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &id);
MPI_Comm_size(MPI_COMM_WORLD, &p);
cout << p << endl;
char processor_name[MPI_MAX_PROCESSOR_NAME];
int name_len;
MPI_Get_processor_name(processor_name, &name_len);
if (id == 0)// master
{
wtime = MPI_Wtime();
int aRows = 3;
int aCols = 3;
int bRows = 3;
int bCols = 1;
int** A = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
A[i] = new int[aCols];
}
int** A1 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
A1[i] = new int[aCols];
}
int** A2 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
A2[i] = new int[aCols];
}
int** B = new int*[bRows];
for (int i = 0; i < bRows; i++)
{
B[i] = new int[bCols];
}
//***************************************
A[0][0] = 3;
A[0][1] = 2;
A[0][2] = 5;
A[1][0] = 4;
A[1][1] = 3;
A[1][2] = 1;
A[2][0] = 2;
A[2][1] = 4;
A[2][2] = 2;
B[0][0] = 2;
B[1][0] = 1;
B[2][0] = 3;
//**************************************
A1[0][0] = 1;
A1[0][1] = 2;
A1[0][2] = 3;
A1[1][0] = 3;
A1[1][1] = 2;
A1[1][2] = 1;
A1[2][0] = 1;
A1[2][1] = 2;
A1[2][2] = 0;
//**************************************
A2[0][0] = 2;
A2[0][1] = 0;
A2[0][2] = 2;
A2[1][0] = 1;
A2[1][1] = 1;
A2[1][2] = 0;
A2[2][0] = 1;
A2[2][1] = 2;
A2[2][2] = 2;
//*************************************
B[0][0] = 2;
B[1][0] = 1;
B[2][0] = 3;
//*************************************
int** C;
C = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
C[i] = new int[bCols];
}
//************************************
int** C1;
C1 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
C1[i] = new int[bCols];
}
//************************************
int** C2;
C2 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
C2[i] = new int[bCols];
}
//***********************************
Multiply(C, A, B, aRows, aCols, bRows, bCols);
for (int i = 0; i < aRows; i++)
{
for (int j = 0; j < bCols; j++)
{
std::cout << C[i][j] << ' ';
}
std::cout << '\n';
}
MPI_Send(&aRows, 1, MPI_INT, 1, 1, MPI_COMM_WORLD);
MPI_Send(&aCols, 1, MPI_INT, 1, 2, MPI_COMM_WORLD);
MPI_Send(&bRows, 1, MPI_INT, 1, 3, MPI_COMM_WORLD);
MPI_Send(&bCols, 1, MPI_INT, 1, 4, MPI_COMM_WORLD);
MPI_Send(&aRows, 1, MPI_INT, 2, 5, MPI_COMM_WORLD);
MPI_Send(&aCols, 1, MPI_INT, 2, 6, MPI_COMM_WORLD);
MPI_Send(&bRows, 1, MPI_INT, 2, 7, MPI_COMM_WORLD);
MPI_Send(&bCols, 1, MPI_INT, 2, 8, MPI_COMM_WORLD);
MPI_Send(&A1, aRows*aCols, MPI_INT, 1, 9, MPI_COMM_WORLD);
MPI_Send(&B , bRows*bCols, MPI_INT, 1, 10, MPI_COMM_WORLD);
MPI_Send(&A2, aRows*aCols, MPI_INT, 2, 11, MPI_COMM_WORLD);
MPI_Send(&B, bRows*bCols, MPI_INT, 2, 12, MPI_COMM_WORLD);
}
for (id=1;id<3;id++)
{
if (id == 1)
{
int aRows, aCols, bRows, bCols;
MPI_Recv(&aRows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
printf("receive data:%d", aRows);
MPI_Recv(&aCols, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, &status);
printf("receive data:%d", aCols);
MPI_Recv(&bRows, 1, MPI_INT, 0, 3, MPI_COMM_WORLD, &status);
printf("receive data:%d", bRows);
MPI_Recv(&bCols, 1, MPI_INT, 0, 4, MPI_COMM_WORLD, &status);
printf("receive data:%d", bCols);
//int s = status.MPI_SOURCE;
//int t = status.MPI_TAG;
int** A1 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
A1[i] = new int[aCols];
}
int** B = new int*[bRows];
for (int i = 0; i < bRows; i++)
{
B[i] = new int[bCols];
}
int** C1 = new int*[bRows];
for (int i = 0; i < bRows; i++)
{
C1[i] = new int[bCols];
}
//***********************************************************
MPI_Recv(&A1, aRows*aCols, MPI_INT, 0, 9, MPI_COMM_WORLD, &status);
printf("receive data:%d", A1);
MPI_Recv(&B , aRows*aCols, MPI_INT, 0, 10, MPI_COMM_WORLD, &status);
printf("receive data:%d", B);
Multiply(C1, A1, B, aRows, aCols, bRows, bCols);
for (int i = 0; i < aRows; i++)
{
for (int j = 0; j < bCols; j++)
{
cout << C1[i][j] << endl;
}
}
}
else
{
int aRows, aCols, bRows, bCols;
MPI_Recv(&aRows, 1, MPI_INT, 0, 5, MPI_COMM_WORLD, &status);
MPI_Recv(&aCols, 1, MPI_INT, 0, 6, MPI_COMM_WORLD, &status);
MPI_Recv(&bRows, 1, MPI_INT, 0, 7, MPI_COMM_WORLD, &status);
MPI_Recv(&bCols, 1, MPI_INT, 0, 8, MPI_COMM_WORLD, &status);
int** A2 = new int*[aRows];
for (int i = 0; i < aRows; i++)
{
A2[i] = new int[aCols];
}
int** B = new int*[bRows];
for (int i = 0; i < bRows; i++)
{
B[i] = new int[bCols];
}
int** C2 = new int*[bRows];
for (int i = 0; i < bRows; i++)
{
C2[i] = new int[bCols];
}
MPI_Recv(&A2, aRows*aCols, MPI_INT, 0, 11, MPI_COMM_WORLD, &status);
printf("receive data:%d", A2);
MPI_Recv(&B , aRows*aCols, MPI_INT, 0, 12, MPI_COMM_WORLD, &status);
printf("receive data:%d", B);
//**************************************************************
MPI_Status status;
Multiply(C2, A2, B, aRows, aCols, bRows, bCols);
for (int i = 0; i < aRows; i++)
{
for (int j = 0; j < bCols; j++)
{
cout << C2[i][j] << endl;
}
}
}
//MPI_Recv(&(C1[0][0]), aRows*bCols, MPI_INT, 0, tag, MPI_COMM_WORLD,&status);
}
MPI_Finalize();
return 0;
}
void Multiply(int **res, int **A, int **B, int aRows, int aCols, int bRows, int bCols)
{
if (aCols != bRows)
return;
for (int i = 0; i < aRows; i++)
{
for (int j = 0; j < bCols; j++)
{
res[i][j] = 0;
for (int k = 0; k < aCols; k++)
{
res[i][j] += A[i][k] * B[k][j];
}
}
}
}
void Initialise(int **res, int rows, int cols)
{
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
{
res[i][j] = 0;
}
}
}
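For context, here is a minimal standalone sketch (made-up sizes, assuming at least two processes; not a verified fix for the code above) of sending a whole matrix in a single MPI_Send by storing it contiguously:
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int aRows = 3, aCols = 3;
    /* one contiguous block; element (i, j) lives at A1[i*aCols + j] */
    int *A1 = malloc(aRows * aCols * sizeof(int));
    if (rank == 0) {
        for (int i = 0; i < aRows * aCols; i++)
            A1[i] = i;                                  /* fill with some values */
        MPI_Send(A1, aRows * aCols, MPI_INT, 1, 9, MPI_COMM_WORLD);
    } else if (rank == 1) {
        MPI_Recv(A1, aRows * aCols, MPI_INT, 0, 9, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    free(A1);
    MPI_Finalize();
    return 0;
}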

How to evenly distribute an array with Scatterv

My program is meant to take an array size and the elements of that particular array from the user.
However, I want the program to be able to distribute the array elements evenly over any number of processors used.
I think the problem is in the displs array, but even after countless try-outs I don't seem to be reaching any logical conclusion.
Let's say I enter a sequence of 7 numbers -> 1,2,3,4,5,6,7
I get output like this:
processor 0
arr[0] = 1
arr[1] = 2
arr[2] = 3
processor 1
arr[0] = 4
arr[1] = 5
processor 2
arr[0] = 7
arr[1] = 32767
The code is the following:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#define ARRAY_SIZE 100
int main(int argc, char **argv)
{
int myrank, wsize;
int i,N;
int *arr,*displs, *arr_r, *sendcount;
int sum1=0;
int portion,remainder,x,y;
int root;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
if(myrank == 0)
{
printf("Enter number N of integers\n");
scanf("%d", &N);
arr = (int*)malloc(N*sizeof(int));
for(i = 0; i < N; i++)
{
printf("Enter number %d\n", i+1);
scanf("%d",&arr[i]);
}
}
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
portion = N / wsize;
remainder = N % wsize;
x = portion;
y = portion +1;
displs = (int*)malloc(N*sizeof(int));
sendcount = (int*)malloc(N*sizeof(int));
for(i=0; i < N; i++)
{
if(myrank < remainder)
{
sendcount[i] = portion + (remainder);
displs[i] = (portion + (remainder)) * i;
}
else if(remainder == 0)
{
sendcount[i] = portion;
displs[i] = portion *i;
}
else
{
sendcount[i] = portion;
displs[i] = portion * i;
}
}
arr_r = (int*)malloc(N *sizeof(int));
MPI_Scatterv(arr, sendcount, displs, MPI_INT, arr_r, N, MPI_INT, 0, MPI_COMM_WORLD);
if(myrank < remainder)
{
printf("process %d \n",myrank);
for(i = 0; i < portion + 1; i++)
{
printf("Arr[%d] = %d\n",i,arr_r[i]);
}
}
else if(remainder == 0)
{
printf("process %d \n",myrank);
for(i = 0; i < portion; i++)
{
printf("Arr[%d] = %d\n",i,arr_r[i]);
}
}
else
{
printf("process %d \n",myrank);
for(i = 0; i < portion; i++)
{
printf("Arr[%d] = %d\n",i,arr_r[i]);
}
}
MPI_Finalize();
return 0;
}
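For reference, one common way to build per-process sendcounts and displacements for an uneven split looks like this (a sketch only, not a verified fix for the code above):
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char **argv)
{
    int rank, wsize;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    int N = 7;                                   /* example length */
    int *arr = NULL;
    if (rank == 0) {
        arr = malloc(N * sizeof(int));
        for (int i = 0; i < N; i++) arr[i] = i + 1;
    }
    /* One entry per process (not per element): the first N%wsize ranks get
       one extra element; displacements accumulate the counts. */
    int *sendcount = malloc(wsize * sizeof(int));
    int *displs = malloc(wsize * sizeof(int));
    int offset = 0;
    for (int p = 0; p < wsize; p++) {
        sendcount[p] = N / wsize + (p < N % wsize ? 1 : 0);
        displs[p] = offset;
        offset += sendcount[p];
    }
    int mycount = sendcount[rank];
    int *arr_r = malloc(mycount * sizeof(int));
    MPI_Scatterv(arr, sendcount, displs, MPI_INT,
                 arr_r, mycount, MPI_INT, 0, MPI_COMM_WORLD);
    for (int i = 0; i < mycount; i++)
        printf("process %d: arr[%d] = %d\n", rank, i, arr_r[i]);
    free(arr_r); free(sendcount); free(displs); free(arr);
    MPI_Finalize();
    return 0;
}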

Expression must have pointer to object type error in MPI send

I got a syntax error in an MPI_Send command. I wanted to send some rows and the respective columns with it. I get the error in this line, MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD), at ch_col. I can't understand why I am getting this error.
int tot_processes;
int process_id;
MPI_Comm_size(MPI_COMM_WORLD, &tot_processes);
MPI_Comm_rank(MPI_COMM_WORLD, &process_id);
if (process_id == 0) {
int row, col;
cout << "Enter rows and columns: ";
cin >> row >> col;
int *array = new int[row*col];
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
array[i][j] = 1;
}
}
int ch_size = row / tot_processes;
for (int p = 1; p < tot_processes; p++) {
int ch_row = ch_size * (p - 1);
int ch_col = ch_size * col;
MPI_Send(&ch_size, 1, MPI_INT, p, 0, MPI_COMM_WORLD);
MPI_Send(&(array[ch_row][ch_col]), ch_size*col, MPI_INT, p, 1, MPI_COMM_WORLD);
}
}
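For context, here is a small standalone sketch of row-major indexing on a flat buffer (made-up sizes, using names similar to the ones above; not a verified fix for the code above):
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int row = 4, col = 3;
    int *array = malloc(row * col * sizeof(int));
    for (int i = 0; i < row; i++)
        for (int j = 0; j < col; j++)
            array[i * col + j] = 1;          /* array[i][j] becomes array[i*col + j] */
    if (size > 1) {
        int ch_size = 2, ch_row = 1;         /* send rows 1 and 2 to rank 1 */
        if (rank == 0)
            MPI_Send(&array[ch_row * col], ch_size * col, MPI_INT, 1, 1, MPI_COMM_WORLD);
        else if (rank == 1)
            MPI_Recv(&array[ch_row * col], ch_size * col, MPI_INT, 0, 1,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    free(array);
    MPI_Finalize();
    return 0;
}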

Parallel processing - do the slaves generate the data set?

In my parallel programming book, I came across this code, and the book says that the slaves generate the data set; however, I think the master actually generates the data set.
This loop in particular is why I believe that the master generates the data set.
for (i=0; i < ARRAY_SIZE; i++)
numbers[i] = i;
Can someone confirm whether the master or the slaves generate the data set?
#include "mpi.h"
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define TRIALS 20
#define ARRAY_SIZE 1000000
int main(int argc, char *argv[])
{
int myid, numprocs;
double startwtime, endwtime;
int namelen;
int* numbers = new int[ARRAY_SIZE];
int i, j, sum, part_sum;
int s, s0, startIndex, endIndex;
double totalTime;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Get_processor_name(processor_name,&namelen);
fprintf(stderr,"Process %d on %s\n", myid, processor_name);
fflush(stderr);
for (i=0; i < ARRAY_SIZE; i++)
numbers[i] = i;
if (myid == 0)
{
s = (int) floor(ARRAY_SIZE/numprocs);
s0 = s + ARRAY_SIZE%numprocs;
//printf("s=%d , s0= %d\n", s, s0);
}
MPI_Bcast(&s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&s0, 1, MPI_INT, 0, MPI_COMM_WORLD);
startIndex = s0 + (myid - 1)*s;
endIndex = startIndex + s;
totalTime = 0;
for (j = 1; j <= TRIALS; j++)
{
if (myid == 0)
{
startwtime = MPI_Wtime();
}
sum = 0;
part_sum = 0;
if (myid == 0) // master
{
// compute sum of master's numbers
for (i = 0; i < s0; i++)
{
part_sum += numbers[i];
}
}
else
{
for (i = startIndex; i < endIndex; i++)
{
part_sum += numbers[i];
}
}
MPI_Reduce(&part_sum, &sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0)
{
double runTime;
endwtime = MPI_Wtime();
runTime = endwtime - startwtime;
printf("Trial %d : Execution time (sec) = %f\n", j, runTime);
printf("Sum = %d \n", sum);
totalTime += runTime;
}
} // end for
if (myid == 0)
printf("Average time for %d trials = %f", TRIALS, totalTime/TRIALS);
MPI_Finalize();
}
Both the master and the slaves generate the entire array. You have to remember that your program runs on all nodes and the part of the code in question doesn't distinguish between master/slave. So the wording of your book isn't wrong, but it could be clarified. :)
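For illustration only (an assumed variant, not the book's code, reusing myid, numbers and ARRAY_SIZE from above): if the intent were for only the master to generate the data, the loop would have to be guarded by the rank and the array shared explicitly afterwards:
/* Only rank 0 fills the array, then shares it, so generation is master-only. */
if (myid == 0)
{
    for (i = 0; i < ARRAY_SIZE; i++)
        numbers[i] = i;
}
MPI_Bcast(numbers, ARRAY_SIZE, MPI_INT, 0, MPI_COMM_WORLD);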