I already tried to gather it with MPI_Gatherv, but it still does not work: the program hangs when I run it. I have tried placing MPI_Gatherv almost everywhere in this code and I still don't get it right. This is the last place I put the MPI_Gatherv.
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int i;
int recvcounts[4] = { 0, 1, 2, 3 };
int displ[4] = { 0, 0, 1, 3 };
int buffer[6];
size_t total;
size_t elemsize;
int sizes[3];
int master=0;
char **argv;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if( rank == master )
{
//start=MPI_Wtime();
src= imread("E:/sobel/Debug/jari.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
dst = grey.clone();
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"citra terdiri dari "<<total<<" elements dengan ukuran yaitu "<<sizes[0]<<" x "<<sizes[1]<<endl;
if( !grey.data )
{ return -1; }
//start=MPI_Wtime();
}
//if( rank == master )
start=MPI_Wtime();
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
// cout<<"rank "<<rank<<" : "<<sizes[0]<<" x "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.create(sizes[0],sizes[1],CV_8U);
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
}
}
for (i=0; i<rank; i++)
{
buffer[i] = rank;
}
recvcounts[i]=grey.cols*(grey.rows/size);
displ[i+1]=displ[i]+recvcounts[i];
MPI_Gatherv(buffer, rank, MPI_INT,buffer, recvcounts, displ, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
for (i=0; i<6; i++)
fflush(stdout);
}
grey.release();
imwrite("E:/sobel/Debug/deteksi tepi mpi.jpg", grey2);
//grey2.release();
end=MPI_Wtime();
cout<<"rank "<<rank<<" : waktu eksekusi sobel MPI adalah : "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
I get an error (red underline) at [i] in:
int recvcounts[i]=grey.cols*(grey.rows/size);
int displ[i+1]=displ[i]+recvcounts[i];
What should I do next? Please help me fix it. The program hangs when I execute the code with 4 processes.
In MPI, sending the pointer to an object is not enough. Unlike threads or OpenMP, the default behavior is parallel: if you write imwrite(name, grey2);, the image grey2 will be written size times. If you send the pointer grey from proc 0 to proc 1, the grey pointer on proc 1 will point to memory owned by proc 0. This will probably cause failures.
MPI offers you many ways to communicate beyond MPI_Send() and MPI_Recv(). For instance, MPI_Bcast() is suitable for sending the image from proc 0 to all procs. http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Bcast.html
I changed your code to use MPI_Bcast(), sending the size of the image first and then the data.
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int master=0;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == master )
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
/*else if (rank==1)
{
MPI_Recv(&grey, 1, MPI_LONG, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
cout<<"rank 1 menerima data"<<endl;
}*/
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
if(!grey.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
cout<<" ok !"<<endl;
}
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
grey2.create(sizes[0],sizes[1],CV_8U);
char name[100];
sprintf(name,"jari%d.jpg",rank+42+size);
imwrite(name, grey );
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
//cout<<sum<<endl;
}
}
grey.release();
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
sprintf(name,"jari%d.jpg",rank+42);
imwrite(name, grey2 );
grey2.release();
//MPI_Barrier(MPI_COMM_WORLD);
end=MPI_Wtime();
cout<<"time: "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
To retrieve the data on proc 0, the MPI_Gatherv() function seems useful. http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Gatherv.html or http://mpi.deino.net/mpi_functions/MPI_Gatherv.html I let you carry on with your code. You may need an extended look at tutorials and examples...
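For reference, here is a minimal, self-contained sketch of an MPI_Gatherv call that collects a different number of image rows from each rank onto rank 0. The image dimensions and the row split are assumptions made only for this example, not taken from the code above.
#include <mpi.h>
#include <vector>
#include <cstdio>
int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    // assumed image size, purely illustrative
    const int rows = 10, cols = 8;
    // this rank owns `myrows` rows of the image
    int myrows = rows / size + (rank < rows % size ? 1 : 0);
    std::vector<unsigned char> slice(myrows * cols, (unsigned char)rank);
    // recvcounts/displs describe, in elements, what rank 0 receives from each rank
    std::vector<int> recvcounts(size), displs(size);
    for (int i = 0; i < size; ++i) {
        recvcounts[i] = (rows / size + (i < rows % size ? 1 : 0)) * cols;
        displs[i] = (i == 0) ? 0 : displs[i - 1] + recvcounts[i - 1];
    }
    std::vector<unsigned char> full(rank == 0 ? rows * cols : 0);
    MPI_Gatherv(slice.data(), myrows * cols, MPI_UNSIGNED_CHAR,
                rank == 0 ? full.data() : nullptr, recvcounts.data(), displs.data(),
                MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
    if (rank == 0)
        std::printf("gathered %d pixels on rank 0\n", rows * cols);
    MPI_Finalize();
    return 0;
}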
Edit:
I largely changed the code and I hope this piece of code will end your quest...
I changed my mind and used MPI_Scatterv() to send a small slice of the image to each proc. I also changed the computation of the gradient... Then I retrieve the image on one proc using MPI_Gatherv(). In the end, the overall speedup is low, because most of the time is spent opening and writing files. Moreover, such filters (and this code in particular...) need a large memory bandwidth.
I fear that you did not fully understand how the first piece of code works. But this one is far from being clear... I had trouble with the indexes...
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
static inline int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
static inline int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
static inline int xGradientd(uchar* pt, int cols)
{
return ((int)(pt[-cols+1])+2*pt[1]+pt[cols+1]-pt[-cols-1]-2*pt[-1]-pt[cols-1]);
}
static inline int yGradientd(uchar* pt, int cols )
{
return ((int)(pt[cols-1])+2*pt[cols]+pt[cols+1]-pt[-cols-1]-2*pt[-cols]-pt[-cols+1]);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
Mat grey3;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == 0)
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
//start of parallel part. To this point, only proc 0 was working.
if( rank == 0 )
{
start=MPI_Wtime();
}
//the sizes of the image grey are send to all processus.
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
//cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
int recvcount[size];
int displ[size];
int i;
//compute size of local image
//on each proc, a little slice of the image will be received from proc 0 through MPI_Scatterv
//to compute the gradient, two extra lines should be send on top and bottom of slice.(except for 0 and sizes-1)
//this is why there are so many tests.
//how many pixels on the slice ? sendcount.
int sendcount=sizes[1]*(sizes[0]/size)+2*sizes[1];
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size))+sizes[1];
}
if(rank==0){
sendcount-=sizes[1];
}
//printf("creating image %d %d \n",sendcount/sizes[1],sizes[1]);
//image allocation :
grey3.create(sendcount/sizes[1],sizes[1],CV_8U);
if(!grey3.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
//cout<<" ok !"<<endl;
}
//compute sizes and offsets on proc 0
//how many char should be sent from proc 0 to proc i ? recvcount[i].
//where does the data starts ? displ[i].
//these information are needed by MPI_Scatterv() on proc 0
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size)+grey.cols;
if(i>0){
recvcount[i]+=grey.cols;
}
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1]-2*grey.cols;
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size>1){
recvcount[size-1]+=grey.cols;
}
if(size-1>0){
displ[size-1]=grey.cols*(grey.rows)-recvcount[size-1];
}
}
/*
if(rank==master){
for(i=0;i<size;i++){
printf("count %d displ %d \n",recvcount[i],displ[i]);
}
}
*/
MPI_Scatterv( grey.data, recvcount, displ, MPI_CHAR, grey3.data, sendcount,MPI_CHAR,0, MPI_COMM_WORLD);
/*
char name[100];
sprintf(name,"jariscat%d.jpg",rank);
imwrite(name, grey3 );
*/
//MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
//this local slice will store the result of the gradient operation
grey2.create(sendcount/sizes[1],sizes[1],CV_8U);
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*sizes[0]/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*sizes[0]/size);
if(stopy>sizes[0] - 1)
{stopy=sizes[0] - 1;}
int ii=grey3.cols;
uchar* data=grey3.data;
uchar* datad=grey2.data;
for(int y = starty; y < stopy; y++)
{
ii++;
for(int x = 1; x < sizes[1] - 1; x++)
{
//gx = xGradient(grey, x, y);
gx=xGradientd(&data[ii],grey2.cols);
gy=yGradientd(&data[ii],grey2.cols);
//gy = yGradient(grey, x, y);
//printf("%d %d \n",gx,gy);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
datad[ii] = sum;
//cout<<sum<<endl;
ii++;
}
ii++;
}
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
/*
sprintf(name,"jarigrad%d.jpg",rank);
imwrite(name, grey2 );
*/
// now, the data in grey2 should be sent from every processor in image grey on proc 0
//MPI_Gatherv will be used.
//on proc 0, count of bytes to be received from each processor should be computed
// as well as displacements representing where each part should be placed in image grey
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size);
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1];
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size-1>0){
displ[size-1]=recvcount[size-2]+displ[size-2];
}
}
//on each processor, how many lines should be sent ? sendcount.
//where does the data in grey2 starts ? tosend.
sendcount=sizes[1]*(sizes[0]/size);
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size));
}
uchar* tosend=&grey2.data[grey2.cols];
if(rank==0){
tosend=&grey2.data[0];
}
MPI_Gatherv(tosend,sendcount , MPI_CHAR,grey.data, recvcount, displ,MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.release();
//everything is back on proc 0 in image grey
end=MPI_Wtime();
if(rank==0){
imwrite("output.jpg", grey );
cout<<"time: "<< end-start << " detik " <<endl;
grey.release();
}
akhir=MPI_Finalize();
//waitKey();
return 0;
}
Bye,
Francis
Related
Making Mandelbrot with MPI
So I've made a Mandelbrot generator and everything worked fine. Now I'm throwing in a speedup from MPI. Process 0 generates a file named mbrot.ppm and adds the appropriate metadata, then divides up the workload into chunks.
Each process receives the chunk's starting and ending positions and gets to work calculating its portion of the Mandelbrot set. To write to the mbrot.ppm file, each process saves its data in an array so it doesn't write to the file before the previous process finishes.
My Problem
It's a runtime error that says:
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node Lenovo exited on signal 11 (Segmentation fault).
I believe it comes from the line int data[3][xrange][yrange]; (line 120) since the print statement after this line never executes. Would there be an obvious reason I'm missing why this multi-dimensional array is causing me problems?
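For what it's worth, int data[3][xrange][yrange]; is a variable-length array placed on the stack; with xrange = 2000 and yrange = 500 (a 2000-pixel image split four ways) that is roughly 12 MB, which exceeds a typical 8 MB stack limit and could explain a segfault at exactly that line. Below is a minimal sketch of putting that buffer on the heap instead; the sizes and the idx() helper are illustrative names, not your actual chunk values.
#include <vector>
#include <cstdio>
int main()
{
    int xrange = 2000, yrange = 500;             // assumed chunk dimensions
    std::vector<int> data(3 * xrange * yrange);  // lives on the heap, not the stack
    // same role as data[c][x][y] in the original 3D layout
    auto idx = [&](int c, int x, int y) { return (c * xrange + x) * yrange + y; };
    data[idx(0, 10, 20)] = 255;
    std::printf("%d\n", data[idx(0, 10, 20)]);
    return 0;
}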
Full Code
#include <iostream>
#include <mpi.h>
#include <unistd.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#define MCW MPI_COMM_WORLD
using namespace std;
struct Complex {
double r;
double i;
};
Complex operator + (Complex s, Complex t) {
Complex v;
v.r = s.r + t.r;
v.i = s.i + t.i;
return v;
};
Complex operator * (Complex s, Complex t) {
Complex v;
v.r = s.r * t.r - s.i * t.i;
v.i = s.r * t.i + s.i * t.r;
return v;
};
int rcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int gcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int bcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int mbrot(Complex c, int maxIters) {
int i = 0;
Complex z;
z = c;
while (i < maxIters && z.r * z.r + z.i * z.i < 4) {
z = z * z + c;
i++;
}
return i;
};
int main(int argc, char * argv[]) {
int rank, size;
MPI_Init( & argc, & argv);
MPI_Comm_rank(MCW, & rank);
MPI_Comm_size(MCW, & size);
if (size < 2) {
printf("Not an MPI process if only 1 process runs.\n");
exit(1);
}
if (size % 2 != 0) {
printf("Please use a even number\n");
exit(1);
}
Complex c1, c2, c;
char path[] = "brot.ppm";
int DIM;
int chunk[4];
c1.r = -1;
c1.i = -1;
c2.r = 1;
c2.i = 1;
if (rank == 0) { //start the file
ofstream fout;
fout.open(path);
DIM = 2000; // pixel dimensions
fout << "P3" << endl; // The file type .ppm
fout << DIM << " " << DIM << endl; // dimensions of the image
fout << "255" << endl; // color depth
fout.close();
// making dimesions marks
for (int i = 0; i < size; i++) {
chunk[0] = 0; // startX
chunk[1] = DIM; // endX
chunk[2] = (DIM / size) * i; // startY
chunk[3] = (DIM / size) * (i + 1); // endY
MPI_Send(chunk, 4, MPI_INT, i, 0, MCW);
};
};
MPI_Recv(chunk, 4, MPI_INT, 0, 0, MCW, MPI_STATUS_IGNORE);
printf("Process %d recieved chunk\n\t StartX: %d, EndX: %d\n\t StartY: %d, EndY: %d\n", rank, chunk[0], chunk[1], chunk[2], chunk[3]);
// do stuff save in array
// data[3 elements][Xs][Ys]
int xrange = chunk[1] - chunk[0];
int yrange = chunk[3] - chunk[2];
printf("Process %d, x: %d, y: %d\n", rank, xrange, yrange);
int data[3][xrange][yrange];
printf("done\n");
// generate data for mandlebrot
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
// calculate one pixel of the DIM x DIM image
c.r = (i * (c1.r - c2.r) / DIM) + c2.r;
c.i = (j * (c1.i - c2.i) / DIM) + c2.i;
int iters = mbrot(c, 255);
data[0][i][j] = rcolor(iters);
data[1][i][j] = gcolor(iters);
data[2][i][j] = bcolor(iters);
}
}
printf("here2\n");
// taking turns to write their data to file
for (int k = 0; k < size; k++) {
if (rank == k) {
ofstream fout;
fout.open(path, ios::app);
fout << rank << " was here" << endl;
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
fout << data[0][i][j] << " " << data[1][i][j] << " " << data[2][i][j] << " ";
}
fout << endl;
}
printf("Process %d done and waiting\n", rank);
} else {
MPI_Barrier(MCW);
}
}
MPI_Finalize();
};
How to Run
$ mpic++ -o mbrot.out mbrot.cpp
$ mpirun -np 4 mbrot.out
Hello, I have a problem with my C++ code. I'm trying to make a parallel implementation of my sequential Sobel operator code using OpenCV.
My idea is to scatter the picture using a 2D buffer, apply the Sobel operation to averaged_rows*cols, and then do the gathering. Once I have sent averaged_rows and every rank has received it, I try to use MPI_Scatter and this execution error appears:
sent to 1
sent to 2
sent to 3
recieved by 1
recieved by 2
recieved by 3
[roronoasins-GL552VW:3245] *** An error occurred in MPI_Scatter
[roronoasins-GL552VW:3245] *** reported by process [1759117313,1]
[roronoasins-GL552VW:3245] *** on communicator MPI_COMM_WORLD
[roronoasins-GL552VW:3245] *** MPI_ERR_TRUNCATE: message truncated
[roronoasins-GL552VW:3245] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[roronoasins-GL552VW:3245] *** and potentially your MPI job)
[roronoasins-GL552VW:03239] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[roronoasins-GL552VW:03239] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
What I actually do is scatter the pic buffer, broadcast the picture to the rest of the ranks, and then do the gathering.
MPI_Scatter(pic, cols*rows_av, MPI_INT, picAux, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (pic3, cols*rows, MPI_INT, 0, MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*pic_struct[2]; y < (ip+1)*pic_struct[2] -1; y++){
for(int x = 1; x < pic_struct[1]- 1; x++){
int gx = x_gradient(pic3, x, y);
int gy = y_gradient(pic3, x, y);
int sum = abs(gx) + abs(gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_INT, pic, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
I'd like to know what is happening with the Scatter function. I thought that I could scatter pieces of the picture to the other ranks to compute the Sobel filter, but maybe I'm wrong.
My code is here if you want to check it. Thanks for your time.
// > compile with mpic++ mpi_sobel.cpp -o mpi_sobel `pkg-config --libs opencv` -fopenmp -lstdc++
#include <iostream>
#include <cmath>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <omp.h>
#include <mpi.h>
using namespace std;
using namespace cv;
Mat src, dst;
/*
Computes the x component of the gradient vector
at a given point in a image.
returns gradient in the x direction
| 1 0 -1 |
Gx = | 2 0 -2 |
| 1 0 -1 |
*/
int x_gradient(int** image, int x, int y)
{
return image[y-1][x-1] +
2*image[y][x-1] +
image[y+1][x-1] -
image[y-1][x+1] -
2*image[y][x+1] -
image[y+1][x+1];
}
/*
Computes the y component of the gradient vector
at a given point in a image
returns gradient in the y direction
| 1 2 1 |
Gy = | 0 0 0 |
|-1 -2 -1 |
*/
int y_gradient(int** image, int x, int y)
{
return image[y+1][x-1] +
2*image[y+1][x] +
image[y+1][x+1] -
image[y-1][x-1] -
2*image[y-1][x] -
image[y-1][x+1];
}
int main(int argc, char** argv)
{
string picture;
if (argc == 2) {
picture = argv[1];
src = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE);
}
else {
picture = "input/logan.jpg";
src = imread(picture.c_str(), CV_LOAD_IMAGE_GRAYSCALE);
}
if( !src.data )
{ return -1; }
dst.create(src.rows, src.cols, src.type());
int rows_av, rows_extra;
Size s = src.size();
int rows = s.height;
int cols = s.width;
int pic[rows][cols];int picAux[rows][cols];
int ** pic3;
pic3 = new int*[rows];
for(int y = 0; y < rows; y++)
pic3[y] = new int[cols];
int pic_struct[3], pic_struct_recv[3];
int np, ip;
double start_time = omp_get_wtime();
if (MPI_Init(&argc, &argv) != MPI_SUCCESS){
exit(1);
}
MPI_Comm_size(MPI_COMM_WORLD, &np);
MPI_Comm_rank(MPI_COMM_WORLD, &ip);
MPI_Status status;
if(ip==0)
{
for(int y = 0; y < rows ; y++)
for(int x = 0; x < cols; x++)
{
pic3[y][x] = src.at<uchar>(y,x);
pic[y][x] = 0;
picAux[y][x] = 0;
}
src.release();
rows_av = rows/np;
//cols_av = cols/np;
pic_struct[0] = rows;
pic_struct[1] = cols;
pic_struct[2] = rows_av;
//pic_struct[3] = cols_av:
for(int i=1; i < np; i++)
{
//rows = (i <= rows_extra) ? rows_av+1 : rows_av;
pic_struct[0] = rows;
MPI_Send(&pic_struct, sizeof(pic_struct), MPI_BYTE, i, 0, MPI_COMM_WORLD);
cout << "sent to " << i << endl;
}
}else{//ip
MPI_Recv(&pic_struct, sizeof(pic_struct), MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
cout << "recieved by " << ip << endl;
}
MPI_Scatter(pic, cols*rows_av, MPI_INT, picAux, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (pic3, cols*rows, MPI_INT, 0, MPI_COMM_WORLD);
cout << "bcast" << endl;
//MPI_Barrier(MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*pic_struct[2]; y < (ip+1)*pic_struct[2] -1; y++){
for(int x = 1; x < pic_struct[1]- 1; x++){
ip_gx = x_gradient(src, x, y);
ip_gy = y_gradient(src, x, y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_INT, pic, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
cout << "gather" << endl;
MPI_Finalize();
if(!ip)
{
double time = omp_get_wtime() - start_time;
for( int i = 0 ; i < rows ; i++ )
{
delete [] pic3[i] ;
delete [] pic3 ;
}
cout << "Number of processes: " << np << endl;
cout << "Rows, Cols: " << rows << " " << cols << endl;
cout << "Rows, Cols(Division): " << rows_av << ", " << cols << endl << endl;
cout << "Processing time: " << time << endl;
for(int i=0; i < 6 ; i++) picture.erase(picture.begin());
for(int i=0; i < 4 ; i++) picture.pop_back();
picture.insert(0,"output/");
picture += "-sobel.jpg";
for(int y = 0; y < rows; y++)
for(int x = 0; x < cols; x++)
dst.at<uchar>(y,x) = pic[y][x];
if(imwrite(picture.c_str(), dst)) cout << "Picture correctly saved as " << picture << endl;
else cout << "\nError has occurred being saved." << endl;
}
return 0;
}
Update: I forgot rows_av in the ranks != 0, and the sending of pic3 is fixed. I've packed src into a contiguous buffer and it is correct on each rank.
updated code here: https://pastebin.com/jPV9mGFW
I have noticed that there is noise in the dark 3/4 of the image; with this new issue I don't know whether the gathering is now the problem or whether I am doing the operations with number_process*rows/total_processes wrong.
MPI_Scatter(pic, cols*rows_av, MPI_UNSIGNED_CHAR, picAux, cols*rows_av, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*rows_av+1; y < (ip+1)*rows_av-1; y++){
for(int x = 1; x < cols ; x++){
ip_gx = x_gradient(src, x, y);
ip_gy = y_gradient(src, x, y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
//picAux[y*rows_av+x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_UNSIGNED_CHAR, pic, cols*rows_av, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
The loop is updated and the image is now fully computed, but I can't use images bigger than 2048x1536.
for(int y = 1; y < rows_av-1; y++){
for(int x = 1; x < cols ; x++){
ip_gx = x_gradient(src, x, ip*rows_av+y);
ip_gy = y_gradient(src, x, ip*rows_av+y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y*cols+x] = sum;
}
}
How could I handle images larger than 2048x1536?
--------------------------------------------------------------------------
mpirun noticed that process rank 2 with PID 0 on node roronoasins-GL552VW exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The image-size issue was the limited stack size; with ulimit -s unlimited it works fine, but I'm now working on improving memory efficiency. The latest code will be updated in the pastebin link above.
I have to decompose and recompose a matrix in MPI (I'm using MPICH), and I'm using Scatterv and Gatherv as in the example from this question. Everything works well for small matrices, but when the matrix size increases (starting from 800x800), the program hangs when it reaches MPI_Gatherv. By printing debug messages, I can see that every process passes the call to Gatherv, except the one with rank 0 (the root process in the Gatherv call).
Any suggestion? Here's the code:
#include <iostream>
#include <cstring>
#include <fstream>
#include <cstdlib>
#include "mpi.h"
using namespace std;
#define TOP_ROW_TAG 1
#define BOTTOM_ROW_TAG 2
#define LEFT_COL_TAG 3
#define RIGHT_COL_TAG 4
int main(int argc, char ** argv) {
int me, nproc, width, height, wloc, hloc;
double k, d,c, wdouble, hdouble, discr, delta_t, t;
char* initial, end;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm cart_top;
wdouble = atof(argv[1]);
hdouble = atof(argv[2]);
discr = atof(argv[3]);
k = atof(argv[4]);
d = atof(argv[5]);
c = atof(argv[6]);
delta_t = atof(argv[7]);
t = atof(argv[8]);
initial = argv[9];
end = argv[10];
double p = k/(d*c);
double dsc = delta_t/(discr*discr);
width = wdouble / discr;
height = hdouble / discr;
const int NPROWS=4; /* number of rows in _decomposition_ */
const int NPCOLS=4; /* number of cols in _decomposition_ */
const int BLOCKROWS = width/NPROWS; /* number of rows in _block_ */
const int BLOCKCOLS = height/NPCOLS;
const int dims[2] = {NPROWS, NPCOLS};
const int periods[2] = {0,0};
int* mycoords = new int[2];
int locsz = (width*height)/nproc;
double* T, *Tnew, *local, *locnew;
local = new double[BLOCKROWS*BLOCKCOLS];
locnew = new double[BLOCKROWS*BLOCKCOLS];
T = new double[width * height];
Tnew = new double[width * height];
ifstream infile;
infile.open(initial);
if(me==0) {
cout<<"BLOCKROWS: "<<BLOCKROWS;
cout<<"BLOCKCOLS: "<<BLOCKCOLS<<endl;
cout<<"width: "<<width;
cout<<"height: "<<height<<endl;
int idx, jdx, temp;
for (int i=0; i<width*height; i++) {
string currline;
getline(infile, currline);
idx = atoi(strtok(currline.c_str(), " "));
jdx = atoi(strtok(NULL, " "));
temp = atof(strtok(NULL, " "));
T[idx*height+jdx] = temp;
infile.close();
}
MPI_Datatype blocktype;
MPI_Datatype blocktype2;
MPI_Datatype coltype, coltype2;
MPI_Type_vector(BLOCKROWS, 1, BLOCKCOLS, MPI_DOUBLE, &coltype);
MPI_Type_create_resized( coltype, 0, sizeof(double), &coltype2);
MPI_Type_commit(&coltype2);
MPI_Type_vector(BLOCKROWS, BLOCKCOLS, height, MPI_DOUBLE, &blocktype2);
MPI_Type_create_resized( blocktype2, 0, sizeof(double), &blocktype);
MPI_Type_commit(&blocktype);
int disps[NPROWS*NPCOLS];
int counts[NPROWS*NPCOLS];
for (int ii=0; ii<NPROWS; ii++) {
for (int jj=0; jj<NPCOLS; jj++) {
disps[ii*NPCOLS+jj] = ii*height*BLOCKROWS+jj*BLOCKCOLS;
counts [ii*NPCOLS+jj] = 1;
}
}
int myrank, lb_i, lb_j, ub_i, ub_j;
lb_i=0;
lb_j=0;
ub_i=BLOCKROWS;
ub_j=BLOCKCOLS;
/*
0= left neighbor;
1= right neighbor;
2=top neighbor;
3=bottom neighbor;
*/
int neighs[4] = {};
double* leftcol, *rightcol, *myleftcol, *myrightcol, *toprow, *bottomrow;
leftcol = new double[BLOCKROWS];
rightcol= new double[BLOCKROWS];
myleftcol = new double[BLOCKROWS];
myrightcol= new double[BLOCKROWS];
toprow = new double[BLOCKCOLS];
bottomrow = new double[BLOCKCOLS];
//Create topology and get neighbor's rank
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_top);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Comm_rank(cart_top, &myrank);
MPI_Cart_shift(cart_top, 0, -1, &myrank, &neighs[0]);
MPI_Cart_shift(cart_top, 0, 1, &myrank, &neighs[1]);
MPI_Cart_shift(cart_top, 1, 1, &myrank, &neighs[2]);
MPI_Cart_shift(cart_top, 1, -1, &myrank, &neighs[3]);
MPI_Scatterv(T, counts, disps, blocktype, local, BLOCKROWS*BLOCKCOLS,
MPI_DOUBLE, 0, cart_top);
double curr_t=0;
for(double curr_t = 0; curr_t < t; curr_t+=delta_t) {
MPI_Barrier(cart_top);
//Send border columns to neighbors
if(neighs[2] != MPI_PROC_NULL) {
MPI_Send(&local[BLOCKCOLS-1], 1, coltype2, neighs[2], LEFT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Send(local, 1, coltype2, neighs[3], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Send(local, BLOCKCOLS, MPI_DOUBLE, neighs[0], TOP_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Send(&local[(BLOCKROWS-1)*BLOCKCOLS], BLOCKCOLS, MPI_DOUBLE, neighs[1], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Recv(leftcol, BLOCKROWS, MPI_DOUBLE, neighs[3], LEFT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[2] != MPI_PROC_NULL) {
MPI_Recv(rightcol, BLOCKROWS, MPI_DOUBLE, neighs[2], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Recv(bottomrow, BLOCKCOLS, MPI_DOUBLE, neighs[1], TOP_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Recv(toprow, BLOCKCOLS, MPI_DOUBLE, neighs[0], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
MPI_Barrier(cart_top);
double* aux;
//cout<<" t in process "<<me<<" is " <<t<<endl;
int i, j;
MPI_Comm_rank(cart_top, &myrank);
MPI_Barrier(cart_top);
for(i=lb_i; i<ub_i; i++) {
for(j=lb_j; j<ub_j; j++) {
double curr,c1,c2,c3,c4;
curr = local[i*BLOCKCOLS+j];
c1 = i==0 ? toprow[j] : local[(i-1)*BLOCKCOLS+j];
c2 = i==BLOCKROWS-1 ? bottomrow[j] : local[(i+1)*BLOCKCOLS+j];
c3 = j==0 ? leftcol[i] : local[i*BLOCKCOLS+(j-1)];
c4 = j==BLOCKCOLS-1 ? rightcol[i] : local[i*BLOCKCOLS+(j+1)];
locnew[i*BLOCKCOLS+j] = curr*(1-4*dsc*p) + dsc*p*(c1+c2+c3+c4);
/*if(i==0) locnew[i*BLOCKCOLS+j] = toprow[j];
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = bottomrow[j];
if(j==0) locnew[i*BLOCKCOLS+j] = leftcol[i];
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = rightcol[i];
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
/*if(i==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(j==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
}
}
aux = local;
local = locnew;
locnew = aux;
MPI_Barrier(cart_top);
/* aux = T;
T=Tnew;
Tnew = aux;*/
}
MPI_Gatherv(local, BLOCKROWS*BLOCKCOLS, MPI_DOUBLE, Tnew, counts, disps, blocktype, 0,cart_top);
if(me == 0) {
ofstream outfile;
outfile.open(argv[10]);
for(int i=0; i<width; i++) {
for(int j=0; j<height; j++) {
outfile<< i<<" " <<j<<" "<<Tnew[i*height+j]<<endl;
}
}
outfile.close();
}
MPI_Finalize();
}
I am studying computer architecture at university.
I have a homework assignment to make convolution faster using parallelism (OpenMP).
So far I have written the convolution code (your_convolution) with OpenMP, but it is not faster at all!
I'm using Visual Studio 2012.
How can I make it faster?
Here is the whole convolution code.
Please give me some help.
#include <intrin.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <vector>
#include <assert.h>
#include <omp.h>
using namespace std;
void convolution(float* output, float* input, float* filter, int width, int height, int r)
{
assert(output!=NULL && input!=NULL && filter!=NULL && width>0 && height>0 && r>0);
int w1=width-1;
int h1=height-1;
int fwidth=2*r+1;
int i, j, di, dj, ii, jj;
float sum;
for (i=0;i<height;++i)
{
for (j=0;j<width;++j)
{
sum=0;
for (di=-r;di<=r;++di)
{
ii=i+di;
ii=max(min(ii,h1),0);
for (dj=-r;dj<=r;++dj)
{
jj=j+dj;
jj=max(min(jj,w1),0);
sum+=filter[dj+r+(di+r)*fwidth]*input[jj+ii*width];
}
}
output[j+i*width]=sum;
}
}
}
void your_convolution(float* output, float* input, float* filter, int width, int height, int r)
{
// write your code here //
assert(output != NULL && input != NULL && filter != NULL && width>0 && height>0 && r>0);
int w1 = width - 1;
int h1 = height - 1;
int fwidth = 2 * r + 1;
int i, j, di, dj, ii, jj;
float sum;
omp_set_num_threads(4);
#pragma omp parallel
{
for (i = 0; i<height; ++i)
{
for (j = 0; j<width; ++j)
{
sum = 0;
for (di = -r; di <= r; ++di)
{
ii = i + di;
ii = max(min(ii, h1), 0);
#pragma omp parallel for
for (dj = -r; dj <= r; ++dj)
{
jj = j + dj;
jj = max(min(jj, w1), 0);
sum += filter[dj + r + (di + r)*fwidth] * input[jj + ii*width];
}
}
output[j + i*width] = sum;
}
}
}
}
int main()
{
// load the image
int width=1920; // width of the image
int height=1080; // height of the image
int len=width*height; // pixels in the image
int i, j, ii, jj, i2;
float* data=(float*)malloc(sizeof(float)*len); // buffer to load the image
float* output=(float*)malloc(sizeof(float)*len); // output buffer
FILE* fp=fopen("../image.dat", "rb"); // open the image, assume that the bld directory is a subdirectory to the src directory
fread(data, sizeof(float), width*height, fp); // load the float values, the image is gray.
fclose(fp);
// set the filter
int radius=3; // filter radius
float sigma=(float)(radius/3.0); // standard deviation of the Gaussian filter
float beta=(float)(-0.5/(sigma*sigma)); // coefficient exp(beta*x*x)
int fwidth=2*radius+1; // width of the filter
int flen=fwidth*fwidth; // number of elements in the filter
float* filter=(float*)malloc(sizeof(float)*flen); // filter buffer
float sum_weight=0; // we want to normalize the filter weights
for (i=-radius;i<=radius;++i)
{
ii=(i+radius)*fwidth;
i2=i*i;
for (j=-radius;j<=radius;++j)
{
jj=j+radius+ii;
filter[jj]=exp(beta*(i2+j*j));
sum_weight+=filter[jj];
}
}
sum_weight=(float)(1.0/sum_weight);
for (i=0;i<flen;++i)
filter[i]*=sum_weight; // now the weights are normalized to sum to 1
clock_t start=clock();
convolution(output, data, filter, width, height, radius);
clock_t finish=clock();
double duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "convolution naive: %2.3f seconds\n", duration );
float* output2=(float*)malloc(sizeof(float)*len); // output buffer
start=clock();
your_convolution(output2, data, filter, width, height, radius);
finish=clock();
double duration2 = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "your convolution: %2.3f seconds\n", duration2 );
double sum=0;
for (i=0;i<len;++i)
sum+=fabs(output[i]-output2[i]);
printf("difference of the outputs=%lf\n", sum);
printf( "The performance of your convolve is %2.1f times higher than convolution naive.\n", duration/duration2);
free(data);
free(filter);
free(output);
return 0;
}
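For comparison, here is a minimal sketch of parallelizing the convolution over the output rows instead of the innermost filter loop. Since every output pixel is independent, no shared accumulator is needed; this is only an illustration with the same signature as convolution() above, not a reference solution.
#include <omp.h>
#include <algorithm>
void convolution_omp(float* output, float* input, float* filter,
                     int width, int height, int r)
{
    int w1 = width - 1, h1 = height - 1, fwidth = 2 * r + 1;
    // one chunk of rows per thread; all loop variables are private to each thread
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < height; ++i) {
        for (int j = 0; j < width; ++j) {
            float sum = 0;
            for (int di = -r; di <= r; ++di) {
                int ii = std::max(std::min(i + di, h1), 0);
                for (int dj = -r; dj <= r; ++dj) {
                    int jj = std::max(std::min(j + dj, w1), 0);
                    sum += filter[dj + r + (di + r) * fwidth] * input[jj + ii * width];
                }
            }
            output[j + i * width] = sum;
        }
    }
}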
I have written an MPI code in C++ for my Raspberry Pi cluster, which generates an image of the Mandelbrot Set. What happens is on each node (excluding the master, processor 0) part of the Mandelbrot Set is calculated, resulting in each node having a 2D array of ints that indicates whether each xy point is in the set.
It appears to work well on each node individually, but when all the arrays are gathered to the master using this command:
MPI_Gather(&inside, 1, MPI_INT, insideFull, 1, MPI_INT, 0, MPI_COMM_WORLD);
it corrupts the data, and the result is an array full of garbage.
(inside is the nodes' 2D arrays of part of the set. insideFull is also a 2D array but it holds the whole set)
Why would it be doing this?
(This led me to wonder whether it is corrupting the data because the master isn't sending its array to itself (or at least I don't want it to). So part of my question is also: is there an MPI_Gather variant that doesn't send anything from the root process and just collects from everything else?)
Thanks
EDIT: here's the whole code. If anyone can suggest better ways of how I'm transferring the arrays, please say.
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
// ONLY USE MULTIPLES OF THE NUMBER OF SLAVE PROCESSORS
#define ImageHeight 128
#define ImageWidth 128
double MinRe = -1.9;
double MaxRe = 0.5;
double MinIm = -1.2;
double MaxIm = MinIm + (MaxRe - MinRe)*ImageHeight / ImageWidth;
double Re_factor = (MaxRe - MinRe) / (ImageWidth - 1);
double Im_factor = (MaxIm - MinIm) / (ImageHeight - 1);
unsigned n;
unsigned MaxIterations = 50;
int red;
int green;
int blue;
// MPI variables ****
int processorNumber;
int processorRank;
//*******************//
int main(int argc, char** argv) {
// Initialise MPI
MPI_Init(NULL, NULL);
// Get the number of procesors
MPI_Comm_size(MPI_COMM_WORLD, &processorNumber);
// Get the rank of this processor
MPI_Comm_rank(MPI_COMM_WORLD, &processorRank);
// Get the name of this processor
char processorName[MPI_MAX_PROCESSOR_NAME];
int name_len;
MPI_Get_processor_name(processorName, &name_len);
// A barrier just to sync all the processors, make timing more accurate
MPI_Barrier(MPI_COMM_WORLD);
// Make an array that stores whether each point is in the Mandelbrot Set
int inside[ImageWidth / processorNumber][ImageHeight / processorNumber];
if(processorRank == 0) {
printf("Generating Mandelbrot Set\n");
}
// We don't want the master to process the Mandelbrot Set, only the slaves
if(processorRank != 0) {
// Determine which coordinates to test on each processor
int xMin = (ImageWidth / (processorNumber - 1)) * (processorRank - 1);
int xMax = ((ImageWidth / (processorNumber - 1)) * (processorRank - 1)) - 1;
int yMin = (ImageHeight / (processorNumber - 1)) * (processorRank - 1);
int yMax = ((ImageHeight / (processorNumber - 1)) * (processorRank - 1)) - 1;
// Check each value to see if it's in the Mandelbrot Set
for (int y = yMin; y <= yMax; y++) {
double c_im = MaxIm - y *Im_factor;
for (int x = xMin; x <= xMax; x++) {
double c_re = MinRe + x*Re_factor;
double Z_re = c_re, Z_im = c_im;
int isInside = 1;
for (n = 0; n <= MaxIterations; ++n) {
double Z_re2 = Z_re * Z_re, Z_im2 = Z_im * Z_im;
if (Z_re2 + Z_im2 > 10) {
isInside = 0;
break;
}
Z_im = 2 * Z_re * Z_im + c_im;
Z_re = Z_re2 - Z_im2 + c_re;
}
if (isInside == 1) {
inside[x][y] = 1;
}
else{
inside[x][y] = 0;
}
}
}
}
// Wait for all processors to finish computing
MPI_Barrier(MPI_COMM_WORLD);
int insideFull[ImageWidth][ImageHeight];
if(processorRank == 0) {
printf("Sending parts of set to master\n");
}
// Send all the arrays to the master
MPI_Gather(&inside[0][0], 1, MPI_INT, &insideFull[0][0], 1, MPI_INT, 0, MPI_COMM_WORLD);
// Output the data to an image
if(processorRank == 0) {
printf("Generating image\n");
FILE * image = fopen("mandelbrot_set.ppm", "wb");
fprintf(image, "P6 %d %d 255\n", ImageHeight, ImageWidth);
for(int y = 0; y < ImageHeight; y++) {
for(int x = 0; x < ImageWidth; x++) {
if(insideFull[x][y]) {
putc(0, image);
putc(0, image);
putc(255, image);
}
else {
putc(0, image);
putc(0, image);
putc(0, image);
}
// Just to see what values return, no actual purpose
printf("%d, %d, %d\n", x, y, insideFull[x][y]);
}
}
fclose(image);
printf("Complete\n");
}
MPI_Barrier(MPI_COMM_WORLD);
// Finalise MPI
MPI_Finalize();
}
You call MPI_Gather with the following parameters:
const void* sendbuf : &inside[0][0] Starting address of send buffer
int sendcount : 1 Number of elements in send buffer
const MPI::Datatype& sendtype : MPI_INT Datatype of send buffer elements
void* recvbuf : &insideFull[0][0]
int recvcount : 1 Number of elements for any single receive
const MPI::Datatype& recvtype : MPI_INT Datatype of recvbuffer elements
int root : 0 Rank of receiving process
MPI_Comm comm : MPI_COMM_WORLD Communicator (handle).
Sending/receiving only one element is not sufficient. Instead of 1 use
(ImageWidth / processorNumber)*(ImageHeight / processorNumber)
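As a sketch, the call with corrected counts would look like the following (the memory-layout issue discussed next still applies, so this alone is not enough):
// each process contributes one whole block of the set, not a single int
const int blockW = ImageWidth  / processorNumber;   // first dimension of `inside`
const int blockH = ImageHeight / processorNumber;   // second dimension of `inside`
MPI_Gather(&inside[0][0], blockW * blockH, MPI_INT,
           &insideFull[0][0], blockW * blockH, MPI_INT,
           0, MPI_COMM_WORLD);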
Then think about the different memory layout of your source and target 2D arrays:
int inside[ImageWidth / processorNumber][ImageHeight / processorNumber];
vs.
int insideFull[ImageWidth][ImageHeight];
As the copy is a memory block copy, and not an intelligent 2D-array copy, all your source integers will be transferred contiguously to the target address, regardless of the different line lengths.
I'd recommend sending the data first into an array of the same size as the source, and then, in the receiving process, copying the elements to the right lines and columns of the full array, for example with a small function like:
// assemble2D():
// copies a source int sarr[sli][sco] into a destination int darr[dli][dco],
// starting at darr[doffli][doffco].
// Elements that fall out of bounds are ignored. Negative offsets are possible.
void assemble2D(int* darr, int dli, int dco, int* sarr, int sli, int sco, int doffli = 0, int doffco = 0)
{
    for (int i = 0; i < sli; i++)
        for (int j = 0; j < sco; j++)
            if ((i + doffli >= 0) && (j + doffco >= 0) && (i + doffli < dli) && (j + doffco < dco))
                darr[(i + doffli) * dco + (j + doffco)] = sarr[i * sco + j];
}
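A hypothetical use on rank 0 could then gather every block back to back into one flat temporary buffer and place each block with assemble2D(); gatherbuf and the placement offsets below are assumptions made for the example and depend on how the image was actually partitioned.
// gather all blocks contiguously into a flat buffer on rank 0
const int blockW = ImageWidth  / processorNumber;
const int blockH = ImageHeight / processorNumber;
static int gatherbuf[ImageWidth * ImageHeight];     // large enough for all blocks
MPI_Gather(&inside[0][0], blockW * blockH, MPI_INT,
           gatherbuf, blockW * blockH, MPI_INT, 0, MPI_COMM_WORLD);
if (processorRank == 0) {
    for (int p = 0; p < processorNumber; ++p)
        assemble2D(&insideFull[0][0], ImageWidth, ImageHeight,
                   &gatherbuf[p * blockW * blockH], blockW, blockH,
                   p * blockW, 0);                   // placement offsets are an assumption
}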