MPI_Gatherv hangs when recomposing a matrix - c++

I have to decompose and recompose a matrix in MPI (I'm using MPICH), and I'm using Scatterv and Gatherv as in the example from this question. Everything works well for small matrices, but when the matrix size increases (starting from 800x800), the program hangs when it reaches MPI_Gatherv. By printing debug messages, I can see that every process passes the call to Gatherv, except the one with rank 0 (the root process in the Gatherv call).
Any suggestion? Here's the code:
#include <iostream>
#include <cstring>
#include <fstream>
#include <cstdlib>
#include "mpi.h"
using namespace std;
#define TOP_ROW_TAG 1
#define BOTTOM_ROW_TAG 2
#define LEFT_COL_TAG 3
#define RIGHT_COL_TAG 4
int main(int argc, char ** argv) {
int me, nproc, width, height, wloc, hloc;
double k, d,c, wdouble, hdouble, discr, delta_t, t;
char* initial, end;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm cart_top;
wdouble = atof(argv[1]);
hdouble = atof(argv[2]);
discr = atof(argv[3]);
k = atof(argv[4]);
d = atof(argv[5]);
c = atof(argv[6]);
delta_t = atof(argv[7]);
t = atof(argv[8]);
initial = argv[9];
end = argv[10];
double p = k/(d*c);
double dsc = delta_t/(discr*discr);
width = wdouble / discr;
height = hdouble / discr;
const int NPROWS=4; /* number of rows in _decomposition_ */
const int NPCOLS=4; /* number of cols in _decomposition_ */
const int BLOCKROWS = width/NPROWS; /* number of rows in _block_ */
const int BLOCKCOLS = height/NPCOLS;
const int dims[2] = {NPROWS, NPCOLS};
const int periods[2] = {0,0};
int* mycoords = new int[2];
int locsz = (width*height)/nproc;
double* T, *Tnew, *local, *locnew;
local = new double[BLOCKROWS*BLOCKCOLS];
locnew = new double[BLOCKROWS*BLOCKCOLS];
T = new double[width * height];
Tnew = new double[width * height];
ifstream infile;
infile.open(initial);
if(me==0) {
cout<<"BLOCKROWS: "<<BLOCKROWS;
cout<<"BLOCKCOLS: "<<BLOCKCOLS<<endl;
cout<<"width: "<<width;
cout<<"height: "<<height<<endl;
int idx, jdx, temp;
for (int i=0; i<width*height; i++) {
string currline;
getline(infile, currline);
idx = atoi(strtok(currline.c_str(), " "));
jdx = atoi(strtok(NULL, " "));
temp = atof(strtok(NULL, " "));
T[idx*height+jdx] = temp;
}
infile.close();
}
MPI_Datatype blocktype;
MPI_Datatype blocktype2;
MPI_Datatype coltype, coltype2;
MPI_Type_vector(BLOCKROWS, 1, BLOCKCOLS, MPI_DOUBLE, &coltype);
MPI_Type_create_resized( coltype, 0, sizeof(double), &coltype2);
MPI_Type_commit(&coltype2);
MPI_Type_vector(BLOCKROWS, BLOCKCOLS, height, MPI_DOUBLE, &blocktype2);
MPI_Type_create_resized( blocktype2, 0, sizeof(double), &blocktype);
MPI_Type_commit(&blocktype);
int disps[NPROWS*NPCOLS];
int counts[NPROWS*NPCOLS];
for (int ii=0; ii<NPROWS; ii++) {
for (int jj=0; jj<NPCOLS; jj++) {
disps[ii*NPCOLS+jj] = ii*height*BLOCKROWS+jj*BLOCKCOLS;
counts [ii*NPCOLS+jj] = 1;
}
}
int myrank, lb_i, lb_j, ub_i, ub_j;
lb_i=0;
lb_j=0;
ub_i=BLOCKROWS;
ub_j=BLOCKCOLS;
/*
0= left neighbor;
1= right neighbor;
2=top neighbor;
3=bottom neighbor;
*/
int neighs[4] = {};
double* leftcol, *rightcol, *myleftcol, *myrightcol, *toprow, *bottomrow;
leftcol = new double[BLOCKROWS];
rightcol= new double[BLOCKROWS];
myleftcol = new double[BLOCKROWS];
myrightcol= new double[BLOCKROWS];
toprow = new double[BLOCKCOLS];
bottomrow = new double[BLOCKCOLS];
//Create topology and get neighbor's rank
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_top);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Comm_rank(cart_top, &myrank);
MPI_Cart_shift(cart_top, 0, -1, &myrank, &neighs[0]);
MPI_Cart_shift(cart_top, 0, 1, &myrank, &neighs[1]);
MPI_Cart_shift(cart_top, 1, 1, &myrank, &neighs[2]);
MPI_Cart_shift(cart_top, 1, -1, &myrank, &neighs[3]);
MPI_Scatterv(T, counts, disps, blocktype, local, BLOCKROWS*BLOCKCOLS,
MPI_DOUBLE, 0, cart_top);
double curr_t=0;
for(double curr_t = 0; curr_t < t; curr_t+=delta_t) {
MPI_Barrier(cart_top);
//Send border columns to neighbors
if(neighs[2] != MPI_PROC_NULL) {
MPI_Send(&local[BLOCKCOLS-1], 1, coltype2, neighs[2], LEFT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Send(local, 1, coltype2, neighs[3], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Send(local, BLOCKCOLS, MPI_DOUBLE, neighs[0], TOP_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Send(&local[(BLOCKROWS-1)*BLOCKCOLS], BLOCKCOLS, MPI_DOUBLE, neighs[1], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top);
}
if(neighs[3] != MPI_PROC_NULL) {
MPI_Recv(leftcol, BLOCKROWS, MPI_DOUBLE, neighs[3], LEFT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[2] != MPI_PROC_NULL) {
MPI_Recv(rightcol, BLOCKROWS, MPI_DOUBLE, neighs[2], RIGHT_COL_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[1] != MPI_PROC_NULL) {
MPI_Recv(bottomrow, BLOCKCOLS, MPI_DOUBLE, neighs[1], TOP_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
if(neighs[0] != MPI_PROC_NULL) {
MPI_Recv(toprow, BLOCKCOLS, MPI_DOUBLE, neighs[0], BOTTOM_ROW_TAG+(int)(curr_t*1000), cart_top, MPI_STATUS_IGNORE);
}
MPI_Barrier(cart_top);
double* aux;
//cout<<" t in process "<<me<<" is " <<t<<endl;
int i, j;
MPI_Comm_rank(cart_top, &myrank);
MPI_Barrier(cart_top);
for(i=lb_i; i<ub_i; i++) {
for(j=lb_j; j<ub_j; j++) {
double curr,c1,c2,c3,c4;
curr = local[i*BLOCKCOLS+j];
c1 = i==0 ? toprow[j] : local[(i-1)*BLOCKCOLS+j];
c2 = i==BLOCKROWS-1 ? bottomrow[j] : local[(i+1)*BLOCKCOLS+j];
c3 = j==0 ? leftcol[i] : local[i*BLOCKCOLS+(j-1)];
c4 = j==BLOCKCOLS-1 ? rightcol[i] : local[i*BLOCKCOLS+(j+1)];
locnew[i*BLOCKCOLS+j] = curr*(1-4*dsc*p) + dsc*p*(c1+c2+c3+c4);
/*if(i==0) locnew[i*BLOCKCOLS+j] = toprow[j];
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = bottomrow[j];
if(j==0) locnew[i*BLOCKCOLS+j] = leftcol[i];
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = rightcol[i];
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
/*if(i==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(i==BLOCKROWS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(j==0) locnew[i*BLOCKCOLS+j] = (double)5000;
else if(j==BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = (double)5000;
if(i!=0 && i!=BLOCKROWS-1 && j!=0 && j!=BLOCKCOLS-1) locnew[i*BLOCKCOLS+j] = local[i*BLOCKCOLS+j];*/
}
}
aux = local;
local = locnew;
locnew = aux;
MPI_Barrier(cart_top);
/* aux = T;
T=Tnew;
Tnew = aux;*/
}
MPI_Gatherv(local, BLOCKROWS*BLOCKCOLS, MPI_DOUBLE, Tnew, counts, disps, blocktype, 0,cart_top);
if(me == 0) {
ofstream outfile;
outfile.open(argv[10]);
for(int i=0; i<width; i++) {
for(int j=0; j<height; j++) {
outfile<< i<<" " <<j<<" "<<Tnew[i*height+j]<<endl;
}
}
outfile.close();
}
MPI_Finalize();
}

Related

"Segmentation Fault" error when running MPI with more than one process

I'm writing a Generator that generates words in parallel with MPI.
When I run the generator with a single process as below, it gives the desired outputs without any problems.
mpirun -np 1 ./bin/gen_data 2
And the output is:
<<<==========GENERATION TEST START=========>>>
VT
BBZPTFTQ
WORD 0 : VT
WORD 1 : BBZPTFTQ
That took 0.000014 seconds
<<<===========GENERATION TEST END==========>>>
When I increase the number of processes, for example to two, I get the following error:
<<<==========GENERATION TEST START=========>>>
MTBVLUWXB
DVP
WORD 0 : DVP
[fati:17967] *** Process received signal ***
[fati:17967] Signal: Segmentation fault (11)
[fati:17967] Signal code: (128)
[fati:17967] Failing at address: (nil)
[fati:17967] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x43090)[0x7f978851f090]
[fati:17967] [ 1] ./bin/gen_data(+0xdf81)[0x5582c661bf81]
[fati:17967] [ 2] ./bin/gen_data(+0xe039)[0x5582c661c039]
[fati:17967] [ 3] ./bin/gen_data(+0xe3b1)[0x5582c661c3b1]
[fati:17967] [ 4] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3)
[0x7f9788500083]
[fati:17967] [ 5] ./bin/gen_data(+0xd78e)[0x5582c661b78e]
[fati:17967] *** End of error message ***
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node fati exited on signal 11
(Segmentation fault).
--------------------------------------------------------------------------
Here is my code:
Generator.cpp
#include "Generator.hpp"
int Generator::MAX_STR_LEN = 10;
int Generator::MIN_STR_LEN = 2;
int Generator::DATASET_SIZE = 50000;
char *Generator::LETTERS = (char *)malloc(Generator::LETTER_COUNT * sizeof (char));
int Generator::WORLD_SIZE = 1;
int *Generator::SCATTER_SEND_BUFFER = nullptr;
char **Generator::ALL_WORDS = nullptr;
int Generator::ELEMENTS_PER_PROC = DATASET_SIZE;
void Generator::validate(int argc, char **argv){
if (Generator::WORLD_SIZE > 100)
throw invalid_argument("WORLD_SIZE can be 100 at max");
if (Generator::WORLD_SIZE < 1)
throw invalid_argument("WORLD_SIZE can't be lowest from 1");
if (argc < 2)
throw invalid_argument("CLI arguments must have 2 arguments at least");
Generator::DATASET_SIZE = atoi(argv[1]);
if (strlen(argv[1]) > 7 || Generator::DATASET_SIZE > 1000000)
throw invalid_argument("MPI_DATASET_SIZE can be 1M at max");
if (Generator::DATASET_SIZE <= 0)
throw invalid_argument("MPI_DATASET_SIZE can't be zero or negative");
Generator::MAX_STR_LEN = 10;
if (argc > 2)
Generator::MAX_STR_LEN = atoi(argv[2]);
if (Generator::MAX_STR_LEN > 100)
throw invalid_argument("MAX_STR_LEN can be 100 at max");
if (Generator::MAX_STR_LEN < 2)
throw invalid_argument("MAX_STR_LEN can't be lowest from 2");
Generator::MIN_STR_LEN = 2;
if (argc > 3)
Generator::MIN_STR_LEN = atoi(argv[3]);
if (Generator::MIN_STR_LEN > 99)
throw invalid_argument("MIN_STR_LEN can be 99 at max");
if (Generator::MIN_STR_LEN < 1)
throw invalid_argument("MIN_STR_LEN can't be lowest from 1");
if (Generator::MIN_STR_LEN >= Generator::MAX_STR_LEN)
throw invalid_argument("MIN_STR_LEN has to be lowest from MAX_STR_LEN");
Generator::ELEMENTS_PER_PROC = Generator::DATASET_SIZE / Generator::WORLD_SIZE;
}
void Generator::createLetters(){
for (int i = 0; i < Generator::LETTER_COUNT; i++)
Generator::LETTERS[i] = 'A' + i;
}
Generator::Generator(int argc, char **argv){
Generator::validate(argc, argv);
Generator::createLetters();
}
Generator::Generator(){}
void Generator::Allocate2DArray(char ***arr, int row, int col){
*arr = (char **)malloc(row * sizeof(char*));
for (int i = 0; i < row; i++)
(*arr)[i] = (char *)malloc(col * sizeof(char));
}
void Generator::CreateSendData(){
Generator::SCATTER_SEND_BUFFER = (int *)malloc(Generator::WORLD_SIZE * sizeof(int));
for (int i = 0; i < Generator::WORLD_SIZE; i++)
SCATTER_SEND_BUFFER[i] = Generator::ELEMENTS_PER_PROC;
}
void Generator::CreateWord(char **word){
int len = rand() % (Generator::MAX_STR_LEN - Generator::MIN_STR_LEN + 1) + Generator::MIN_STR_LEN;
*word = (char *)malloc((Generator::MAX_STR_LEN) * sizeof(char));
int i = 0;
for (; i < len; i++)
{
int r = rand() % LETTER_COUNT;
(*word)[i] = Generator::LETTERS[r];
}
if (len != MAX_STR_LEN)
(*word)[i] = '\0';
}
void Generator::CreateWords(char **arr, int buff_size){
for (int i = 0; i < buff_size; i++){
char *word;
Generator::CreateWord(&word);
arr[i] = word;
}
}
void Generator::WorkingTime(double start_time, double end_time){
printf("That took %f seconds\n", end_time - start_time);
}
void Generator::WriteWord(char *word){
for (int i = 0; i < Generator::MAX_STR_LEN; i++)
{
if (word[i] == '\0')
break;
cout << word[i];
}
cout << endl;
}
void Generator::WriteWords(){
for (int i = 0; i < Generator::DATASET_SIZE; i++){
cout << "WORD " << i << " : ";
// assert(ALL_WORDS[i] != NULL);
Generator::WriteWord(Generator::ALL_WORDS[i]);
}
}
void Generator::WriteWords(char **arr, int buff_size){
for (int i = 0; i < buff_size; i++)
Generator::WriteWord(arr[i]);
}
TestGenerate.cpp
#include "Generator.hpp"
#include "Util.hpp"
#include <mpi.h>
#include <unistd.h>
int main(int argc, char **argv)
{
Util::Clear();
cout << "<<<==========GENERATION TEST START=========>>>" << endl;
double start_time, end_time;
MPI_Init(NULL, NULL);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &Generator::WORLD_SIZE);
Generator *generator = new Generator(argc, argv);
start_time = MPI_Wtime();
Generator::SCATTER_SEND_BUFFER = NULL;
Generator::CreateSendData();
assert(Generator::SCATTER_SEND_BUFFER != NULL);
int scatter_recv_buffer;
MPI_Barrier(MPI_COMM_WORLD);
MPI_Scatter(Generator::SCATTER_SEND_BUFFER, 1, MPI_INT, &scatter_recv_buffer, 1, MPI_INT, 0, MPI_COMM_WORLD);
srand((world_rank + 1) * time(0));
char **words = NULL;
Generator::Allocate2DArray(&words, scatter_recv_buffer, Generator::MAX_STR_LEN);
Generator::CreateWords(words, scatter_recv_buffer);
Generator::WriteWords(words, scatter_recv_buffer);
if (world_rank == 0)
Generator::Allocate2DArray(&Generator::ALL_WORDS, Generator::DATASET_SIZE, Generator::MAX_STR_LEN);
MPI_Gather(words, scatter_recv_buffer * Generator::MAX_STR_LEN, MPI_CHAR, Generator::ALL_WORDS, scatter_recv_buffer * Generator::MAX_STR_LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
if (world_rank == 0)
{
end_time = MPI_Wtime();
Generator::WriteWords();
Generator::WorkingTime(start_time, end_time);
cout << "<<<===========GENERATION TEST END==========>>>" << endl;
}
MPI_Finalize();
}
I'm probably having trouble gathering data with MPI_Gather.
Your basic problem is that Allocate2DArray does not create a 2D array: it creates a 1D array of pointers to separate 1D arrays. Thus you cannot use it as an MPI buffer. MPI buffers can only be (in your specific case) a single contiguous char*.
As @victor-eijkhout said, MPI buffers only take contiguous char* data.
A 2D array of char pointers is not needed to hold the strings: a flat char buffer indexed as arr[i*MAX_STR_LEN + j] is enough, as the sketch below illustrates.
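A standalone sketch of the difference (the names rows, cols, jagged, and flat are hypothetical, not taken from the code):
#include <cstdlib>
int main()
{
    const int rows = 4, cols = 10; // illustrative sizes
    // 'rows' separate heap allocations: the char** is not one contiguous
    // block, so it cannot be handed to MPI_Gather as a single buffer.
    char **jagged = (char **)std::malloc(rows * sizeof(char *));
    for (int i = 0; i < rows; i++)
        jagged[i] = (char *)std::malloc(cols * sizeof(char));
    // One contiguous block of rows*cols chars: word i starts at
    // flat + i*cols, and 'flat' itself is a valid MPI send/receive buffer.
    char *flat = (char *)std::malloc(rows * cols * sizeof(char));
    std::free(flat);
    for (int i = 0; i < rows; i++)
        std::free(jagged[i]);
    std::free(jagged);
    return 0;
}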
Here is the new Generator.cpp:
#include "Generator.hpp"
int Generator::MAX_STR_LEN = 10;
int Generator::MIN_STR_LEN = 2;
int Generator::DATASET_SIZE = 50000;
char *Generator::LETTERS = (char *)malloc(Generator::LETTER_COUNT * sizeof (char));
int Generator::WORLD_SIZE = 1;
int *Generator::SCATTER_SEND_BUFFER = nullptr;
char *Generator::ALL_WORDS = nullptr;
int Generator::ELEMENTS_PER_PROC = DATASET_SIZE;
void Generator::validate(int argc, char **argv){
if (Generator::WORLD_SIZE > 100)
throw invalid_argument("WORLD_SIZE can be 100 at max");
if (Generator::WORLD_SIZE < 1)
throw invalid_argument("WORLD_SIZE can't be lowest from 1");
if (argc < 2)
throw invalid_argument("CLI arguments must have 2 arguments at least");
Generator::DATASET_SIZE = atoi(argv[1]);
if (strlen(argv[1]) > 7 || Generator::DATASET_SIZE > 1000000)
throw invalid_argument("MPI_DATASET_SIZE can be 1M at max");
if (Generator::DATASET_SIZE <= 0)
throw invalid_argument("MPI_DATASET_SIZE can't be zero or negative");
Generator::MAX_STR_LEN = 10;
if (argc > 2)
Generator::MAX_STR_LEN = atoi(argv[2]);
if (Generator::MAX_STR_LEN > 100)
throw invalid_argument("MAX_STR_LEN can be 100 at max");
if (Generator::MAX_STR_LEN < 2)
throw invalid_argument("MAX_STR_LEN can't be lowest from 2");
Generator::MIN_STR_LEN = 2;
if (argc > 3)
Generator::MIN_STR_LEN = atoi(argv[3]);
if (Generator::MIN_STR_LEN > 99)
throw invalid_argument("MIN_STR_LEN can be 99 at max");
if (Generator::MIN_STR_LEN < 1)
throw invalid_argument("MIN_STR_LEN can't be lowest from 1");
if (Generator::MIN_STR_LEN >= Generator::MAX_STR_LEN)
throw invalid_argument("MIN_STR_LEN has to be lowest from MAX_STR_LEN");
Generator::ELEMENTS_PER_PROC = Generator::DATASET_SIZE / Generator::WORLD_SIZE;
}
void Generator::createLetters(){
for (int i = 0; i < Generator::LETTER_COUNT; i++)
Generator::LETTERS[i] = 'A' + i;
}
Generator::Generator(int argc, char **argv){
Generator::validate(argc, argv);
Generator::createLetters();
}
Generator::Generator(){}
// void Generator::Allocate2DArray(char ***arr, int row, int col){
// *arr = (char **)malloc(row * sizeof(char*));
// for (int i = 0; i < row; i++)
// (*arr)[i] = (char *)malloc(col * sizeof(char));
// }
void Generator::CreateSendData(){
Generator::SCATTER_SEND_BUFFER = (int *)malloc(Generator::WORLD_SIZE * sizeof(int));
for (int i = 0; i < Generator::WORLD_SIZE; i++)
SCATTER_SEND_BUFFER[i] = Generator::ELEMENTS_PER_PROC;
}
void Generator::CreateWord(char **word){
int len = rand() % (Generator::MAX_STR_LEN - Generator::MIN_STR_LEN + 1) + Generator::MIN_STR_LEN;
*word = (char *)malloc((Generator::MAX_STR_LEN) * sizeof(char));
int i = 0;
for (; i < len; i++)
{
int r = rand() % LETTER_COUNT;
(*word)[i] = Generator::LETTERS[r];
}
if (len != MAX_STR_LEN)
(*word)[i] = '\0';
}
void Generator::CreateWords(char *arr, int buff_size){
for (int i = 0; i < buff_size; i++){
char *word;
Generator::CreateWord(&word);
for (int j = 0; j < Generator::MAX_STR_LEN; j++)
{
arr[i*Generator::MAX_STR_LEN + j] = word[j];
if (word[j] == '\0')
break;
}
}
}
void Generator::WorkingTime(double start_time, double end_time){
printf("That took %f seconds\n", end_time - start_time);
}
void Generator::WriteWord(char *word){
for (int i = 0; i < Generator::MAX_STR_LEN; i++)
{
if (word[i] == '\0')
break;
cout << word[i];
}
cout << endl;
}
void Generator::WriteWords(){
for (int i = 0; i < Generator::DATASET_SIZE; i++){
Generator::WriteWord((Generator::ALL_WORDS + i*Generator::MAX_STR_LEN));
}
}
void Generator::WriteWords(char *arr, int buff_size){
for (int i = 0; i < buff_size; i++)
Generator::WriteWord((arr + i*Generator::MAX_STR_LEN));
}
TestGenerate.cpp
#include "Generator.hpp"
#include "Util.hpp"
#include <mpi.h>
#include <unistd.h>
int main(int argc, char **argv)
{
Util::Clear();
cout << "<<<==========GENERATION TEST START=========>>>" << endl;
double start_time, end_time;
MPI_Init(NULL, NULL);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &Generator::WORLD_SIZE);
Generator *generator = new Generator(argc, argv);
start_time = MPI_Wtime();
Generator::SCATTER_SEND_BUFFER = NULL;
Generator::CreateSendData();
assert(Generator::SCATTER_SEND_BUFFER != NULL);
int scatter_recv_buffer;
MPI_Barrier(MPI_COMM_WORLD);
MPI_Scatter(Generator::SCATTER_SEND_BUFFER, 1, MPI_INT, &scatter_recv_buffer, 1, MPI_INT, 0, MPI_COMM_WORLD);
srand((world_rank + 1) * time(0));
char *words = (char *)malloc(sizeof(char) * scatter_recv_buffer * Generator::MAX_STR_LEN);
Generator::CreateWords(words, scatter_recv_buffer);
if (world_rank == 0)
Generator::ALL_WORDS = (char *)malloc(sizeof(char) * Generator::DATASET_SIZE * Generator::MAX_STR_LEN);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(words, scatter_recv_buffer * Generator::MAX_STR_LEN, MPI_CHAR, Generator::ALL_WORDS, scatter_recv_buffer * Generator::MAX_STR_LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
if (world_rank == 0)
{
end_time = MPI_Wtime();
Generator::WriteWords();
Generator::WorkingTime(start_time, end_time);
cout << "<<<===========GENERATION TEST END==========>>>" << endl;
}
MPI_Finalize();
}

Matrix-Vector Multiplication on MPI - ERROR Compiled code

I need assistance to resolve an error in the following code:
#include <iostream>
#include <mpi.h>
using namespace std;
//matrix in two dimension in memory!!
int main(int argc, char** argv)
{
const int WIDTH = 100;
const int HEIGHT = 100;
int id, P;
double tempValue = 0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &P);
MPI_Comm_rank(MPI_COMM_WORLD, &id);
double A[WIDTH][HEIGHT];
double x[HEIGHT], b[WIDTH];
int upperBound, lowerBound = 0;
// Master controls worksharing..
if (id == 0)
{
// Init A & x
for (int i = 0; i < WIDTH; i++)
for (int j = 0; j < HEIGHT; j++)
A[i][j] = 1;
for (int j = 0; j < HEIGHT; j++)
x[j] = 2;
// Send to each node its portion of A to be processed
int portionSize = WIDTH / P;
for (int i = 0; i < P; i++)
{
lowerBound = i * portionSize;
upperBound = (i + 1) * portionSize;
// let the last node process the remainder
if (i == (P - 1))
upperBound += (HEIGHT - portionSize * P);
if (i > 0)// Do not send to master node!!
{
// Send to node i the lower & upper bounds the A portion
//and complete vector x
MPI_Send(&lowerBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(&upperBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(&A[lowerBound][0], (upperBound - lowerBound) * HEIGHT,
MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
MPI_Send(&x[0], HEIGHT, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
}
// master perform part of the job...
for (int i = 0; i < portionSize; i++)
{
tempValue = 0;
for (int j = 0; j < HEIGHT; j++)
tempValue += A[i][j] * x[j];
b[i] = tempValue;
}
//Get the results in order, each node would send their boundaries and data part
for (int i = 1; i < P; i++)
{
MPI_Recv(&lowerBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&upperBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&P[lowerBound], (upperBound - lowerBound), MPI_DOUBLE, i, 0,
MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
}
// Print the first 2 values to check..
cout << "b[0]=" << b[0] << " b[Width-1]=" << b[WIDTH - 1] << endl;
}
else // the rest of the workers do their parts
{
//Receive the inputs
MPI_Recv(&lowerBound, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&upperBound, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&A[lowerBound][0], (upperBound - lowerBound) * WIDTH, MPI_DOUBLE, 0, 0,
MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&x, HEIGHT, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
cout << "Node:" << id << " Received from:" << lowerBound << " to " << upperBound - 1
<< endl;
double* result = new double[upperBound - lowerBound];
//Do the job
for (int i = lowerBound, resultCounter = 0; i < upperBound; i++, resultCounter++)
{
tempValue = 0;
for (int j = 0; j < HEIGHT; j++)
tempValue += A[i][j] * x[j];
result[resultCounter] = tempValue;
}
//send the results
MPI_Send(&lowerBound, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&upperBound, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
MPI_Send(&result[0], upperBound - lowerBound, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
delete[] result;
}
MPI_Finalize();
return 0;
}
When I compile the code in Microsoft Visual Studio 2019, I get this error message:
Error (active) E0142 expression must have pointer-to-object type ConsoleApplication9 C:\Users\m_swe\Desktop\Assignments\Assignments\PrjP2P\MatMPI\MatMPI\Source.cpp 59
Error C2109 subscript requires array or pointer type ConsoleApplication9 C:\Users\m_swe\Desktop\Assignments\Assignments\PrjP2P\MatMPI\MatMPI\Source.cpp 59
I think the problem is on line 59:
MPI_Recv(&P[lowerBound], (upperBound - lowerBound), MPI_DOUBLE, i, 0,
MPI_Recv takes a pointer to a buffer (the first argument) where the incoming data will be stored. P is a plain int holding the number of processes, so P[lowerBound] is not valid; you need an actual receive buffer, for example an array defined before the loop:
double receivedValues[ WIDTH * HEIGHT ];
for (int i = 1; i < P; i++)
{
MPI_Recv(&lowerBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&upperBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
MPI_Recv(&receivedValues[0], (upperBound - lowerBound), MPI_DOUBLE, i, 0,
MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
// do your computation here with receivedValues
}
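Since the goal is to assemble the result vector b on rank 0, a more direct variant (just a sketch of that idea, reusing the variables already declared in the question) receives each worker's slice straight into b at its offset:
for (int i = 1; i < P; i++)
{
    MPI_Recv(&lowerBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(&upperBound, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    // each worker sent (upperBound - lowerBound) doubles; place them directly
    // at their position in the result vector b
    MPI_Recv(&b[lowerBound], upperBound - lowerBound, MPI_DOUBLE, i, 0,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}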

What's wrong with this parallel algorithm?

I'm trying to write a parallel algorithm in OpenCL for the L-system Pythagoras tree:
var:A,B;
const: (,);
axiom:A;
rules:(B->BB),(A->B[A]A)
But I can't get past the 9th iteration: the 10th iteration returns a disordered string. Here is my kernel:
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_amd_printf : enable
__kernel void l_system(int string_lenght){}
__kernel void l_system_interation(int string_lenght, __global char *sentence, __local char *string, __global int * local_char_num)
{
int local_x = (int)get_local_id(0);
int local_size = (int)get_local_size(0);
int x = (int)get_global_id(0);
int size = (int)get_global_size(0);
int group = (int)get_group_id(0);
int local_mem_index;
if(x < string_lenght){
//local mem index - offset for next group, copy char to local
local_mem_index = local_x * 5;
string[local_mem_index] = sentence[x];
if(local_x == 0){
//reset counter
atomic_xchg(&local_char_num[group], 0);
//atomic_add(&local_char_num[0], group);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
if(x < string_lenght){
if(string[local_mem_index] == 'A'){
atomic_add(&local_char_num[group], 5);
string[local_mem_index] = 'B';
string[local_mem_index + 1] = '(';
string[local_mem_index + 2] = 'A';
string[local_mem_index + 3] = ')';
string[local_mem_index + 4] = 'A';
}
else if(string[local_mem_index] == 'B'){
atomic_add(&local_char_num[group], 2);
string[local_mem_index + 1] = 'B';
//reset 3rd char of local_mem
string[local_mem_index + 2] = '0';
}
else{
atomic_add(&local_char_num[group], 1);
//reset 3rd char of local_mem
string[local_mem_index + 2] = '0';
string[local_mem_index + 2] = '0';
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
//1 compute unit for every char from src
if(x < string_lenght){
//local first compute unit writes to result whole group string
if(local_x == 0){
int j = 0;
//find offset for write to result string
if(x != 0){
for(int l = 1;l <= group; l++)
{
j += atomic_xchg(&local_char_num[group-l], local_char_num[group-l]);
//if(l == 0)
}
atomic_xchg(&local_char_num[99+group], local_char_num[group]);
}
for(int i = 0; i < local_size; i++){
//only valid chars
if(string_lenght > (x+i)){
local_mem_index = i * 5;
//B rule, copy (,)
if(string[local_mem_index+2] != 'A'){
sentence[j++] = string[local_mem_index];
if(string[local_mem_index] == 'B'){
sentence[j++] = string[local_mem_index+1];
}
continue;//B,(,); next index;
}
else{ // A rule
sentence[j++] = string[local_mem_index];
sentence[j++] = string[local_mem_index+1];
sentence[j++] = string[local_mem_index+2];
sentence[j++] = string[local_mem_index+3];
sentence[j++] = string[local_mem_index+4];
}//if 'A'
//sentence[j] = 0;
}//if x+i
}//for
}// lx == 0
}
barrier(CLK_GLOBAL_MEM_FENCE);
}
I think something overflows somewhere, but I can't find where... Maybe there is something wrong with my code in main:
cl_int letter_count = 0;
cl_int next_letter_count = 1;
for (int i = 0; i < iter_count; i++)
{
//printf("%s\n", sentence_init);
letter_count = next_letter_count;
next_letter_count = STRING_LENGTH_PAR((i + 1));
printf("in count: %d out count: %d\n", letter_count, next_letter_count);
CheckOpenCLError(clSetKernelArg(kernel_iteration, 0, sizeof(cl_int), &letter_count), "clSetKernelArg: letter_count");
CheckOpenCLError(clSetKernelArg(kernel_iteration, 2, sizeof(cl_char)* (local * RULE_SIZE + 1), NULL), "clSetKernelArg: tmp_string");
CheckOpenCLError(clEnqueueNDRangeKernel(queue, kernel_iteration, 1, NULL, &global, &local, 0, NULL, &kernel_iteration_event), "clEnqueueNDRangeKernel: kernel_iteration");
CheckOpenCLError(clFinish(queue), "clFinish");
kernel_computing_time += getEventTime(kernel_iteration_event);
}
CheckOpenCLError(clEnqueueReadBuffer(queue, sentence_dev, CL_TRUE, 0, sizeof(cl_char)* (next_letter_count), sentence_result, 0, NULL, &result_iteration_event), "clEnqueueReadBuffer: result_iteration_event");
cl_int *p = (cl_int*)malloc(sizeof(cl_int)*(STRING_LENGTH_PAR(iter_count)));
CheckOpenCLError(clEnqueueReadBuffer(queue, p_dev, CL_TRUE, 0, sizeof(cl_int)* (STRING_LENGTH_PAR(iter_count)), p, 0, NULL, &result_iteration_event), "clEnqueueReadBuffer: result_iteration_event");

convert to send recv [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Closed 8 years ago.
I have already tried to gather it with MPI_Gatherv, but it still doesn't work: the program hangs while running. I have tried placing the MPI_Gatherv call everywhere in this code and still can't get it right. This is where I last put the MPI_Gatherv:
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int i;
int recvcounts[4] = { 0, 1, 2, 3 };
int displ[4] = { 0, 0, 1, 3 };
int buffer[6];
size_t total;
size_t elemsize;
int sizes[3];
int master=0;
char **argv;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if( rank == master )
{
//start=MPI_Wtime();
src= imread("E:/sobel/Debug/jari.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
dst = grey.clone();
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"citra terdiri dari "<<total<<" elements dengan ukuran yaitu "<<sizes[0]<<" x "<<sizes[1]<<endl;
if( !grey.data )
{ return -1; }
//start=MPI_Wtime();
}
//if( rank == master )
start=MPI_Wtime();
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
// cout<<"rank "<<rank<<" : "<<sizes[0]<<" x "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.create(sizes[0],sizes[1],CV_8U);
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
}
}
for (i=0; i<rank; i++)
{
buffer[i] = rank;
}
recvcounts[i]=grey.cols*(grey.rows/size);
displ[i+1]=displ[i]+recvcounts[i];
MPI_Gatherv(buffer, rank, MPI_INT,buffer, recvcounts, displ, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
for (i=0; i<6; i++)
fflush(stdout);
}
grey.release();
imwrite("E:/sobel/Debug/deteksi tepi mpi.jpg", grey2);
//grey2.release();
end=MPI_Wtime();
cout<<"rank "<<rank<<" : waktu eksekusi sobel MPI adalah : "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
I get an error at [i] (red underline):
int recvcounts[i]=grey.cols*(grey.rows/size);
int displ[i+1]=displ[i]+recvcounts[i];
What should I do? Please help me fix it. The program hangs when I execute the code with 4 processes.
In MPI, sending a pointer to an object is not enough, because each process has its own address space. Unlike threads or OpenMP, the default behavior is distributed-memory parallelism. If you write imwrite(name, grey2);, the image grey2 will be written size times. If you send the pointer grey from rank 0 to rank 1, the grey pointer on rank 1 will point to memory owned by rank 0, which will probably cause failures.
MPI offers many ways to communicate beyond MPI_Send() and MPI_Recv(). For instance, MPI_Bcast() is suitable for sending the image from rank 0 to all ranks: http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Bcast.html
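A minimal sketch of that two-step pattern (broadcast the dimensions first, then the pixels; the function and variable names here are hypothetical, not from the code below):
#include <mpi.h>
// Every rank ends up with its own copy of the image data.
// On rank 0, 'data' must already point at the pixels; on other ranks it is allocated here.
void broadcast_image(unsigned char *&data, int sizes[2], int rank, MPI_Comm comm)
{
    MPI_Bcast(sizes, 2, MPI_INT, 0, comm);               // all ranks learn rows and cols
    if (rank != 0)
        data = new unsigned char[sizes[0] * sizes[1]];   // non-root ranks allocate storage
    MPI_Bcast(data, sizes[0] * sizes[1], MPI_UNSIGNED_CHAR, 0, comm); // ship the pixel buffer
}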
I changed your code to use MPI_Bcast() by sending the size of the image first and then the data.
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int master=0;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == master )
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
/*else if (rank==1)
{
MPI_Recv(&grey, 1, MPI_LONG, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
cout<<"rank 1 menerima data"<<endl;
}*/
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
if(!grey.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
cout<<" ok !"<<endl;
}
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
grey2.create(sizes[0],sizes[1],CV_8U);
char name[100];
sprintf(name,"jari%d.jpg",rank+42+size);
imwrite(name, grey );
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
//cout<<sum<<endl;
}
}
grey.release();
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
sprintf(name,"jari%d.jpg",rank+42);
imwrite(name, grey2 );
grey2.release();
//MPI_Barrier(MPI_COMM_WORLD);
end=MPI_Wtime();
cout<<"time: "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
To retrieve the data on proc 0, the MPI_Gatherv() function seems useful: http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Gatherv.html or http://mpi.deino.net/mpi_functions/MPI_Gatherv.html. I'll let you go on with your code; you may need an extended look at tutorials and examples...
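As a rough sketch of how MPI_Gatherv is typically set up for this kind of row decomposition (the function and variable names below are hypothetical; it assumes rank r holds rows/size rows, with the last rank also taking the remainder, and 'full' only needs to be a valid buffer on rank 0):
#include <mpi.h>
#include <vector>
// Gather the per-rank row slices of a (rows x cols) 8-bit image back on rank 0.
void gather_rows(unsigned char *localSlice, int localRows,
                 int rows, int cols, unsigned char *full, MPI_Comm comm)
{
    int size = 0;
    MPI_Comm_size(comm, &size);
    std::vector<int> recvcounts(size), displs(size);
    for (int r = 0; r < size; ++r)
    {
        int r_rows = rows / size + (r == size - 1 ? rows % size : 0);
        recvcounts[r] = r_rows * cols;                                 // chars received from rank r
        displs[r] = (r == 0) ? 0 : displs[r - 1] + recvcounts[r - 1];  // offset of rank r's rows in 'full'
    }
    MPI_Gatherv(localSlice, localRows * cols, MPI_UNSIGNED_CHAR,
                full, recvcounts.data(), displs.data(), MPI_UNSIGNED_CHAR,
                0, comm);
}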
Edit:
I changed the code substantially and hope this version ends your quest...
I changed my mind and used MPI_Scatterv() to send a small slice of the image to each proc. I also changed the computation of the gradient, and then retrieved the image on one proc using MPI_Gatherv(). In the end the overall speedup is low, because most of the time is spent opening and writing files. Moreover, such filters (and this code in particular...) need a lot of memory bandwidth.
I fear that you did not fully understand how the first piece of code works, but this one is far from clear either... I had trouble with the indexes...
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
static inline int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
static inline int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
static inline int xGradientd(uchar* pt, int cols)
{
return ((int)(pt[-cols+1])+2*pt[1]+pt[cols+1]-pt[-cols-1]-2*pt[-1]-pt[cols-1]);
}
static inline int yGradientd(uchar* pt, int cols )
{
return ((int)(pt[cols-1])+2*pt[cols]+pt[cols+1]-pt[-cols-1]-2*pt[-cols]-pt[-cols+1]);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
Mat grey3;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == 0)
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
//start of parallel part. To this point, only proc 0 was working.
if( rank == 0 )
{
start=MPI_Wtime();
}
//the sizes of the image grey are send to all processus.
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
//cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
int recvcount[size];
int displ[size];
int i;
//compute size of local image
//on each proc, a little slice of the image will be received from proc 0 through MPI_Scatterv
//to compute the gradient, two extra lines should be send on top and bottom of slice.(except for 0 and sizes-1)
//this is why there are so many tests.
//how many pixels on the slice ? sendcount.
int sendcount=sizes[1]*(sizes[0]/size)+2*sizes[1];
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size))+sizes[1];
}
if(rank==0){
sendcount-=sizes[1];
}
//printf("creating image %d %d \n",sendcount/sizes[1],sizes[1]);
//image allocation :
grey3.create(sendcount/sizes[1],sizes[1],CV_8U);
if(!grey3.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
//cout<<" ok !"<<endl;
}
//compute sizes and offsets on proc 0
//how many char should be sent from proc 0 to proc i ? recvcount[i].
//where does the data starts ? displ[i].
//these information are needed by MPI_Scatterv() on proc 0
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size)+grey.cols;
if(i>0){
recvcount[i]+=grey.cols;
}
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1]-2*grey.cols;
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size>1){
recvcount[size-1]+=grey.cols;
}
if(size-1>0){
displ[size-1]=grey.cols*(grey.rows)-recvcount[size-1];
}
}
/*
if(rank==master){
for(i=0;i<size;i++){
printf("count %d displ %d \n",recvcount[i],displ[i]);
}
}
*/
MPI_Scatterv( grey.data, recvcount, displ, MPI_CHAR, grey3.data, sendcount,MPI_CHAR,0, MPI_COMM_WORLD);
/*
char name[100];
sprintf(name,"jariscat%d.jpg",rank);
imwrite(name, grey3 );
*/
//MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
//this local slice will store the result of the gradient operation
grey2.create(sendcount/sizes[1],sizes[1],CV_8U);
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*sizes[0]/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*sizes[0]/size);
if(stopy>sizes[0] - 1)
{stopy=sizes[0] - 1;}
int ii=grey3.cols;
uchar* data=grey3.data;
uchar* datad=grey2.data;
for(int y = starty; y < stopy; y++)
{
ii++;
for(int x = 1; x < sizes[1] - 1; x++)
{
//gx = xGradient(grey, x, y);
gx=xGradientd(&data[ii],grey2.cols);
gy=yGradientd(&data[ii],grey2.cols);
//gy = yGradient(grey, x, y);
//printf("%d %d \n",gx,gy);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
datad[ii] = sum;
//cout<<sum<<endl;
ii++;
}
ii++;
}
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
/*
sprintf(name,"jarigrad%d.jpg",rank);
imwrite(name, grey2 );
*/
// now, the data in grey2 should be sent from every processor in image grey on proc 0
//MPI_Gatherv will be used.
//on proc 0, count of bytes to be received from each processor should be computed
// as well as displacements representing where each part should be placed in image grey
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size);
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1];
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size-1>0){
displ[size-1]=recvcount[size-2]+displ[size-2];
}
}
//on each processor, how many lines should be sent ? sendcount.
//where does the data in grey2 starts ? tosend.
sendcount=sizes[1]*(sizes[0]/size);
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size));
}
uchar* tosend=&grey2.data[grey2.cols];
if(rank==0){
tosend=&grey2.data[0];
}
MPI_Gatherv(tosend,sendcount , MPI_CHAR,grey.data, recvcount, displ,MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.release();
//everything is back on proc 0 in image grey
end=MPI_Wtime();
if(rank==0){
imwrite("output.jpg", grey );
cout<<"time: "<< end-start << " detik " <<endl;
grey.release();
}
akhir=MPI_Finalize();
//waitKey();
return 0;
}
Bye,
Francis

C++ implementation of knapsack branch and bound

I am trying to write a C++ implementation of the knapsack problem using branch and bound. There is a Java version on this website: Implementing branch and bound for knapsack
My C++ version should print out 90, but instead it prints 5.
Does anyone know where and what the problem may be?
#include <queue>
#include <iostream>
#include <vector>
using namespace std;
struct node
{
int level;
int profit;
int weight;
int bound;
};
int bound(node u, int n, int W, vector<int> pVa, vector<int> wVa)
{
int j = 0, k = 0;
int totweight = 0;
int result = 0;
if (u.weight >= W)
{
return 0;
}
else
{
result = u.profit;
j = u.level + 1;
totweight = u.weight;
while ((j < n) && (totweight + wVa[j] <= W))
{
totweight = totweight + wVa[j];
result = result + pVa[j];
j++;
}
k = j;
if (k < n)
{
result = result + (W - totweight) * pVa[k]/wVa[k];
}
return result;
}
}
int knapsack(int n, int p[], int w[], int W)
{
queue<node> Q;
node u, v;
vector<int> pV;
vector<int> wV;
Q.empty();
for (int i = 0; i < n; i++)
{
pV.push_back(p[i]);
wV.push_back(w[i]);
}
v.level = -1;
v.profit = 0;
v.weight = 0;
int maxProfit = 0;
//v.bound = bound(v, n, W, pV, wV);
Q.push(v);
while (!Q.empty())
{
v = Q.front();
Q.pop();
if (v.level == -1)
{
u.level = 0;
}
else if (v.level != (n - 1))
{
u.level = v.level + 1;
}
u.weight = v.weight + w[u.level];
u.profit = v.profit + p[u.level];
u.bound = bound(u, n, W, pV, wV);
if (u.weight <= W && u.profit > maxProfit)
{
maxProfit = u.profit;
}
if (u.bound > maxProfit)
{
Q.push(u);
}
u.weight = v.weight;
u.profit = v.profit;
u.bound = bound(u, n, W, pV, wV);
if (u.bound > maxProfit)
{
Q.push(u);
}
}
return maxProfit;
}
int main()
{
int maxProfit;
int n = 4;
int W = 16;
int p[4] = {2, 5, 10, 5};
int w[4] = {40, 30, 50, 10};
cout << knapsack(n, p, w, W) << endl;
system("PAUSE");
}
I think you have put the profit and weight values in the wrong vectors. Change:
int p[4] = {2, 5, 10, 5};
int w[4] = {40, 30, 50, 10};
to:
int w[4] = {2, 5, 10, 5};
int p[4] = {40, 30, 50, 10};
and your program will output 90.
I believe what you are implementing is not exactly a branch & bound algorithm; if I had to match it with something, it is more like estimation-based backtracking.
The problem in your algorithm is the data structure you are using. You simply push all first-level nodes, then all second-level nodes, then all third-level nodes to the queue, and you get them back in their insertion order. You will get your result, but this amounts to searching the whole search space.
Instead of popping the elements in insertion order, you need to always branch on the node with the highest estimated bound. In other words, you are currently branching on every node in your path regardless of its estimated bound. The branch & bound technique gets its speed from branching each time on the single node that is most likely to lead to the result (the one with the highest estimated value).
Example: in your first iteration, assume you have found 2 nodes with estimated values
node1: 110
node2: 80
You push them both to your queue, which becomes "n2-n1-head". In the second iteration you push two more nodes after branching on node1:
node3: 100
node4: 95
and you add them to your queue as well ("n4-n3-n2-head"). There lies the error: in the next iteration you will pop node2, but it should be node3, which has the highest estimated value.
So unless I am missing something in your code, both your implementation and the Java implementation are wrong. You should instead use a priority queue (heap) to implement a real branch & bound, as sketched below.
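A minimal sketch of that change (reusing the node struct from the question; only the container type changes): a std::priority_queue ordered by the estimated bound always hands back the most promising node first.
#include <queue>
#include <vector>
struct node
{
    int level;
    int profit;
    int weight;
    int bound;
};
// Comparator so that the node with the highest estimated bound sits on top (max-heap on bound).
struct CompareBound
{
    bool operator()(const node &a, const node &b) const
    {
        return a.bound < b.bound;
    }
};
int main()
{
    // In knapsack(), instead of `queue<node> Q;` use:
    std::priority_queue<node, std::vector<node>, CompareBound> Q;
    Q.push({0, 0, 0, 42});
    Q.push({1, 10, 5, 90});
    // Q.top() now returns the node with bound 90, i.e. the one a best-first
    // branch & bound should expand next; Q.pop() removes it.
    const node &best = Q.top();
    (void)best; // silence unused-variable warnings in this standalone sketch
    return 0;
}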
You are setting W to 16, so the result is 5: the only item you can take into the knapsack is item 3, with profit 5 and weight 10.
#include <bits/stdc++.h>
using namespace std;
struct Item
{
float weight;
int value;
};
struct Node
{
int level, profit, bound;
float weight;
};
bool cmp(Item a, Item b)
{
double r1 = (double)a.value / a.weight;
double r2 = (double)b.value / b.weight;
return r1 > r2;
}
int bound(Node u, int n, int W, Item arr[])
{
if (u.weight >= W)
return 0;
int profit_bound = u.profit;
int j = u.level + 1;
int totweight = u.weight;
while ((j < n) && (totweight + arr[j].weight <= W))
{
totweight = totweight + arr[j].weight;
profit_bound = profit_bound + arr[j].value;
j++;
}
if (j < n)
profit_bound = profit_bound + (W - totweight) * arr[j].value /
arr[j].weight;
return profit_bound;
}
int knapsack(int W, Item arr[], int n)
{
sort(arr, arr + n, cmp);
queue<Node> Q;
Node u, v;
u.level = -1;
u.profit = u.weight = 0;
Q.push(u);
int maxProfit = 0;
while (!Q.empty())
{
u = Q.front();
Q.pop();
if (u.level == -1)
v.level = 0;
if (u.level == n-1)
continue;
v.level = u.level + 1;
v.weight = u.weight + arr[v.level].weight;
v.profit = u.profit + arr[v.level].value;
if (v.weight <= W && v.profit > maxProfit)
maxProfit = v.profit;
v.bound = bound(v, n, W, arr);
if (v.bound > maxProfit)
Q.push(v);
v.weight = u.weight;
v.profit = u.profit;
v.bound = bound(v, n, W, arr);
if (v.bound > maxProfit)
Q.push(v);
}
return maxProfit;
}
int main()
{
int W = 55; // Weight of knapsack
Item arr[] = {{10, 60}, {20, 100}, {30, 120}};
int n = sizeof(arr) / sizeof(arr[0]);
cout << "Maximum possible profit = "
<< knapsack(W, arr, n);
return 0;
}
See if this helps.