MPI_Scatter issue, can't scatter and gather a picture matrix - C++

Hello, I have a problem with my C++ code. I'm trying to make a parallel implementation of my sequential Sobel operator code using OpenCV.
My idea is to scatter the picture using a 2D buffer, apply the Sobel operation to each block of averaged_rows*cols, and then gather the results. Once I have sent averaged_rows and every rank has received it, I call MPI_Scatter and this execution error appears:
sent to 1
sent to 2
sent to 3
recieved by 1
recieved by 2
recieved by 3
[roronoasins-GL552VW:3245] *** An error occurred in MPI_Scatter
[roronoasins-GL552VW:3245] *** reported by process [1759117313,1]
[roronoasins-GL552VW:3245] *** on communicator MPI_COMM_WORLD
[roronoasins-GL552VW:3245] *** MPI_ERR_TRUNCATE: message truncated
[roronoasins-GL552VW:3245] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[roronoasins-GL552VW:3245] *** and potentially your MPI job)
[roronoasins-GL552VW:03239] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[roronoasins-GL552VW:03239] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
What I actually do is scatter the pic buffer, broadcast the picture to the rest of the ranks, and then gather the results:
MPI_Scatter(pic, cols*rows_av, MPI_INT, picAux, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (pic3, cols*rows, MPI_INT, 0, MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*pic_struct[2]; y < (ip+1)*pic_struct[2] -1; y++){
for(int x = 1; x < pic_struct[1]- 1; x++){
int gx = x_gradient(pic3, x, y);
int gy = y_gradient(pic3, x, y);
int sum = abs(gx) + abs(gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_INT, pic, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
I'd like to know what is happening with the Scatter call; I thought I could scatter pieces of the picture to the other ranks to compute the Sobel operator on each piece, but maybe I'm wrong.
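(For reference, a minimal sketch of a scatter/gather pair in which every rank passes matching counts; it assumes a contiguous int buffer and that rows divides evenly by the number of processes, so each rank can compute rows_av locally. The names here are hypothetical, not taken from my code.)
#include <mpi.h>
#include <vector>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int np, ip;
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &ip);

    const int rows = 512, cols = 512;     // hypothetical image size
    const int rows_av = rows / np;        // computed on EVERY rank, not only on root

    std::vector<int> pic;                 // full image, only filled on root
    if (ip == 0) pic.resize(rows * cols, 0);
    std::vector<int> slice(rows_av * cols);   // this rank's strip

    // send count and recv count describe the same number of elements on every rank
    MPI_Scatter(pic.data(), rows_av * cols, MPI_INT,
                slice.data(), rows_av * cols, MPI_INT, 0, MPI_COMM_WORLD);

    // ... apply the Sobel operator to slice here ...

    MPI_Gather(slice.data(), rows_av * cols, MPI_INT,
               pic.data(), rows_av * cols, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}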
My full code is below if you want to check it. Thanks for your time.
// > compile with mpic++ mpi_sobel.cpp -o mpi_sobel `pkg-config --libs opencv` -fopenmp -lstdc++
#include <iostream>
#include <cmath>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <omp.h>
#include <mpi.h>
using namespace std;
using namespace cv;
Mat src, dst;
/*
Computes the x component of the gradient vector
at a given point in a image.
returns gradient in the x direction
| 1 0 -1 |
Gx = | 2 0 -2 |
| 1 0 -1 |
*/
int x_gradient(int** image, int x, int y)
{
return image[y-1][x-1] +
2*image[y][x-1] +
image[y+1][x-1] -
image[y-1][x+1] -
2*image[y][x+1] -
image[y+1][x+1];
}
/*
Computes the y component of the gradient vector
at a given point in a image
returns gradient in the y direction
| 1 2 1 |
Gy = | 0 0 0 |
|-1 -2 -1 |
*/
int y_gradient(int** image, int x, int y)
{
return image[y+1][x-1] +
2*image[y+1][x] +
image[y+1][x+1] -
image[y-1][x-1] -
2*image[y-1][x] -
image[y-1][x+1];
}
int main(int argc, char** argv)
{
string picture;
if (argc == 2) {
picture = argv[1];
src = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE);
}
else {
picture = "input/logan.jpg";
src = imread(picture.c_str(), CV_LOAD_IMAGE_GRAYSCALE);
}
if( !src.data )
{ return -1; }
dst.create(src.rows, src.cols, src.type());
int rows_av, rows_extra;
Size s = src.size();
int rows = s.height;
int cols = s.width;
int pic[rows][cols];
int picAux[rows][cols];
int ** pic3;
pic3 = new int*[rows];
for(int y = 0; y < rows; y++)
pic3[y] = new int[cols];
int pic_struct[3], pic_struct_recv[3];
int np, ip;
double start_time = omp_get_wtime();
if (MPI_Init(&argc, &argv) != MPI_SUCCESS){
exit(1);
}
MPI_Comm_size(MPI_COMM_WORLD, &np);
MPI_Comm_rank(MPI_COMM_WORLD, &ip);
MPI_Status status;
if(ip==0)
{
for(int y = 0; y < rows ; y++)
for(int x = 0; x < cols; x++)
{
pic3[y][x] = src.at<uchar>(y,x);
pic[y][x] = 0;
picAux[y][x] = 0;
}
src.release();
rows_av = rows/np;
//cols_av = cols/np;
pic_struct[0] = rows;
pic_struct[1] = cols;
pic_struct[2] = rows_av;
//pic_struct[3] = cols_av:
for(int i=1; i < np; i++)
{
//rows = (i <= rows_extra) ? rows_av+1 : rows_av;
pic_struct[0] = rows;
MPI_Send(&pic_struct, sizeof(pic_struct), MPI_BYTE, i, 0, MPI_COMM_WORLD);
cout << "sent to " << i << endl;
}
}else{//ip
MPI_Recv(&pic_struct, sizeof(pic_struct), MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
cout << "recieved by " << ip << endl;
}
MPI_Scatter(pic, cols*rows_av, MPI_INT, picAux, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast (pic3, cols*rows, MPI_INT, 0, MPI_COMM_WORLD);
cout << "bcast" << endl;
//MPI_Barrier(MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*pic_struct[2]; y < (ip+1)*pic_struct[2] -1; y++){
for(int x = 1; x < pic_struct[1]- 1; x++){
ip_gx = x_gradient(src, x, y);
ip_gy = y_gradient(src, x, y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_INT, pic, cols*rows_av, MPI_INT, 0, MPI_COMM_WORLD);
cout << "gather" << endl;
MPI_Finalize();
if(!ip)
{
double time = omp_get_wtime() - start_time;
for( int i = 0 ; i < rows ; i++ )
{
delete [] pic3[i] ;
delete [] pic3 ;
}
cout << "Number of processes: " << np << endl;
cout << "Rows, Cols: " << rows << " " << cols << endl;
cout << "Rows, Cols(Division): " << rows_av << ", " << cols << endl << endl;
cout << "Processing time: " << time << endl;
for(int i=0; i < 6 ; i++) picture.erase(picture.begin());
for(int i=0; i < 4 ; i++) picture.pop_back();
picture.insert(0,"output/");
picture += "-sobel.jpg";
for(int y = 0; y < rows; y++)
for(int x = 0; x < cols; x++)
dst.at<uchar>(y,x) = pic[y][x];
if(imwrite(picture.c_str(), dst)) cout << "Picture correctly saved as " << picture << endl;
else cout << "\nError has occurred being saved." << endl;
}
return 0;
}
Update: I forgot to set rows_av on ranks != 0, and the pic3 sending is fixed. I've packed src into a contiguous buffer, and it arrives correctly on each rank.
Updated code here: https://pastebin.com/jPV9mGFW
I have noticed that roughly 3/4 of the output image is dark and noisy. With this new issue I don't know whether the gather is now the problem, or whether I am computing the per-process row range (number_process*rows/total_processes) incorrectly.
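(For reference, a minimal sketch of the kind of packing mentioned in the update, assuming a CV_8U grayscale cv::Mat named src; it needs <vector> and <cstring>.)
// Copy the pixels into one contiguous unsigned char buffer so MPI can scatter plain bytes.
std::vector<unsigned char> buf(src.rows * src.cols);
if (src.isContinuous()) {
    std::memcpy(buf.data(), src.data, buf.size());
} else {
    for (int y = 0; y < src.rows; ++y)
        std::memcpy(buf.data() + y * src.cols, src.ptr<unsigned char>(y), src.cols);
}
// buf.data() is then a valid send buffer for MPI_Scatter with MPI_UNSIGNED_CHAR.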
MPI_Scatter(pic, cols*rows_av, MPI_UNSIGNED_CHAR, picAux, cols*rows_av, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
int ip_gx, ip_gy, sum;
for(int y = ip*rows_av+1; y < (ip+1)*rows_av-1; y++){
for(int x = 1; x < cols ; x++){
ip_gx = x_gradient(src, x, y);
ip_gy = y_gradient(src, x, y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y][x] = sum;
//picAux[y*rows_av+x] = sum;
}
}
MPI_Gather(picAux, cols*rows_av, MPI_UNSIGNED_CHAR, pic, cols*rows_av, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
Loop updated, and the image is now fully computed, but I can't use images bigger than 2048x1536.
for(int y = 1; y < rows_av-1; y++){
for(int x = 1; x < cols ; x++){
ip_gx = x_gradient(src, x, ip*rows_av+y);
ip_gy = y_gradient(src, x, ip*rows_av+y);
sum = abs(ip_gx) + abs(ip_gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
picAux[y*cols+x] = sum;
}
}
How could I send images larger than 2048x1536?
--------------------------------------------------------------------------
mpirun noticed that process rank 2 with PID 0 on node roronoasins-GL552VW exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
The image-size issue was the limited stack size: with ulimit -s unlimited it works fine, but I'm now working on improving memory efficiency. The latest code will be updated in the pastebin link above.
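(Since the limit turned out to be the stack, one way to avoid depending on ulimit is to keep the big buffers on the heap instead of in stack arrays such as int pic[rows][cols]. A minimal sketch with hypothetical names; requires <vector>.)
// Heap-allocated replacements for the stack arrays: std::vector stores its elements
// on the heap, so large images no longer hit the stack limit and
// "ulimit -s unlimited" is not needed.
std::vector<unsigned char> pic(rows * cols);        // full image, only needed on rank 0
std::vector<unsigned char> picAux(rows_av * cols);  // per-rank strip
// index with pic[y * cols + x] instead of pic[y][x];
// pass pic.data() / picAux.data() to MPI_Scatter and MPI_Gather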

Related

MPI C++ Runtime Error: signal 11 (Segmentation fault) with multi-dimensional array creation

Making Mandelbrot with MPI
So I've made a Mandelbrot generator and everything worked fine. Now I'm throwing in a speedup from MPI. Process 0 generates a file named mbrot.ppm and adds the appropriate metadata, then divides up the workload into chunks.
Each process receives the chunk's starting and ending positions and gets to work calculating its portion of the Mandelbrot set. To write to the mbrot.ppm file, each process saves its data in an array so it doesn't write to the file before the previous process finishes.
My Problem
It's a runtime error that says:
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node Lenovo exited on signal 11 (Segmentation fault).
I believe it comes from the line int data[3][xrange][yrange]; (line 120) since the print statement after this line never executes. Would there be an obvious reason I'm missing why this multi-dimensional array is causing me problems?
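(A possible explanation: with DIM = 2000 and 4 processes, int data[3][xrange][yrange] is 3 x 2000 x 500 ints, roughly 12 MB, which exceeds the usual 8 MB default stack, so that declaration alone can fault. A minimal sketch of a heap-backed replacement, using a hypothetical helper type that is not part of the original code:)
#include <vector>

// Heap-backed replacement for "int data[3][xrange][yrange];" (hypothetical helper).
struct PixelData {
    int xrange, yrange;
    std::vector<int> buf;                                   // lives on the heap
    PixelData(int xr, int yr) : xrange(xr), yrange(yr), buf(3 * xr * yr, 0) {}
    int& at(int c, int x, int y) { return buf[(c * xrange + x) * yrange + y]; }
};

// usage inside main():
//   PixelData data(xrange, yrange);
//   data.at(0, i - chunk[0], j - chunk[2]) = rcolor(iters);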
Full Code
#include <iostream>
#include <mpi.h>
#include <unistd.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#define MCW MPI_COMM_WORLD
using namespace std;
struct Complex {
double r;
double i;
};
Complex operator + (Complex s, Complex t) {
Complex v;
v.r = s.r + t.r;
v.i = s.i + t.i;
return v;
};
Complex operator * (Complex s, Complex t) {
Complex v;
v.r = s.r * t.r - s.i * t.i;
v.i = s.r * t.i + s.i * t.r;
return v;
};
int rcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int gcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int bcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int mbrot(Complex c, int maxIters) {
int i = 0;
Complex z;
z = c;
while (i < maxIters && z.r * z.r + z.i * z.i < 4) {
z = z * z + c;
i++;
}
return i;
};
int main(int argc, char * argv[]) {
int rank, size;
MPI_Init( & argc, & argv);
MPI_Comm_rank(MCW, & rank);
MPI_Comm_size(MCW, & size);
if (size < 2) {
printf("Not an MPI process if only 1 process runs.\n");
exit(1);
}
if (size % 2 != 0) {
printf("Please use a even number\n");
exit(1);
}
Complex c1, c2, c;
char path[] = "brot.ppm";
int DIM;
int chunk[4];
c1.r = -1;
c1.i = -1;
c2.r = 1;
c2.i = 1;
if (rank == 0) { //start the file
ofstream fout;
fout.open(path);
DIM = 2000; // pixel dimensions
fout << "P3" << endl; // The file type .ppm
fout << DIM << " " << DIM << endl; // dimensions of the image
fout << "255" << endl; // color depth
fout.close();
// making dimesions marks
for (int i = 0; i < size; i++) {
chunk[0] = 0; // startX
chunk[1] = DIM; // endX
chunk[2] = (DIM / size) * i; // startY
chunk[3] = (DIM / size) * (i + 1); // endY
MPI_Send(chunk, 4, MPI_INT, i, 0, MCW);
};
};
MPI_Recv(chunk, 4, MPI_INT, 0, 0, MCW, MPI_STATUS_IGNORE);
printf("Process %d recieved chunk\n\t StartX: %d, EndX: %d\n\t StartY: %d, EndY: %d\n", rank, chunk[0], chunk[1], chunk[2], chunk[3]);
// do stuff save in array
// data[3 elements][Xs][Ys]
int xrange = chunk[1] - chunk[0];
int yrange = chunk[3] - chunk[2];
printf("Process %d, x: %d, y: %d\n", rank, xrange, yrange);
int data[3][xrange][yrange];
printf("done\n");
// generate data for mandlebrot
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
// calculate one pixel of the DIM x DIM image
c.r = (i * (c1.r - c2.r) / DIM) + c2.r;
c.i = (j * (c1.i - c2.i) / DIM) + c2.i;
int iters = mbrot(c, 255);
data[0][i][j] = rcolor(iters);
data[1][i][j] = gcolor(iters);
data[2][i][j] = bcolor(iters);
}
}
printf("here2\n");
// taking turns to write their data to file
for (int k = 0; k < size; k++) {
if (rank == k) {
ofstream fout;
fout.open(path, ios::app);
fout << rank << " was here" << endl;
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
fout << data[0][i][j] << " " << data[1][i][j] << " " << data[2][i][j] << " ";
}
fout << endl;
}
printf("Process %d done and waiting\n", rank);
} else {
MPI_Barrier(MCW);
}
}
MPI_Finalize();
};
How to Run
$ mpic++ -o mbrot.out mbrot.cpp
$ mpirun -np 4 mbrot.out

MPI Gatherv with submatrices

I'm having trouble getting MPI_Gatherv to work how I intend, and was wondering whether those of you who are more experienced can see what I'm doing wrong.
I have a large matrix (TEST) of [N, M]. Each process does some work on a subset [nrows, M] (WORK_MATRIX) and then every process gathers these submatrices (along the row dimension) into the full matrix.
It seems like it doesn't gather any of the data, and I'm struggling to figure out why!
Here I'm using Eigen to wrap these (contiguous) matrices.
Output:
mpirun -np 5 ./pseudo.x
1 1 1 1 1
0 1 2 3 4
TEST: 5 10
0 0 2 0 0 0 0 0 0 0
1 1 2 0 0 0 0 0 0 0
2 2 2 0 0 0 0 0 0 0
3 2 2 0 0 0 0 0 0 0
4 2 0 0 0 0 0 0 0 0
I've created a simple version of the code below:
mpiicc -I/path/to/Eigen -o pseudo.x pseudo.cpp
#include <mpi.h>
#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;
using namespace std;
int main(int argc, char ** argv) {
int RSIZE = 5;
int CSIZE = 10;
int rank;
int num_tasks;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
MatrixXd TEST_MATRIX = MatrixXd::Zero(RSIZE, CSIZE);
VectorXi recv = VectorXi::Zero(num_tasks);
VectorXi displs = VectorXi::Zero(num_tasks);
int nrows = (RSIZE + rank) / num_tasks;
MPI_Allgather(&nrows, 1, MPI_INT, recv.data(), 1, MPI_INT, MPI_COMM_WORLD);
int start = 0;
for (int i = 0; i < rank; i++)
start += recv[i];
MPI_Allgather(&start, 1, MPI_INT, displs.data(), 1, MPI_INT, MPI_COMM_WORLD);
if (rank == 0) {
cout << recv.transpose() << endl;
cout << displs.transpose() << endl;
}
MatrixXd WORK_MATRIX = MatrixXd::Zero(nrows, CSIZE);
for (int row = 0; row < nrows; row++)
for (int col = 0; col < CSIZE; col++)
WORK_MATRIX(row, col) += rank;
MPI_Datatype rowsized, row;
int sizes[2] = { RSIZE, CSIZE };
int subsizes[2] = { nrows, CSIZE };
int starts[2] = { 0, 0 };
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &rowsized);
MPI_Type_create_resized(rowsized, 0, sizeof(double), &row);
MPI_Type_commit(&row);
MPI_Allgatherv(WORK_MATRIX.data(), recv[rank], row, TEST_MATRIX.data(), recv.data(), displs.data(), row, MPI_COMM_WORLD);
if (rank == 0) {
cout << "TEST: " << TEST_MATRIX.rows() << " " << TEST_MATRIX.cols() << endl;
for (int i = 0; i < TEST_MATRIX.rows(); i++) {
for (int j = 0; j < TEST_MATRIX.cols(); j++) {
cout << TEST_MATRIX(i, j) << " ";
}
cout << endl;
}
}
}
In C, 2D matrices are stored row by row, and I doubt Eigen changes that.
That means you do not need to resize your datatype, and the displacements should be adjusted:
start += recv[i] * CSIZE;
As a matter of taste, you do not need the two MPI_Allgather() calls at all, since nrows and start can be computed locally.
I'd rather suggest you simply create a derived datatype for one row with MPI_Type_contiguous() (and this type should not be resized), since MPI_Type_create_subarray() is really overkill here.
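(A minimal sketch of that suggestion, assuming row-major storage, e.g. plain C-style buffers or Eigen matrices declared RowMajor; counts and displacements are expressed in rows and computed locally, and a single MPI_Type_contiguous row type is used without resizing. The function name and buffers are hypothetical.)
#include <mpi.h>
#include <vector>

void gather_rows(int RSIZE, int CSIZE, int rank, int num_tasks,
                 std::vector<double>& work /* nrows * CSIZE, row-major */)
{
    int nrows = (RSIZE + rank) / num_tasks;          // same formula as the question

    // counts and displacements (in rows) can be computed locally on every rank
    std::vector<int> counts(num_tasks), displs(num_tasks);
    for (int i = 0, off = 0; i < num_tasks; ++i) {
        counts[i] = (RSIZE + i) / num_tasks;
        displs[i] = off;
        off += counts[i];
    }

    // one derived type describing a single row; no resizing needed
    MPI_Datatype row;
    MPI_Type_contiguous(CSIZE, MPI_DOUBLE, &row);
    MPI_Type_commit(&row);

    std::vector<double> test(RSIZE * CSIZE, 0.0);    // full matrix, row-major
    MPI_Allgatherv(work.data(), nrows, row,
                   test.data(), counts.data(), displs.data(), row,
                   MPI_COMM_WORLD);

    MPI_Type_free(&row);
}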

APPCRASH (not when debugging) and Segmentation fault using QtCreator (C/C++)

I'm using QtCreator to code an algorithm that I have already written in Matlab.
While coding this program, I have hit two errors. The first one (APPCRASH) appears only when I build and execute the program normally, but not when I try to debug it (a Heisenbug), and it occurs in the function 'matriceA'. I tried making the variables volatile and writing the matrix A term formulas in another function, hoping that would stop the compiler optimization (I think compiler optimization might be causing the problem), but I have not been able to solve it. I have not tried compiling the project with the -O0 option because my professor (it's a university project) has to be able to compile it normally (without specific options).
The second one is a SIGSEGV segmentation fault. It happens when the code reaches "DestroyFloatArray(&b, width);" in InpaintingColor.
And here is the code:
clanu_process.cpp (it's a little messy because I've tried a lot of things...)
#include "clanu_process.h"
#include "iomanip"
void InpaintingColor(float **Rout, float **Gout, float **Bout, float **Rin, float **Gin, float **Bin, float **Mask, int width, int height, double param)
{
cout << "1" << endl;
float alphak = 0, bethak = 0, res = 0;
float **b = 0, **xk = 0, **dk = 0, **rk = 0, **Ark = 0, **tmp1 = 0,**tmp2 = 0,**tmp3 = 0;
Ark = AllocateFloatArray( width, height);
tmp1 = AllocateFloatArray( width, height);
tmp2 = AllocateFloatArray( width, height);
tmp3 = AllocateFloatArray( width, height);
xk = AllocateFloatArray( width, height);
dk = AllocateFloatArray( width, height);
rk = AllocateFloatArray( width, height);
b = AllocateFloatArray( width, height);
cout << "2" << endl;
res = 1e8;
matrixProductByScalar(b,1.0/(3.0*256),Rin,width,height);
matrixDuplicate(xk, b, width, height);
// APPCRASH error
matriceA(Ark,xk,Mask,width,height);
//More code
// SIGSEGV error
DestroyFloatArray(&b, width);
DestroyFloatArray(&xk, width);
DestroyFloatArray(&dk, width);
DestroyFloatArray(&rk, width);
DestroyFloatArray(&Ark, width);
DestroyFloatArray(&tmp1, width);
DestroyFloatArray(&tmp2, width);
DestroyFloatArray(&tmp3, width);
}
float** matriceA(float **A, float **I, float **Masque, int N2, int N1){
volatile bool bool_iplus = false, bool_imoins = false, bool_jmoins = false, bool_jplus = false;
volatile int iplus = 0, imoins = 0, jplus = 0, jmoins = 0;
for(int i = 1; i <= N1; i++){
bool_iplus = i<N1;
iplus = i+1 < N1 ? i+1 : N1;
bool_imoins = i>1;
imoins = i-1 > 1 ? i-1 : 1;
for(int j = 1; j <= N2; j++){
bool_jplus = j<N2;
jplus = j+1 < N2 ? j+1 : N2;
bool_jmoins = j>1;
jmoins = j -1 > 1 ? j-1 : 1;
if(Masque[i-1][j-1]!=0){
//cout << "if - " << i << ", " << j<< endl;
A[i-1][j-1] = (1.0/36)*(16*I[i-1][j-1]
+ 4*(
(bool_iplus?I[iplus-1][j-1]:0)
+ (bool_imoins?I[imoins-1][j-1]:0)
+ (bool_jplus?I[i-1][jplus-1]:0)
+ (bool_jmoins?I[i-1][jmoins-1]:0)
)+(
(bool_iplus&&bool_jplus?I[iplus-1][jplus-1]:0)
+ (bool_imoins&&bool_jplus?I[imoins-1][jplus-1]:0)
+ (bool_imoins&&bool_jmoins?I[imoins-1][jmoins-1]:0))
+ (bool_iplus&&bool_jmoins?I[iplus-1][jmoins-1]:0));
}else{
//cout << "else - " << i << ", " << j << endl;
A[i-1][j-1]=
-(1.0*N1*N2)*(
-8.0*I[i-1][j-1]
+ I[iplus-1][j-1]
+ I[imoins-1][j-1]
+ I[i-1][jplus-1]
+ I[i-1][jmoins-1]
+ I[iplus-1][jplus-1]
+ I[imoins-1][jplus-1]
+ I[imoins-1][jmoins-1]
+ I[iplus-1][jmoins-1]);
}
}
}
return A;
}
The functions AllocateFloatArray and DestroyFloatArray
float ** AllocateFloatArray(int width, int height)
{
float ** r = new float*[width];
for(int i=0; i<width; i++)
r[i] = new float[height];
return r;
}
void DestroyFloatArray(float ***a, int width)
{
if( *a == 0 ) return;
for(int i=0; i<width; i++)
delete[] a[0][i];
delete[] *a;
*a = 0;
}
Thank you for your time.
I'm not sure that it's the cause of your problem, but...
Your "matrix operation" functions (sum(), matrixSubstraction(), matrixAddition(), matrixProductByElement(), matrixProductByScalar(), and matrixDuplicate()) range the first index from zero to width and the second one from zero to height.
If I'm not wrong, this is correct and consistent with the allocation/deallocation (AllocateFloatArray() and DestroyFloatArray()).
But look at the two matriceA() functions; they are defined as
float** matriceA(float **A, float **I, int N2, int N1)
float** matriceA(float **A, float **I, float **Masque, int N2, int N1)
In both functions the first index ranges from zero to N1 and the second one from zero to N2; for example
for(int i = 1; i <= N1; i++){
// ...
for(int j = 1; j <= N2; j++){
// ...
A[i-1][j-1] = (1.0/36)*(16*I[i-1][j-1] // ...
Good. But you call matriceA() in this way
matriceA(Ark,rk,Mask,width,height);
Briefly: you allocate your matrices as width * height; your "matrix operations" use them as width * height, but your matriceA() functions use them as height * width.
A wonderful way to devastate the memory.
I suppose the solution could be to
1) switch N1 and N2 in the matriceA() definition,
2) or switch width and height in the matriceA() call (see the sketch below).
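For instance, option 2) would just be a one-line change at the call site (a sketch; it assumes matriceA() keeps its current loops):
// AllocateFloatArray(width, height) builds arrays indexed [0..width-1][0..height-1].
// matriceA(A, I, Masque, N2, N1) runs the first index up to N1 and the second up to N2,
// so the call should pass N2 = height and N1 = width:
matriceA(Ark, xk, Mask, height, width);   // instead of matriceA(Ark, xk, Mask, width, height);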
p.s.: sorry for my bad English.

DTW Algorithm OCT-file

I am trying to create a Dynamic Time Warping (DTW) function, which will calculate the minimum distance between the two signals provided to it. It is based on the following algorithm.
DTW Algorithm:
int DTWDistance(s: array [1..n], t: array [1..m]) {
DTW := array [0..n, 0..m]
w := abs(n-m)// adapt window size (*)
for i := 0 to n
for j:= 0 to m
DTW[i, j] := infinity
DTW[0, 0] := 0
for i := 1 to n
for j := max(1, i-w) to min(m, i+w)
cost := d(s[i], t[j])
DTW[i, j] := cost + minimum(DTW[i-1, j ], // insertion
DTW[i, j-1], // deletion
DTW[i-1, j-1]) // match
return DTW[n, m]
More info: DTW Algorithm
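(As a reference point, here is a direct C++ translation of the pseudocode above; a sketch only, assuming 0-based std::vector inputs and a large sentinel value instead of infinity.)
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <vector>

// Windowed DTW distance, translated directly from the pseudocode above.
double dtw_distance(const std::vector<double>& s, const std::vector<double>& t)
{
    const int n = static_cast<int>(s.size());
    const int m = static_cast<int>(t.size());
    const int w = std::abs(n - m);                        // window must cover |n - m|
    const double INF = 1e30;                              // stands in for infinity

    std::vector<std::vector<double>> dtw(n + 1, std::vector<double>(m + 1, INF));
    dtw[0][0] = 0.0;

    for (int i = 1; i <= n; ++i)
        for (int j = std::max(1, i - w); j <= std::min(m, i + w); ++j) {
            const double cost = std::fabs(s[i - 1] - t[j - 1]);  // d(s[i], t[j])
            dtw[i][j] = cost + std::min({dtw[i - 1][j],          // insertion
                                         dtw[i][j - 1],          // deletion
                                         dtw[i - 1][j - 1]});    // match
        }
    return dtw[n][m];
}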
I was able to create an Octave function for this algorithm, and it's working properly.
Octave Function:
function dtw_distance = dtw2(a,b)
length_a = length(a);
length_b = length(b);
an=zeros(length_a+1,length_b+1);
an(:,:)=9999;
an(1,1)=0;
cost=0;
#Here we have also implemented the window size.
w=abs(length_a-length_b);
for i=1:length_a
for j=max(1,i-w):min(length_b,i+w)
cost=abs(a(i)-b(j));
an(i+1,j+1)=cost+min([an(i,j+1),an(i+1,j),an(i,j)]);
end
end
an;
dtw_distance=an(length_a+1,length_b+1);
The computation time of this code increases as the argument size increases, so I am trying to create an OCT file, written in C++, for faster execution.
C++ OCT File:
#include <octave/oct.h>
octave_idx_type getMax(octave_idx_type a, octave_idx_type b){
return (a>b)?a:b;
}
octave_idx_type getMin(octave_idx_type a, octave_idx_type b){
return (a<b)?a:b;
}
DEFUN_DLD (dtw3, args, , "Find DTW of two Signals With Window")
{
int nargin = args.length();
if (nargin != 2)
print_usage();
else
{
NDArray A = args(0).array_value();
NDArray B = args(1).array_value();
octave_stdout << "Size of A is" << A.length();
octave_stdout << "Size of B is" << B.length();
if (! error_state)
{
octave_idx_type row = A.length()+1;
octave_idx_type col = B.length()+1;
Matrix results (row,col);
for(octave_idx_type i = 0; i <= row ; i++)
{
for(octave_idx_type j=0; j<= col ; j++)
{
results(i,j)=9999;
}
}
octave_stdout << "row col" << results.dim1() << results.dim2() ;
octave_stdout << "row end" << results(row,0) ;
octave_stdout << "col end" << results(0,col) ;
results(0,0)=0;
octave_idx_type win = (row>col)?(row-col):(col-row);
octave_idx_type cost = 0;
for(octave_idx_type i = 1 ; i <= row ; i++)
{
for(octave_idx_type j = getMax(1,i-win) ; j <= getMin(col,i+win) ; j++)
{
cost=(A(i)>B(j))?(A(i)-B(j)):(B(j)-A(i));
results(i,j)= cost + getMin(getMin(results(i-1,j),results(i,j-1)),results(i-1,j-1));
}
}
octave_stdout << "Ans is: " << results(row,col);
return octave_value(results(row,col));
}
}
}
Sample Input/Output
Input - Arg1: [1 2 3 4 5] , Arg2: [1 2 3 4 5 6 7]
Output:
For Octave Function: Ans is 3
For OCT File:
*** Error in `/usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': double free or corruption (!prev): 0x00007f24e81eb0a0 ***
panic: Aborted -- stopping myself...
*** Error in `/usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': malloc(): memory corruption: 0x00007f24e81eb230 ***
Input - Arg1: A=rand(1,221), Arg2: B=rand(1,299)
Output:
For Octave Function: Ans is 72.63
For OCT File:
*** Error in `/usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': double free or corruption (!prev): 0x00007f57a06ad940 ***
panic: Aborted -- stopping myself...
Size of A is221Size of B is299row col222300row end9999col end9999Ans is:1 attempting to save variables to 'octave-workspace'...
save to 'octave-workspace' complete
Aborted (core dumped)
My Problem:
First of all, what is this double free / corruption error I am getting when using the OCT file?
The answers from the Octave function and the OCT file are different; what is the error in the OCT file that is causing this?
Thank you.
First, you should read how to debug oct files (http://wiki.octave.org/Debugging_Octave#Debugging_oct-files)
Then you'll find this part:
Matrix results (row,col);
for(octave_idx_type i = 0; i <= row ; i++)
{
for(octave_idx_type j=0; j<= col ; j++)
{
results(i,j)=9999;
}
}
The Matrix results has dimensions (row, col), but you are writing up to i <= row and j <= col, which is one element beyond the bounds in each dimension. Try i < row and j < col.
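For instance, the in-bounds version of that initialization (the same fragment as above, only with corrected loop bounds) would be:
Matrix results (row, col);
for (octave_idx_type i = 0; i < row; i++)
{
    for (octave_idx_type j = 0; j < col; j++)
    {
        results(i, j) = 9999;
    }
}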
There were too many problems in your code to describe them all, so here are my changes. I've replaced some functions with built-in functions:
#include <octave/oct.h>
DEFUN_DLD (dtw3, args, , "Find DTW of two signals with window")
{
int nargin = args.length();
if (nargin != 2)
print_usage();
Matrix A = args(0).array_value();
Matrix B = args(1).array_value();
octave_stdout << "Size of A is " << A.length() << std::endl;;
octave_stdout << "Size of B is " << B.length() << std::endl;
if (! error_state)
{
octave_idx_type n = A.length();
octave_idx_type m = B.length();
Matrix results (n + 1, m + 1);
for(octave_idx_type i = 0; i <= n ; i++)
for(octave_idx_type j = 0; j <= m ; j++)
results(i, j) = octave_Inf;
results(0, 0) = 0;
octave_idx_type win = abs (n-m);
double cost = 0;
for(octave_idx_type i = 1 ; i <= n ; i++)
for(octave_idx_type j = std::max(1, i-win) ; j <= std::min(m, i+win) ; j++)
{
cost = abs(A(i-1) - B(j-1));
results(i, j) = cost + std::min(std::min(results(i-1,j),results(i,j-1)),results(i-1,j-1));
}
//octave_stdout << results << std::endl;
return ovl(results(n, m));
}
}

convert to send recv [closed]

I have already tried to gather it with MPI_Gatherv, but it still does not work: it hangs when I run it. I have tried and tried placing MPI_Gatherv everywhere in this code, and I can't get it right. This is where I put MPI_Gatherv last.
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int i;
int recvcounts[4] = { 0, 1, 2, 3 };
int displ[4] = { 0, 0, 1, 3 };
int buffer[6];
size_t total;
size_t elemsize;
int sizes[3];
int master=0;
char **argv;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if( rank == master )
{
//start=MPI_Wtime();
src= imread("E:/sobel/Debug/jari.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
dst = grey.clone();
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"citra terdiri dari "<<total<<" elements dengan ukuran yaitu "<<sizes[0]<<" x "<<sizes[1]<<endl;
if( !grey.data )
{ return -1; }
//start=MPI_Wtime();
}
//if( rank == master )
start=MPI_Wtime();
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
// cout<<"rank "<<rank<<" : "<<sizes[0]<<" x "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.create(sizes[0],sizes[1],CV_8U);
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
}
}
for (i=0; i<rank; i++)
{
buffer[i] = rank;
}
recvcounts[i]=grey.cols*(grey.rows/size);
displ[i+1]=displ[i]+recvcounts[i];
MPI_Gatherv(buffer, rank, MPI_INT,buffer, recvcounts, displ, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
for (i=0; i<6; i++)
fflush(stdout);
}
grey.release();
imwrite("E:/sobel/Debug/deteksi tepi mpi.jpg", grey2);
//grey2.release();
end=MPI_Wtime();
cout<<"rank "<<rank<<" : waktu eksekusi sobel MPI adalah : "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
I get an error at [i] (red underline) on:
int recvcounts[i]=grey.cols*(grey.rows/size);
int displ[i+1]=displ[i]+recvcounts[i];
What should I do? Please help me fix it. It hangs when I execute the code with 4 processes.
In MPI, sending a pointer to an object is not enough. Unlike threads or OpenMP, processes do not share memory; the default behavior is parallel processes with separate address spaces. If you write imwrite(name, grey2);, the image grey2 will be written size times. If you send the pointer grey from process 0 to process 1, the grey pointer on process 1 will point to memory owned by process 0. This will probably create failures.
MPI offers you many ways to communicate beyond MPI_Send() and MPI_Recv(). For instance, MPI_Bcast() is suitable for sending the image from process 0 to all processes. http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Bcast.html
I changed your code to use MPI_Bcast(), sending the size of the image first and then the data.
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
int master=0;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == master )
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
/*else if (rank==1)
{
MPI_Recv(&grey, 1, MPI_LONG, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
cout<<"rank 1 menerima data"<<endl;
}*/
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
if(rank!=master){
grey.create(sizes[0],sizes[1],CV_8U);
if(!grey.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
cout<<" ok !"<<endl;
}
}
MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
grey2.create(sizes[0],sizes[1],CV_8U);
char name[100];
sprintf(name,"jari%d.jpg",rank+42+size);
imwrite(name, grey );
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*grey.rows/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*grey.rows/size);
if(stopy>grey.rows - 1)
{stopy=grey.rows - 1;}
for(int y = starty; y < stopy; y++)
{
for(int x = 1; x < grey.cols - 1; x++)
{
gx = xGradient(grey, x, y);
gy = yGradient(grey, x, y);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
grey2.at<uchar>(y,x) = sum;
//cout<<sum<<endl;
}
}
grey.release();
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
sprintf(name,"jari%d.jpg",rank+42);
imwrite(name, grey2 );
grey2.release();
//MPI_Barrier(MPI_COMM_WORLD);
end=MPI_Wtime();
cout<<"time: "<< end-start << " detik " <<endl;
akhir=MPI_Finalize();
//waitKey();
return 0;
}
To retrieve the data on process 0, the MPI_Gatherv() function seems useful: http://www.mcs.anl.gov/research/projects/mpi/www/www3/MPI_Gatherv.html or http://mpi.deino.net/mpi_functions/MPI_Gatherv.html. I'll let you go on with your code; you may need an extended look at tutorials and examples...
Edit:
I largely changed the code, and I hope this piece of code will end your quest...
I changed my mind and used MPI_Scatterv() to send a small part of the image to each process. I also changed the computation of the gradient, and then I retrieve the image on one process using MPI_Gatherv(). In the end, the overall speedup is low, because most of the time is spent opening and writing files. Moreover, such filters (and this code in particular...) need a large memory bandwidth.
I fear that you did not fully understand how the first piece of code works, but this one is far from clear either... I had trouble with the indexes...
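(To make the slicing in the code below easier to follow, here is a small worked example with my own numbers, not taken from the post.)
// Suppose rows = 8, cols = 10 and size = 4 processes, so rows/size = 2.
// The scatter counts and displacements (in chars) computed below come out as:
//
//   rank 0: recvcount = 10*2 + 10      = 30  -> rows 0..2 (one halo row below)
//   rank 1: recvcount = 10*2 + 2*10    = 40  -> rows 1..4 (halo rows above and below)
//   rank 2: recvcount = 10*2 + 2*10    = 40  -> rows 3..6 (halo rows above and below)
//   rank 3: recvcount = 10*(8-6) + 10  = 30  -> rows 5..7 (one halo row above)
//
//   displ[0] = 0 and displ[i] = displ[i-1] + recvcount[i-1] - 2*cols,
//   giving displ = {0, 10, 30, 50} chars: interior ranks start one row before their
//   first "own" row, and neighbouring slices overlap by the shared halo rows.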
#include<iostream>
#include<mpi.h>
#include<cmath>
#include<opencv2/imgproc/imgproc.hpp>
#include<opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
//int mod(int z, int l);
static inline int xGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y, x-1) +
image.at<uchar>(y+1, x-1) -
image.at<uchar>(y-1, x+1) -
2*image.at<uchar>(y, x+1) -
image.at<uchar>(y+1, x+1);
}
static inline int yGradient(Mat image, int x, int y)
{
return ((int)(image.at<uchar>(y-1, x-1))) +
2*image.at<uchar>(y-1, x) +
image.at<uchar>(y-1, x+1) -
image.at<uchar>(y+1, x-1) -
2*image.at<uchar>(y+1, x) -
image.at<uchar>(y+1, x+1);
}
static inline int xGradientd(uchar* pt, int cols)
{
return ((int)(pt[-cols+1])+2*pt[1]+pt[cols+1]-pt[-cols-1]-2*pt[-1]-pt[cols-1]);
}
static inline int yGradientd(uchar* pt, int cols )
{
return ((int)(pt[cols-1])+2*pt[cols]+pt[cols+1]-pt[-cols-1]-2*pt[-cols]-pt[-cols+1]);
}
int main()
{
Mat src, grey, dst;
Mat grey2;
Mat grey3;
double start, end;
int gx, gy, sum, argc, awal,akhir, size, rank;
char **argv;
// MPI_Status status;
awal= MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// start=MPI_Wtime();
cout<<"rank "<<rank<<endl;
size_t total;
size_t elemsize;
int sizes[3];
if( rank == 0)
{
start=MPI_Wtime();
src= imread("jari1.jpg");
cvtColor(src,grey,CV_BGR2GRAY);
src.release();
//dst = grey.clone();
imwrite("jari2.jpg", grey );
cout<<"ok here"<<endl;
if(!grey.isContinuous()){
cout<<"trouble : data is not continuous"<<endl;
}
total=grey.total();
sizes[2]=grey.elemSize();
cv::Size s = grey.size();
sizes[0] = s.height;
sizes[1] = s.width;
cout<<"grey is made of "<<total<<" elements of size "<<sizes[2]<<" that is "<<sizes[0]<<" by "<<sizes[1]<<endl;
if( !grey.data )
{
return -1;
}
// MPI_Send(&grey, 1, MPI_LONG, 1, 1, MPI_COMM_WORLD);
cout<<"master mengirim data ke rank 1"<<endl;
//fflush (stdout);
}
//start of parallel part. To this point, only proc 0 was working.
if( rank == 0 )
{
start=MPI_Wtime();
}
//the sizes of the image grey are send to all processus.
MPI_Bcast( sizes, 3, MPI_INT, 0, MPI_COMM_WORLD);
//cout<<rank<<" : "<<sizes[0]<<" "<<sizes[1]<<endl;
int recvcount[size];
int displ[size];
int i;
//compute size of local image
//on each proc, a little slice of the image will be received from proc 0 through MPI_Scatterv
//to compute the gradient, two extra lines should be send on top and bottom of slice.(except for 0 and sizes-1)
//this is why there are so many tests.
//how many pixels on the slice ? sendcount.
int sendcount=sizes[1]*(sizes[0]/size)+2*sizes[1];
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size))+sizes[1];
}
if(rank==0){
sendcount-=sizes[1];
}
//printf("creating image %d %d \n",sendcount/sizes[1],sizes[1]);
//image allocation :
grey3.create(sendcount/sizes[1],sizes[1],CV_8U);
if(!grey3.data){
cout<<"data not allocated, rank "<<rank<<endl;
}else{
//cout<<" ok !"<<endl;
}
//compute sizes and offsets on proc 0
//how many char should be sent from proc 0 to proc i ? recvcount[i].
//where does the data starts ? displ[i].
//these information are needed by MPI_Scatterv() on proc 0
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size)+grey.cols;
if(i>0){
recvcount[i]+=grey.cols;
}
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1]-2*grey.cols;
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size>1){
recvcount[size-1]+=grey.cols;
}
if(size-1>0){
displ[size-1]=grey.cols*(grey.rows)-recvcount[size-1];
}
}
/*
if(rank==master){
for(i=0;i<size;i++){
printf("count %d displ %d \n",recvcount[i],displ[i]);
}
}
*/
MPI_Scatterv( grey.data, recvcount, displ, MPI_CHAR, grey3.data, sendcount,MPI_CHAR,0, MPI_COMM_WORLD);
/*
char name[100];
sprintf(name,"jariscat%d.jpg",rank);
imwrite(name, grey3 );
*/
//MPI_Bcast( grey.data, sizes[0]*sizes[1], MPI_CHAR, 0, MPI_COMM_WORLD);
//for output
//this local slice will store the result of the gradient operation
grey2.create(sendcount/sizes[1],sizes[1],CV_8U);
/*
for(int y = 0; y < grey.rows; y++)
for(int x = 0; x < grey.cols; x++)
grey.at<uchar>(y,x) = 0;
*/
int starty=(rank*sizes[0]/size);
if(starty==0)
{starty=1;}
int stopy=((rank+1)*sizes[0]/size);
if(stopy>sizes[0] - 1)
{stopy=sizes[0] - 1;}
int ii=grey3.cols;
uchar* data=grey3.data;
uchar* datad=grey2.data;
for(int y = starty; y < stopy; y++)
{
ii++;
for(int x = 1; x < sizes[1] - 1; x++)
{
//gx = xGradient(grey, x, y);
gx=xGradientd(&data[ii],grey2.cols);
gy=yGradientd(&data[ii],grey2.cols);
//gy = yGradient(grey, x, y);
//printf("%d %d \n",gx,gy);
sum = abs(gx) + abs(gy);
//cout<<sum<<endl;
sum = sum > 255 ? 255:sum;
sum = sum < 0 ? 0 : sum;
datad[ii] = sum;
//cout<<sum<<endl;
ii++;
}
ii++;
}
//namedWindow("deteksi tepi sobel");
//imshow("deteksi tepi sobel", dst);
//namedWindow("grayscale");
//imshow("grayscale", grey);
//namedWindow("Original");
//imshow("Original", src);
/*
sprintf(name,"jarigrad%d.jpg",rank);
imwrite(name, grey2 );
*/
// now, the data in grey2 should be sent from every processor in image grey on proc 0
//MPI_Gatherv will be used.
//on proc 0, count of bytes to be received from each processor should be computed
// as well as displacements representing where each part should be placed in image grey
if(rank==0){
displ[0]=0;
for(i=0;i<size;i++){
recvcount[i]=grey.cols*(grey.rows/size);
if(i>0){
displ[i]=recvcount[i-1]+displ[i-1];
}
}
recvcount[size-1]=grey.cols*(grey.rows-(size-1)*(grey.rows/size));
if(size-1>0){
displ[size-1]=recvcount[size-2]+displ[size-2];
}
}
//on each processor, how many lines should be sent ? sendcount.
//where does the data in grey2 starts ? tosend.
sendcount=sizes[1]*(sizes[0]/size);
if(rank==size-1){
sendcount=sizes[1]*(sizes[0]-(size-1)*(sizes[0]/size));
}
uchar* tosend=&grey2.data[grey2.cols];
if(rank==0){
tosend=&grey2.data[0];
}
MPI_Gatherv(tosend,sendcount , MPI_CHAR,grey.data, recvcount, displ,MPI_CHAR, 0, MPI_COMM_WORLD);
grey2.release();
//everything is back on proc 0 in image grey
end=MPI_Wtime();
if(rank==0){
imwrite("output.jpg", grey );
cout<<"time: "<< end-start << " detik " <<endl;
grey.release();
}
akhir=MPI_Finalize();
//waitKey();
return 0;
}
Bye,
Francis