What is an optimal way to send OPENCV Mat over MPI - c++

What is an optimal way to send an OpenCV Mat over MPI? So far I have done it by converting the Mat to int**, but this is a rather slow solution.
A = alloc2d(n , m);
for (int i = 0; i < n ; ++i)
for (int j = 0; j < m ; ++j)
A[i][j] = img.at<uchar>(i , j);
/////////////////////////////////////
/**
 * Allocates a rows x cols matrix of int backed by ONE contiguous data block.
 *
 * arr[i] points at the first element of row i inside that block, so arr[0]
 * addresses all rows * cols elements in row-major order (which is what makes
 * the matrix sendable with a single call). Elements are left uninitialised.
 *
 * @param rows number of rows; must be positive
 * @param cols number of columns; must be positive
 * @return the row-pointer array, or NULL when a dimension is not positive,
 *         the total size would overflow, or an allocation fails.
 *         Release with free(arr[0]); free(arr);
 */
int ** alloc2d(int rows, int cols) {
    if (rows <= 0 || cols <= 0)
        return NULL;

    // Do the size arithmetic in size_t: rows * cols can overflow int.
    const size_t nrows = static_cast<size_t>(rows);
    const size_t ncols = static_cast<size_t>(cols);
    const size_t maxElems = static_cast<size_t>(-1) / sizeof(int);
    if (ncols > maxElems / nrows)
        return NULL;

    int * data = static_cast<int *>(malloc(nrows * ncols * sizeof(int)));
    if (data == NULL)
        return NULL;

    int ** arr = static_cast<int **>(malloc(nrows * sizeof(int *)));
    if (arr == NULL) {
        free(data); // do not leak the data block when the row table fails
        return NULL;
    }

    for (size_t i = 0; i < nrows; ++i)
        arr[i] = data + ncols * i;
    return arr;
}

Check the original Mat is contiguous first, and clone it if it isn't.
Then just get the:
rows
columns
type
channels
of the original Mat and save in that order, each as 4 bytes, at the start of a buffer. Then append the appropriate number of bytes from the original Mat's data pointer and send the whole lot.
Do the opposite at the receiving end... read the first four integers from the buffer and create a Mat of the corresponding size and load the remainder of the data into it.
@Miki provides an excellent, related answer here which demonstrates the details of most of the techniques suggested above - look specifically at Mat2str() and str2Mat().
I don't do much C++ or much MPI, I am sure anyone who uses MPI or C++ a lot could tighten it up, but the following works and works pretty fast too!
#include <cstdlib>
#include <iostream>
#include <iomanip>
#include <ctime>
#include <iostream>
#include <string>
#include <chrono>
#include <thread>
#include <opencv2/opencv.hpp>
#include "opencv2/highgui/highgui.hpp"
#include "mpi.h"
using namespace std;
using namespace cv;
// Size in bytes (8 MiB) of the shared message buffer below.
const int MAXBYTES=8*1024*1024;
// Scratch buffer: matsnd() assembles the outgoing message here and matrcv()
// receives the incoming message into it.
uchar buffer[MAXBYTES];
void matsnd(const Mat& m,int dest){
int rows = m.rows;
int cols = m.cols;
int type = m.type();
int channels = m.channels();
memcpy(&buffer[0 * sizeof(int)],(uchar*)&rows,sizeof(int));
memcpy(&buffer[1 * sizeof(int)],(uchar*)&cols,sizeof(int));
memcpy(&buffer[2 * sizeof(int)],(uchar*)&type,sizeof(int));
// See note at end of answer about "bytes" variable below!!!
int bytespersample=1; // change if using shorts or floats
int bytes=m.rows*m.cols*channels*bytespersample;
cout << "matsnd: rows=" << rows << endl;
cout << "matsnd: cols=" << cols << endl;
cout << "matsnd: type=" << type << endl;
cout << "matsnd: channels=" << channels << endl;
cout << "matsnd: bytes=" << bytes << endl;
if(!m.isContinuous())
{
m = m.clone();
}
memcpy(&buffer[3*sizeof(int)],m.data,bytes);
MPI_Send(&buffer,bytes+3*sizeof(int),MPI_UNSIGNED_CHAR,dest,0,MPI_COMM_WORLD);
}
/**
 * Receives one message from rank `src` into the global buffer and rebuilds
 * the Mat it describes.
 *
 * Expected layout: three native ints (rows, cols, type) followed by the raw
 * pixel bytes.
 *
 * @param src rank of the sending process in MPI_COMM_WORLD
 * @return a Mat that owns its pixels (deep copy of the buffer contents), or an
 *         empty Mat when the message is shorter than its header claims.
 */
Mat matrcv(int src){
    MPI_Status status;
    int count = 0, rows = 0, cols = 0, type = 0;
    const int headerBytes = static_cast<int>(3 * sizeof(int));

    MPI_Recv(buffer, static_cast<int>(sizeof(buffer)), MPI_UNSIGNED_CHAR, src, 0, MPI_COMM_WORLD, &status);
    MPI_Get_count(&status, MPI_UNSIGNED_CHAR, &count);

    if (count < headerBytes) {
        cerr << "matrcv: message of " << count << " bytes is too short for the header" << endl;
        return Mat();
    }

    memcpy(&rows, &buffer[0 * sizeof(int)], sizeof(int));
    memcpy(&cols, &buffer[1 * sizeof(int)], sizeof(int));
    memcpy(&type, &buffer[2 * sizeof(int)], sizeof(int));

    cout << "matrcv: Count=" << count << endl;
    cout << "matrcv: rows=" << rows << endl;
    cout << "matrcv: cols=" << cols << endl;
    cout << "matrcv: type=" << type << endl;

    // Never build a Mat over more bytes than were actually received.
    if (rows < 0 || cols < 0) {
        cerr << "matrcv: invalid dimensions in header" << endl;
        return Mat();
    }
    const size_t expected = static_cast<size_t>(rows) * static_cast<size_t>(cols) * CV_ELEM_SIZE(type);
    if (expected > static_cast<size_t>(count - headerBytes)) {
        cerr << "matrcv: header announces " << expected << " payload bytes but only "
             << (count - headerBytes) << " were received" << endl;
        return Mat();
    }

    // A Mat constructed on an external pointer does not own the data. Clone it
    // so the result stays valid after the next receive overwrites the buffer.
    return Mat(rows, cols, type, &buffer[headerBytes]).clone();
}
int main ( int argc, char *argv[] )
{
// Initialise MPI
MPI::Init (argc,argv);
// Get our rank
int id = MPI::COMM_WORLD.Get_rank();
if(id==0)
{
// MASTER - wait to receive image from slave and write to disk for checking
Mat received=matrcv(1);
imwrite("received.jpg",received);
}else{
// Slave - read Mat from disk and send to master
Mat image=imread("image.jpg",IMREAD_COLOR);
matsnd(image,0);
}
// Terminate MPI
MPI::Finalize();
}
I put a loop with 10,000 iterations around:
matsnd() in the slave, and
matrcv() in the Master
and it took 1.9 seconds for the 10,000 iterations. I cannot compare as you didn't show any timings.
All the cout statements that are hard-left justified are just debug stuff that can safely be removed.
Note:
Whilst I have used and tested the above, I have since learned that the calculation of the number of bytes I send may be incorrect in some circumstances (probably where there are alignment constraints). If you are interested, please check this answer.
Keywords: MPI, MPI_Send, MPI_Recv, OpenCV, Mat, image

Related

Segmentation Fault in creating a cv::Mat from unsigned char array

I cannot understand why the following minimal code outputs segmentation fault and cv::Mat values are not printed correctly:
#include <opencv2/opencv.hpp>
// Wraps a raw byte buffer in a 1 x N single-channel 8-bit Mat (no copy) and
// prints every element twice: once from the array, once through the Mat.
int main()
{
    const int kNumBytes = 1280*720*3/2;

    // Static storage keeps this large buffer off the stack.
    // Only out[0] is 100; the remaining elements are zero-initialised.
    static unsigned char out[kNumBytes] = {100};

    cv::Mat dummy_query(1, kNumBytes, CV_8UC1, out);
    cv::Size s = dummy_query.size();
    std::cout << s << "\r\n";
    for(int i = 0; i < kNumBytes; i++)
    {
        std::cout << i << "ss" << int(out[i]) << ":";
        // The template argument of at<>() must match the Mat's element type
        // (CV_8UC1 -> unsigned char). at<int> would read 4 bytes per element
        // and run past the end of the buffer.
        std::cout << int(dummy_query.at<unsigned char>(0,i)) << " ";
    }
    return 0;
}
You have defined the cv::Mat with the uchar data type (CV_8UC1), but this line accesses it as int:
std::cout << dummy_query.at<int>(0,i) << " ";
so every access reads 4 bytes instead of 1, and your program will likely crash near the end of the loop.
e.g.
// create a 100x100 8-bit matrix
Mat M(100,100,CV_8U);
// this compiles fine: the cast only reinterprets the header, no data
// conversion is done, so the storage still holds 1-byte elements.
Mat_<float>& M1 = (Mat_<float>&)M;
// the program is likely to crash at the statement below: M1 indexes the
// data as floats, so element (99,99) lies beyond the 100x100 bytes of M.
M1(99,99) = 1.f;
check this open cv reference

reading wave file with hop lengths using libsndfile, or to identify the number of samples in the file

I would like to read a wave file, and process them into fft. this is my current working code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <vector>
#include "fftw-3.3.8/api/fftw3.h"
using namespace std;
// Element count of an array. x must be a true array (not a pointer) for
// sizeof (x) to cover every element.
#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof (x [0])))
// Larger / smaller of two values. Each argument appears twice in the
// expansion, so do not pass expressions with side effects.
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))
vector<double> read_audio_vector(const char* filePath){
SNDFILE *infile ;
SF_INFO sfinfo ;
double buffer [8192] = {};
vector<double> output_buffer(8192, 0);
sf_count_t count ;
cout << "Reading from : " << filePath << endl;
memset (&sfinfo, 0, sizeof (sfinfo)) ;
if ((infile = sf_open (filePath, SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", filePath);
sf_close (infile);
exit (1) ;
}
count = sf_read_double (infile, buffer, ARRAY_LEN (buffer));
for (int j=0; j<8192; ++j){
output_buffer[j] = buffer[j];
}
sf_close (infile) ;
return output_buffer;
}
/**
 * Splits `buffer` into overlapping frames and returns the spectrogram container.
 *
 * The FFT itself is still a placeholder (see the comment in the loop), so the
 * returned rows are currently all zeros.
 *
 * @param frameSize number of samples per frame
 * @param numFrames number of frames to extract
 * @param buffer    input samples; taken by const reference to avoid a copy
 * @param hopSize   distance in samples between the starts of consecutive
 *                  frames (defaults to the previously hard-coded 128)
 * @return numFrames rows of frameSize/2 values
 *
 * Samples requested beyond the end of `buffer` are treated as 0 instead of
 * being read out of bounds. Negative sizes are treated as 0.
 */
std::vector<std::vector<double> > computeFullFFT_vector(int frameSize, int numFrames, const std::vector<double>& buffer, int hopSize = 128){
    const std::size_t frameLen   = frameSize > 0 ? static_cast<std::size_t>(frameSize) : 0;
    const std::size_t frameCount = numFrames > 0 ? static_cast<std::size_t>(numFrames) : 0;
    const std::size_t hop        = hopSize  > 0 ? static_cast<std::size_t>(hopSize)  : 0;

    std::vector<double> audioFrame(frameLen, 0.0);
    std::vector<std::vector<double> > Spectrogram(frameCount, std::vector<double>(frameLen / 2));

    for (std::size_t frameIdx = 0; frameIdx < frameCount; ++frameIdx){
        // Extract frame from buffer, zero-padding past the end of the input
        const std::size_t start = frameIdx * hop;
        for (std::size_t i = 0; i < frameLen; ++i){
            const std::size_t pos = start + i;
            audioFrame[i] = pos < buffer.size() ? buffer[pos] : 0.0;
        }
        // performFFT && Update -> Spectrogram
    }
    return Spectrogram;
}
int main (int argc, char ** argv) {
// Init
SNDFILE *infile ;
SF_INFO sfinfo ;
int frameSize = 256;
// Read Audio
cout << "\n==== Read Audio ===== \n";
vector<double> x = read_audio_vector(argv[1]);
cout << "--x.size() : " << x.size() << endl;
int i;
i=0; cout << "x[" << i << "] : " << x[i] << endl;
i=7999; cout << "x[" << i << "] : " << x[i] << endl;
i=8000; cout << "x[" << i << "] : " << x[i] << endl;
i=8191; cout << "x[" << i << "] : " << x[i] << endl;
// Process FFT here
int numFrames = (8192-frameSize)/128 + 1;
vector<vector<double> > Spectrogram(numFrames, vector<double>(frameSize/2));
Spectrogram = computeFullFFT_vector(frameSize, numFrames, x);
cout << "Done" << endl;
return 0 ;
}
However, the problem is that I assumed and pre-allocated a fixed 8192 samples.
In this case, i have only 1 second at 8kHz, meaning i only have 8000 samples.
Hence you see these values
buffer[0] : 0.176361
buffer[7999] : 0.025177
buffer[8000] : 0
buffer[8191] : 0
As you can see, from index 8000 to 8191, these values are empty. So they are redundant.
Why i set to 8192, is because i want to pre-allocate Spectrogram with numFrames, and to do that i need to know the number of samples.
Problem:
I want to make this code a general-purpose code, that accepts a wave file of any length, (1s, 10sec, 3minutes, etc), so this pre-allocation does not work anymore.
Is there a way to find out the number of samples of the wave file, so i can change from a fixed 8192 to a variable number depending on the length of the wave files?
Alternatively, can i read the wave file in chunks, but with hop length?
Currently this doesn't work because it doesn't read them with hop length.
// Counts how many chunked reads it takes to consume the file.
int num_frames = 0;
// Each sf_read_double() call continues where the previous one stopped, so
// consecutive chunks never overlap: the effective hop equals the chunk length.
while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
// Halve the first 256 samples of the chunk that was just read.
for (int i = 0; i < 256; i++){
buffer[i] *= 0.5;
}
num_frames++;
}
cout << "num_frames=" << num_frames; // this gives 32 frames, instead of the 63 frames that i desire
FYI : i compile with
g++ ./debug_tmp.cpp $(pkg-config --libs --cflags sndfile) ;
./a.out wav/test_1s.wav
To get the total number of samples of your .wav file, you need to look at the structure SF_INFO and especially the members frames and channels. The total number of samples will be the product of these two.
Yes, you can read your file in chunks. Simply choose a chunk length and pass it as the third argument of sf_readf_double. Note that this argument is counted in frames, not samples. The return value is the number of frames actually read (when you reach the end of the file, it will be less than the number you asked for).
Here is a code example in C where I show the total number of samples, then I reduce the volume of the audio by cutting in chunks with a voluntary weird number of my choice 147.
#include <stdio.h>
#include "sndfile.h"
// Channel count this example assumes; the sample buffers in main are sized with it.
#define MONO 1
// Number of frames requested from the input file per read.
#define DATA_BLOCK_LENGTH 147 // Frames
int main(void) {
int i;
// Input file
char *inFileName;
SNDFILE *inFile;
SF_INFO inFileInfo;
inFileName = "audioFiles/whiteNoise.wav";
// Get total number of samples
inFile = sf_open(inFileName, SFM_READ, &inFileInfo);
if(inFile == NULL)
printf("Audio file error.\n");
int nFrames = (int)inFileInfo.frames;
// Print results
printf("Total number of frames: %d\n", nFrames);
printf("Number of channels: %d\n", inFileInfo.channels);
printf("Total number of samples: %d", nFrames * inFileInfo.channels);
// Output file
char *outFileName;
SNDFILE *outFile;
SF_INFO outFileInfo;
outFileName = "audioFiles/outWhiteNoise.wav";
outFileInfo.frames = inFileInfo.frames;
outFileInfo.samplerate = inFileInfo.samplerate;
outFileInfo.channels = inFileInfo.channels;
outFileInfo.format = inFileInfo.format;
outFile = sf_open(outFileName, SFM_WRITE, &outFileInfo);
// Process
int inDataBuffer[DATA_BLOCK_LENGTH*MONO];
int outDataBuffer[DATA_BLOCK_LENGTH*MONO];
int nReadFrames;
while(nFrames > 0) {
nReadFrames = sf_readf_int(inFile, inDataBuffer, DATA_BLOCK_LENGTH);
for(i = 0; i < DATA_BLOCK_LENGTH; i++)
outDataBuffer[i] = inDataBuffer[i] / 2;
sf_writef_int(outFile, outDataBuffer, nReadFrames);
nFrames -= nReadFrames;
}
sf_close(inFile); sf_close(outFile);
return 0;
}

Segmentation fault (core dumped) with OpenCV

I am trying to code a program that eliminates some of the connected components and keep the rest.
However, at some point in the code, the program exits with error message "Segmentation fault (core dumped)".
I have narrowed down the error to the statement: "destinationImage.at(row, column) = labeledImage.at(row, column);" using the checkpoints you'll find the code below.
I have tried all the solution I found, especially this one, with no luck.
Please Help!
One more thing, the program reads the image correctly but does not show the original image as per the code. Instead, it prints a message "init done
opengl support available". Is this normal? Does the implementation of the imshow take place at the end of the program with no errors?
/* Goal is to find all related components, eliminate secondary objects*/
#include <opencv2/core/utility.hpp>
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
//Declaring variables
// Input image; assigned from imread(..., IMREAD_GRAYSCALE) in main.
Mat originalImage;
// Receives the return value of connectedComponentsWithStats() in main.
int conComponentsCount;
// NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
int primaryComponents;
//Declaring constants
// Command-line description for cv::CommandLineParser.
// The leading '@' marks "image" as a positional argument, which is what
// parser.get<string>(0) looks up. Format: {name|default value|help text}.
const char* keys =
{
"{@image|../data/sample.jpg|image for converting to a grayscale}"
};
//Functions prototypes, used to be able to define functions AFTER the "main" function
Mat BinarizeImage (Mat &, int thresh);
int AverageCCArea(Mat & CCLabelsStats,int numOfLabels, int minCCSize);
bool ComponentIsIncludedCheck (int ccArea, int referenceCCArea);
//Program mainstream============================================
int main (int argc, const char **argv)
{
//Waiting for user to enter the required path, default path is defined in "keys" string
CommandLineParser parser(argc, argv, keys);
string inputImage = parser.get<string>(0);
//Reading original image
//NOTE: the program MUST terminate or loop back if the image was not loaded; functions below use reference to matrices and references CANNOT be null or empty.
originalImage = imread(inputImage.c_str(), IMREAD_GRAYSCALE);// or: imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE)
cout << " 1) Loading image done!" << endl;//CHECKPOINT
if (originalImage.empty())
{
cout << "Nothing was loaded!";
return -1; //terminating program with error feedback
}
cout << " 2) Checking for null Image done!" << endl;//CHECKPOINT
namedWindow("Original Image", 0);
imshow("Original Image", originalImage);
cout << " 3) Showing ORIGINAL image done!" << endl;//CHECKPOINT
//Image Binarization; connectedcomponents function only accepts binary images.
int threshold=100; //Value chosen empirically.
Mat binImg = BinarizeImage(originalImage, threshold);
cout << " 4) Binarizing image done!" << endl;//CHECKPOINT
//Finding the number of connected components and generating the labeled image.
Mat labeledImage; //Image with connected components labeled.
Mat stats, centroids; //Statistics of connected image's components.
conComponentsCount = connectedComponentsWithStats(binImg, labeledImage, stats, centroids, 4, CV_16U);
cout << " 5) Connecting pixels done!" << endl;//CHECKPOINT
//Creating a new matrix to include the final image (without secondary objects)
Mat destinationImage(labeledImage.size(), CV_16U);
//Calculating the average of the labeled image components areas
int ccSizeIncluded = 1000;
int avgComponentArea = AverageCCArea(stats, conComponentsCount, ccSizeIncluded);
cout << " 6) Calculating components avg area done!" << endl;//CHECKPOINT
//Criteria for component sizes
for (int row = 0; row <= labeledImage.rows; row++)
{
cout << " 6a) Starting rows loop iteration # " << row+1 << " done!" << endl;//CHECKPOINT
for (int column = 0; column <= labeledImage.cols; column++)
{
//Criteria for component sizes
int labelValue = labeledImage.at<int>(row, column);
if (ComponentIsIncludedCheck (stats.at<int>(labelValue, CC_STAT_AREA), avgComponentArea))
{
//Setting pixel value to the "destinationImage"
destinationImage.at<int>(row, column) = labeledImage.at<int>(row, column);
cout << " 6b) Setting pixel (" << row << "," << column << ") done!" << endl;//CHECKPOINT
}
else
cout << " 6c) Pixel (" << row << "," << column << ") Skipped!" << endl;//CHECKPOINT
}
cout << " 6d) Row " << row << " done!" << endl;//CHECKPOINT
}
cout << " 7) Showing FINAL image done!" << endl;//CHECKPOINT
namedWindow("Final Image", 0);
imshow("Final Image", destinationImage);
cout << " 8) Program done!" << endl;//CHECKPOINT
waitKey (0);
}
//+++++++++++++++++++++++++++++++++++++++++++++++++++
// Produces the binary mask fed to the connected-components step.
// For thresholds below 128 the mask marks pixels darker than the threshold,
// otherwise it marks pixels brighter than it.
Mat BinarizeImage (Mat & originalImg, int threshold=100) //default value of threshold of grey content.
{
    Mat mask;
    if (threshold < 128)
    {
        mask = (originalImg < threshold);
    }
    else
    {
        mask = (originalImg > threshold);
    }
    return mask;
}
//+++++++++++++++++++++++++++++++++++++++++++++++++++
// Calculates the average area of the connected components whose area is at
// least minCCSize pixels.
//
// CCLabelsStats : stats matrix produced by connectedComponentsWithStats; taken
//                 by reference so the matrix is not copied.
// numOfLabels   : number of labels returned by connectedComponentsWithStats.
// minCCSize     : components smaller than this are left out of the average.
//
// Label 0 (the background in connectedComponentsWithStats) is skipped.
// Returns 0 when no component qualifies.
int AverageCCArea(Mat & CCLabelsStats,int numOfLabels, int minCCSize)
{
    // Labels run from 0 to numOfLabels-1; also never index past the matrix.
    const int labelCount = numOfLabels < CCLabelsStats.rows ? numOfLabels : CCLabelsStats.rows;

    // Accumulators live outside the loop so they survive across iterations.
    long long sum = 0;
    int validComponentsCount = 0;
    for (int i = 1; i < labelCount; i++)
    {
        const int area = CCLabelsStats.at<int>(i, CC_STAT_AREA);
        if (area >= minCCSize)
        {
            sum += area;
            validComponentsCount++;
        }
    }

    if (validComponentsCount == 0)
    {
        return 0; // avoids a division by zero
    }
    return static_cast<int>(sum / validComponentsCount);
}
//+++++++++++++++++++++++++++++++++++++++++++++++++++
// Decides whether a component belongs in the destination image:
// true when its area is at least the reference area, false otherwise.
bool ComponentIsIncludedCheck (int ccArea, int referenceCCArea)
{
    const bool tooSmall = ccArea < referenceCCArea;
    return !tooSmall;
}
change this:
for (int row = 0; row <= labeledImage.rows; row++)
to this:
for (int row = 0; row < labeledImage.rows; row++)
and this:
for (int column = 0; column <= labeledImage.cols; column++)
to this:
for (int column = 0; column < labeledImage.cols; column++)
any good?
(remember that in C++ we start counting from 0, so if e.g. labeledImage.cols == 10, the last column is the one with the index 9)

How many values can be put into an Array in C++?

I wanted to read an array of double values from a file into an array. I have about 128^3 values. My program worked just fine as long as I stayed at 128^2 values, but now I get a "segmentation fault" error, even though 128^3 ≈ 2,100,000 is far below the maximum of int. So how many values can you actually put into an array of doubles?
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
int LENGTH = 128;
// Reads LENGTH^3 double values from "density.dat" into memory, echoing each one.
// Returns 1 when the file cannot be opened or holds fewer values than expected.
int main(int argc, const char * argv[]) {
    const int arrLength = LENGTH*LENGTH*LENGTH;
    std::string filename = "density.dat";
    std::cout << "opening file" << std::endl;
    std::ifstream infile(filename.c_str());
    if (!infile) {
        std::cerr << "could not open " << filename << std::endl;
        return 1;
    }
    std::cout << "creating array with length " << arrLength << std::endl;
    // std::vector keeps its elements on the heap. A local array of this size
    // exceeds typical stack limits, which is what caused the segmentation fault.
    std::vector<double> densdata(arrLength);
    std::cout << "Array created"<< std::endl;
    for(int i=0; i < arrLength; ++i){
        double a;
        if (!(infile >> a)) {
            std::cerr << "failed to read value at line " << (i+1) << std::endl;
            return 1;
        }
        // Store the value itself, not the address of the loop-local variable.
        densdata[i] = a;
        std::cout << "read value: " << a << " at line " << (i+1) << std::endl;
    }
    return 0;
}
You are allocating the array on the stack, and stack size is limited (by default, stack limit tends to be in single-digit megabytes).
You have several options:
increase the size of the stack (ulimit -s on Unix);
allocate the array on the heap using new;
move to using std::vector.

Binary reader not triggering eof bit when reading exact number of bytes

I am writing images to a binary file using this code:
// Appends each edge image to a binary file as: rows (int), cols (int),
// followed by rows*cols bytes of pixel data.
std::ofstream edgefile("C:\\****\\edge.bin", std::ofstream::binary | std::ofstream::app | std::ofstream::out);
Mat edges;
Canny(bilat, edges, cthr1, cthr2, 3); //cany sliders
if (writeedge){
    int rows = edges.rows;
    int cols = edges.cols;
    const char *rowsBytes = reinterpret_cast<const char *>(&rows);
    const char *colsBytes = reinterpret_cast<const char *>(&cols);
    char *pixelBytes = reinterpret_cast<char*>(edges.data);

    // Header first, then the pixel payload.
    edgefile.write(rowsBytes, sizeof(int));
    edgefile.write(colsBytes, sizeof(int));
    edgefile.write(pixelBytes, edges.rows*edges.cols*sizeof(uchar));

    cout << "writen r:" << rows << "C: " << cols << "Bytes: " << edges.rows*edges.cols*sizeof(uchar) << endl;
}
And then reading the same images with this:
std::ifstream infile;
int main(int argc, char* argv[])
{
int * ptr;
ptr = new int;
int rows;
int cols;
infile.open("C:\\****\\edge.bin", std::ofstream::binary | std::ofstream::app | std::ofstream::in);
while (!infile.eof())
{
infile.read(reinterpret_cast<char*>(ptr), sizeof(int));
rows = *ptr;
infile.read(reinterpret_cast<char*>(ptr), sizeof(int));
cols = *ptr;
Mat ed(rows, cols, CV_8UC1, Scalar::all(0));
infile.read(reinterpret_cast<char*>(ed.data), rows * cols * (sizeof uchar));
cout << "writen r: " << rows << " C: " << cols << " Bytes: " << rows * cols * (sizeof uchar) << endl;
imshow("God Knows", ed);
cvWaitKey();
}
infile.close();
return 0;
}
The images are read accurately however eof bit is not triggered at the end thus multiplying the last ptr value and reading another blank image at the end. After this the cycle ends. How can I check if the next bit is EOF bit without resetting the currently read position?
(I know that if 1 more byte would be read it would trigger the EOF bit)
The EOF bit is set after you try to read past the end of the file, that's just how streams work.
You can easily restructure the main loop to check the status after the first read. This works because read returns a reference to the stream, and converting that reference to bool tells you whether the read succeeded: it is false once a read fails, which is what happens when the end of the file is reached before all requested bytes were read.
while (infile.read(reinterpret_cast<char*>(ptr), sizeof(int)))
{
// ...