I am new at OpenCV and I am trying to write a simple code to get the mean of a block size in an image. I wrote the following code, the build is ok, however, the debug is giving me an unhandled exception at memory location. This exception is at the following line:
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
So, I will be grateful if anyone give me some hints. Thanks in advance and here is the whole code:
#include "opencv2/highgui/highgui.hpp" // Include Libs for OpenCV and Image Processing
#include <opencv2/opencv.hpp> // check that
#include "opencv2/core/core.hpp" // check that
#include <iostream> // Include Libs for C++
#include "opencv2/imgproc/imgproc.hpp" // Include Libs for OpenCV and Image Processing
#include <math.h>
using namespace cv; // namespace parameters not important in OpenCV2.4.6
using namespace std; // namespace parameters not important in OpenCV2.4.6
int main( int argc, const char** argv )
{
/*This part is to compute the parameters(block size, resize parameter) of the new_img*/
int resize_parameter; // resize parameter must be multiplication of 2
resize_parameter = 500;
int block_size; // block parameter must be divisable by of block size
block_size = 50;
if ((resize_parameter % 2) != 0) resize_parameter = resize_parameter - (resize_parameter % 2);
while ((resize_parameter % block_size) != 0) block_size = block_size - 1;
int mean_size = resize_parameter/block_size; // this is the size of the mean matrix
int mean = block_size * block_size; // this no is ti get the mean of every element in the matrix
//int mean_img [mean_size][mean_size] = {}; // the mean image matrix initialized by zero
/*This part is to allocate the array with dynamic size*/
//int** mean_img = new int*[mean_size];
//for(int x = 0; x < mean_size; x++)
//mean_img[x] = new int[mean_size];
/*Then we can use the array*/
/*This part is to fill all the elements of the mean matrix with zeros*/
//memset(mean_img, 0, sizeof(mean_img[0][0]) * mean_size * mean_size);
/*This part is the definition of the matrices that are used for the images*/
Mat mean_img = Mat(mean_size,mean_size,CV_64FC4, cv::Scalar(0)); // define a new matrix with meansize*meansize elements to compute the mean
Mat mean_img_full = Mat(resize_parameter,resize_parameter,CV_64FC4, cv::Scalar(0)); // define a new matrix with resizeparameter*resizeparameter elements to compute the mean
Mat new_img = Mat(resize_parameter,resize_parameter,CV_64FC4); // define a new matrix with resize_parameter*resize_parameter elements
Mat original_img = imread("Desert.JPG", CV_LOAD_IMAGE_GRAYSCALE); //define a new matrix and read the image data in the file "Desert.JPG" and store it in 'original_img'
// notes: the location of the image must be in the same directory of the C++ file
if (original_img.empty()) //check whether the image is loaded or not
{
cout << "Error : Image cannot be loaded..!!" << endl;
//system("pause"); //wait for a key press
return -1;
}
// explicitly specify dsize=dst.size(); fx and fy will be computed from that.
// resize( src matrix, dst matrix, dst.size to get the size of the dst matrix, 0, 0 "to deal with the dst matrix size, may be 0.5 or any fraction from the src size, "AREA,CUBIC,LINEAR")
resize(original_img, new_img, new_img.size(), 0, 0, CV_INTER_AREA);
/*This part is to compute the mean of each block*/
for ( int i = 0; i < resize_parameter; i = i + block_size) // i represents the index of the raw
{
for ( int j = 0; j < resize_parameter; j = j + block_size) // for the blocks in the same raw with different columns
{
for ( int x = 0; x < block_size; x++) // x represents the index of the raw
{
for ( int y = 0; y < block_size; y++) // y represents the index of the column
{
//cout << i ; //cout << "\n"; //cout << j ; //cout << "\n"; //cout << x ; //cout << "\n"; //cout << y ; //cout << "\n";
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
}
}
}
}
/*This is the end of the part to compute the mean of each block*/
/*This part is to fill all the resize matrix with the mean value*/
for ( int x = 0; x < resize_parameter/block_size; x++) // x represents the index of the raw in the mean matrix
{
for ( int y = 0; y < resize_parameter/block_size; y++) // y represents the index of the column in the mean matrix
{
for ( int i = 0; i < block_size; i++) // i represents the index of the raw in the mean_full matrix
{
for ( int j = 0; j < block_size; j++) // j represents the index of the column in the mean_full matrix
{
mean_img_full.at<double>((x*block_size)+i,(y*block_size)+j) = mean_img.at<double>(x,y);
}
}
}
}
//cout << cv::getBuildInformation() << endl;
/*This is the end of the part to fill all the resize matrix with the mean value*/
namedWindow("OriginalImage", CV_WINDOW_AUTOSIZE); //create a window with the name "OriginalImage"
imshow("OriginalImage", original_img); //display the image which is stored in the 'original_img' in the "OriginalImage" window
namedWindow("NewImage", CV_WINDOW_AUTOSIZE); //create a window with the name "NewImage"
imshow("NewImage", new_img); //display the image which is stored in the 'new_img' in the "NewImage" window
namedWindow("MeanImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanImage"
imshow("MeanImage", mean_img); //display the image which is stored in the 'mean_img' in the "MeanImage" window
namedWindow("MeanFullImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanFullImage"
imshow("MeanFullImage", mean_img_full); //display the image which is stored in the 'mean_img_full' in the "MeanFullImage" window
waitKey(0); //wait infinite time for a keypress
destroyWindow("OriginalImage"); //destroy the window with the name, "OriginalImage"
destroyWindow("NewImage"); //destroy the window with the name, "NewImage"
destroyWindow("MeanImage"); //destroy the window with the name, "MeanImage"
destroyWindow("MeanFullImage"); //destroy the window with the name, "MeanImage"
return 0;
}
The problem was at the definition of the type of each matrix. It has to be 8 Bits Unsigned Character. It is working now. Thanks a lot ,,,
Related
I am doing a homework where we need to write a function which gets an image and a kernel and we have to calculate the 2d spatial convolution.
Using a gaussian kernel I get the expected result (a blurred image) but if I use instead for example an edge detection kernel (taken from here) I see that something isn't working properly (the image becomes very greyish).
I guess the problem is either the border handling, which should be a zero-padding but I am not totally sure if implemented it correctly or the normalization at the end.
Is there a way to display a float image (e.g. one pixel of the float has a value for 25000), because I think it always gets capped at 255 (white) if I don't use the normalization.
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
int main(int argc, char *argv[])
{
cv::Mat img = cv::imread("orig.jpg",0); // load image as grayscale
img.convertTo(img,CV_32FC1); // convert to float
cv::Mat_<float> output(img.rows,img.cols); // create new mat with same size as source image
output = 0;
// creating a kernel (here Gaussian blur)
cv::Mat_<float> kernel(5,5);
kernel << 1,4,6,4,1,4,16,24,16,4,6,24,36,24,6,4,16,24,16,4,1,4,6,4,1;
int kCenterX = kernel.cols/2;
int kCenterY = kernel.rows/2;
for (int i = 0; i < img.rows; i++){ // for every row in image
for (int j = 0; j < img.cols; j++){ // for every column in image
for (int m = 0; m < kernel.rows; m++){ // for every row of kernel
int mm = kernel.rows - 1 -m; // row index of flipped kernel
for (int n = 0; n < kernel.cols; n++){ // for every column of kernel
int nn = kernel.cols - 1 -n; // column index of flipped kernel
// index for border handling
int ii = i + (m - kCenterY);
int jj = j + (n - kCenterX);
// checking if sample is still in bound of input image
// and if not, treat those pixels as 0 (because they won't get added to sum)
if (ii >= 0 && ii < img.rows && jj >= 0 && jj < img.cols)
output.at<float>(i,j) += img.at<float>(ii,jj) * kernel.at<float>(mm,nn);
}
}
}
}
// normalize input and output image (might be wrong, but I don't know how else I can see float images
cv::normalize(output, output, 0, 1, cv::NORM_MINMAX);
cv::normalize(img, img, 0, 1, cv::NORM_MINMAX);
// display images
cv::imshow("Original", img);
cv::imshow("Convolution", output);
cv::waitKey(0);
return 0;
}
This question already has answers here:
C programming, why does this large array declaration produce a segmentation fault?
(6 answers)
Closed 6 years ago.
I am running a code where I am simply creating 2 matrices: one matrix is of dimensions arows x nsame and the other has dimensions nsame x bcols. The result is an array of dimensions arows x bcols. This is fairly simple to implement using BLAS and the following code appears to work as intended when using the below master-slave model with OpenMPI:`
#include <iostream>
#include <stdio.h>
#include <iostream>
#include <cmath>
#include <mpi.h>
#include <gsl/gsl_blas.h>
using namespace std;`
int main(int argc, char** argv){
int noprocs, nid;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &nid);
MPI_Comm_size(MPI_COMM_WORLD, &noprocs);
int master = 0;
const int nsame = 500; //must be same if matrices multiplied together = acols = brows
const int arows = 500;
const int bcols = 527; //works for 500 x 500 x 527 and 6000 x 100 x 36
int rowsent;
double buff[nsame];
double b[nsame*bcols];
double c[arows][bcols];
double CC[1*bcols]; //here ncols corresponds to numbers of rows for matrix b
for (int i = 0; i < bcols; i++){
CC[i] = 0.;
};
// Master part
if (nid == master ) {
double a [arows][nsame]; //creating identity matrix of dimensions arows x nsame (it is I if arows = nsame)
for (int i = 0; i < arows; i++){
for (int j = 0; j < nsame; j++){
if (i == j)
a[i][j] = 1.;
else
a[i][j] = 0.;
}
}
double b[nsame*bcols];//here ncols corresponds to numbers of rows for matrix b
for (int i = 0; i < (nsame*bcols); i++){
b[i] = (10.*i + 3.)/(3.*i - 2.) ;
};
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD);
rowsent=0;
for (int i=1; i < (noprocs); i++) {
// Note A is a 2D array so A[rowsent]=&A[rowsent][0]
MPI_Send(a[rowsent], nsame, MPI_DOUBLE_PRECISION,i,rowsent+1,MPI_COMM_WORLD);
rowsent++;
}
for (int i=0; i<arows; i++) {
MPI_Recv(CC, bcols, MPI_DOUBLE_PRECISION, MPI_ANY_SOURCE, MPI_ANY_TAG,
MPI_COMM_WORLD, &status);
int sender = status.MPI_SOURCE;
int anstype = status.MPI_TAG; //row number+1
int IND_I = 0;
while (IND_I < bcols){
c[anstype - 1][IND_I] = CC[IND_I];
IND_I++;
}
if (rowsent < arows) {
MPI_Send(a[rowsent], nsame,MPI_DOUBLE_PRECISION,sender,rowsent+1,MPI_COMM_WORLD);
rowsent++;
}
else { // tell sender no more work to do via a 0 TAG
MPI_Send(MPI_BOTTOM,0,MPI_DOUBLE_PRECISION,sender,0,MPI_COMM_WORLD);
}
}
}
// Slave part
else {
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
while(status.MPI_TAG != 0) {
int crow = status.MPI_TAG;
gsl_matrix_view AAAA = gsl_matrix_view_array(buff, 1, nsame);
gsl_matrix_view BBBB = gsl_matrix_view_array(b, nsame, bcols);
gsl_matrix_view CCCC = gsl_matrix_view_array(CC, 1, bcols);
/* Compute C = A B */
gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, &AAAA.matrix, &BBBB.matrix,
0.0, &CCCC.matrix);
MPI_Send(CC,bcols,MPI_DOUBLE_PRECISION, master, crow, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
}
}
// output c here on master node //uncomment the below lines if I wish to see the output
// if (nid == master){
// if (rowsent == arows){
// // cout << rowsent;
// int IND_F = 0;
// while (IND_F < arows){
// int IND_K = 0;
// while (IND_K < bcols){
// cout << "[" << IND_F << "]" << "[" << IND_K << "] = " << c[IND_F][IND_K] << " ";
// IND_K++;
// }
// cout << "\n";
// IND_F++;
// }
// }
// }
MPI_Finalize();
//free any allocated space here
return 0;
};
Now what appears odd is that when I increase size of the matrices (e.g. from nsame = 500 to nsame = 501), the code no longer works. I receive the following error:
mpirun noticed that process rank 0 with PID 0 on node Users-MacBook-Air exited on signal 11 (Segmentation fault: 11).
I have tried this with other combinations of sizes for the matrices and there always appears to be an upper limit for the size of the matrices themselves (which seems to vary based on how I vary the different dimensions themselves). I have also tried modifying the values of the matrices themselves although this does not appear to change anything. I realize there are alternative ways to initialize the matrices in my example (e.g. using vector) but am simply wondering why my current scheme of multiplying matrices of arbitrary size seems to only work to a certain extent.
You're declaring too many big local variables, which is causing stack space related problems. a, in particular, is 500x500 doubles (250000 8 byte elements, or 2 million bytes). b is even larger.
You'll need to dynamically allocate space for some or all of those arrays.
There might be a compiler option to increase the initial stack space but that isn't a good long term solution.
I am making an application that uses OCR and I am using OpenCV to threshold the image to improve the OCR results, I have gotten pretty good results but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample Images:
After:
One of the best algorithms for thresholding problem in the OCR field is sauvola method.You can use the below code.
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethod{OTSU,NIBLACK,SAUVOLA,WOLFJOLION};
class BhThresholder
{
public :
void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
double m,s,max_s, sum, sum_sq, foo;
int wxh = win_x / 2;
int wyh = win_y / 2;
int x_firstth = wxh;
int y_lastth = im.rows-wyh-1;
int y_firstth= wyh;
double winarea = win_x*win_y;
max_s = 0;
for (int j = y_firstth ; j<=y_lastth; j++)
{
// Calculate the initial window at the beginning of the line
sum = sum_sq = 0;
for (int wy=0 ; wy<win_y; wy++)
for (int wx=0 ; wx<win_x; wx++) {
foo = im.uget(wx,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(x_firstth, j, m);
map_s.fset(x_firstth, j, s);
// Shift the window, add and remove new/old values to the histogram
for (int i=1 ; i <= im.cols -win_x; i++) {
// Remove the left old column and add the right new column
for (int wy=0; wy<win_y; ++wy) {
foo = im.uget(i-1,j-wyh+wy);
sum -= foo;
sum_sq -= foo*foo;
foo = im.uget(i+win_x-1,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(i+wxh, j, m);
map_s.fset(i+wxh, j, s);
}
}
return max_s;
}
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
int mx, my;
// Create local statistics and store them in a double matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++) {
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++) {
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version) {
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
}
thsurf.fset(i+wxh,j,th);
if (i==0) {
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
}
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
}
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
}
cerr << "surface created" << endl;
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
{
if (src.uget(x,y) >= thsurf.fget(x,y))
{
dst.uset(x,y,255);
}
else
{
dst.uset(x,y,0);
}
}
}
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
Mat src = _src.getMat();
int winx = 0;
int winy = 0;
float optK=0.5;
if (winx==0 || winy==0) {
winy = (int) (2.0 * src.rows - 1)/3;
winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
// if the window is too big, than we asume that the image
// is not a single text box, but a document page: set
// the window size to a fixed constant.
if (winx > 100)
winx = winy = 40;
}
// Threshold
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
//medianBlur(src,dst,5);
GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
//medianBlur(src,dst,7);
switch (method)
{
case BhThresholdMethod::OTSU :
threshold(dst,dst,128,255,CV_THRESH_OTSU);
break;
case BhThresholdMethod::SAUVOLA :
case BhThresholdMethod::WOLFJOLION :
NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
}
bitwise_not(dst,dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
Here is comparsion table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
Try to sample with better resolution.
I'm trying to find the fft of a dynamically allocated array. The input array is copied from host to device using cudaMemcpy2D. Then the fft is taken (cufftExecR2C) and the results are copied back from device to host.
So my initial problem was how to use the pitch information in the fft. Then I found an answer here - CUFFT: How to calculate fft of pitched pointer?
But unfortunately it doesn't work. The results I get are garbage values. Given below is my code.
#define NRANK 2
#define BATCH 10
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <stdio.h>
#include <iomanip>
#include <iostream>
#include <vector>
using namespace std;
const size_t NX = 4;
const size_t NY = 6;
int main()
{
// Input array (static) - host side
float h_in_data_static[NX][NY] ={
{0.7943 , 0.6020 , 0.7482 , 0.9133 , 0.9961 , 0.9261},
{0.3112 , 0.2630 , 0.4505 , 0.1524 , 0.0782 , 0.1782},
{0.5285 , 0.6541 , 0.0838 , 0.8258 , 0.4427, 0.3842},
{0.1656 , 0.6892 , 0.2290 , 0.5383 , 0.1067, 0.1712}
};
// --------------------------------
// Input array (dynamic) - host side
float *h_in_data_dynamic = new float[NX*NY];
// Set the values
size_t h_ipitch;
for (int r = 0; r < NX; ++r) // this can be also done on GPU
{
for (int c = 0; c < NY; ++c)
{ h_in_data_dynamic[NY*r + c] = h_in_data_static[r][c]; }
}
// --------------------------------
// Output array - host side
float2 *h_out_data_temp = new float2[NX*(NY/2+1)] ;
// Input and Output array - device side
cufftHandle plan;
cufftReal *d_in_data;
cufftComplex * d_out_data;
int n[NRANK] = {NX, NY};
// Copy input array from Host to Device
size_t ipitch;
cudaError cudaStat1 = cudaMallocPitch((void**)&d_in_data,&ipitch,NY*sizeof(cufftReal),NX);
cout << cudaGetErrorString(cudaStat1) << endl;
cudaError cudaStat2 = cudaMemcpy2D(d_in_data,ipitch,h_in_data_dynamic,NY*sizeof(float),NY*sizeof(float),NX,cudaMemcpyHostToDevice);
cout << cudaGetErrorString(cudaStat2) << endl;
// Allocate memory for output array - device side
size_t opitch;
cudaError cudaStat3 = cudaMallocPitch((void**)&d_out_data,&opitch,(NY/2+1)*sizeof(cufftComplex),NX);
cout << cudaGetErrorString(cudaStat3) << endl;
// Performe the fft
int rank = 2; // 2D fft
int istride = 1, ostride = 1; // Stride lengths
int idist = 1, odist = 1; // Distance between batches
int inembed[] = {ipitch, NX}; // Input size with pitch
int onembed[] = {opitch, NX}; // Output size with pitch
int batch = 1;
cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batch);
//cufftPlan2d(&plan, NX, NY , CUFFT_R2C);
cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE);
cufftExecR2C(plan, d_in_data, d_out_data);
cudaThreadSynchronize();
// Copy d_in_data back from device to host
cudaError cudaStat4 = cudaMemcpy2D(h_out_data_temp,(NY/2+1)*sizeof(float2), d_out_data, opitch, (NY/2+1)*sizeof(cufftComplex), NX, cudaMemcpyDeviceToHost);
cout << cudaGetErrorString(cudaStat4) << endl;
// Print the results
for (int i = 0; i < NX; i++)
{
for (int j =0 ; j< NY/2 + 1; j++)
printf(" %f + %fi",h_out_data_temp[i*(NY/2+1) + j].x ,h_out_data_temp[i*(NY/2+1) + j].y);
printf("\n");
}
cudaFree(d_in_data);
return 0;
}
I think the problem is in cufftPlanMany. How can I solve this issue ?
You may want to study the advanced data layout section of the documentation carefully.
I think the previous question that was linked is somewhat confusing because that question is passing the width and height parameters in reverse order for what I would expect for a cufft 2D plan. However the answer then mimics that order so it is at least consistent.
Secondly, you missed in the previous question that the "pitch" parameters that are being passed in inembed and onembed are not the same as the pitch parameters that you would receive from a cudaMallocPitch operation. They have to be scaled by the number of bytes per data element in the input and output data sets. I'm actually not entirely sure this is the intended use of the inembed and onembed parameters, but it seems to work.
When I adjust your code to account for the above two changes, I seem to get valid results, at least they appear to be in a reasonable range. You've posted several questions now about 2D FFTs, where you've said the results are not correct. I can't do these 2D FFT's in my head, so I suggest in the future you indicate what data you are expecting.
This has the changes I made:
#define NRANK 2
#define BATCH 10
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <stdio.h>
#include <iomanip>
#include <iostream>
#include <vector>
using namespace std;
const size_t NX = 4;
const size_t NY = 6;
int main()
{
// Input array (static) - host side
float h_in_data_static[NX][NY] ={
{0.7943 , 0.6020 , 0.7482 , 0.9133 , 0.9961 , 0.9261},
{0.3112 , 0.2630 , 0.4505 , 0.1524 , 0.0782 , 0.1782},
{0.5285 , 0.6541 , 0.0838 , 0.8258 , 0.4427, 0.3842},
{0.1656 , 0.6892 , 0.2290 , 0.5383 , 0.1067, 0.1712}
};
// --------------------------------
// Input array (dynamic) - host side
float *h_in_data_dynamic = new float[NX*NY];
// Set the values
size_t h_ipitch;
for (int r = 0; r < NX; ++r) // this can be also done on GPU
{
for (int c = 0; c < NY; ++c)
{ h_in_data_dynamic[NY*r + c] = h_in_data_static[r][c]; }
}
// --------------------------------
int owidth = (NY/2)+1;
// Output array - host side
float2 *h_out_data_temp = new float2[NX*owidth] ;
// Input and Output array - device side
cufftHandle plan;
cufftReal *d_in_data;
cufftComplex * d_out_data;
int n[NRANK] = {NX, NY};
// Copy input array from Host to Device
size_t ipitch;
cudaError cudaStat1 = cudaMallocPitch((void**)&d_in_data,&ipitch,NY*sizeof(cufftReal),NX);
cout << cudaGetErrorString(cudaStat1) << endl;
cudaError cudaStat2 = cudaMemcpy2D(d_in_data,ipitch,h_in_data_dynamic,NY*sizeof(float),NY*sizeof(float),NX,cudaMemcpyHostToDevice);
cout << cudaGetErrorString(cudaStat2) << endl;
// Allocate memory for output array - device side
size_t opitch;
cudaError cudaStat3 = cudaMallocPitch((void**)&d_out_data,&opitch,owidth*sizeof(cufftComplex),NX);
cout << cudaGetErrorString(cudaStat3) << endl;
// Performe the fft
int rank = 2; // 2D fft
int istride = 1, ostride = 1; // Stride lengths
int idist = 1, odist = 1; // Distance between batches
int inembed[] = {NX, ipitch/sizeof(cufftReal)}; // Input size with pitch
int onembed[] = {NX, opitch/sizeof(cufftComplex)}; // Output size with pitch
int batch = 1;
if ((cufftPlanMany(&plan, rank, n, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batch)) != CUFFT_SUCCESS) cout<< "cufft error 1" << endl;
//cufftPlan2d(&plan, NX, NY , CUFFT_R2C);
if ((cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE)) != CUFFT_SUCCESS) cout << "cufft error 2" << endl;
if ((cufftExecR2C(plan, d_in_data, d_out_data)) != CUFFT_SUCCESS) cout << "cufft error 3" << endl;
cudaDeviceSynchronize();
// Copy d_in_data back from device to host
cudaError cudaStat4 = cudaMemcpy2D(h_out_data_temp,owidth*sizeof(float2), d_out_data, opitch, owidth*sizeof(cufftComplex), NX, cudaMemcpyDeviceToHost);
cout << cudaGetErrorString(cudaStat4) << endl;
// Print the results
for (int i = 0; i < NX; i++)
{
for (int j =0 ; j< owidth; j++)
printf(" %f + %fi",h_out_data_temp[i*owidth + j].x ,h_out_data_temp[i*owidth + j].y);
printf("\n");
}
cudaFree(d_in_data);
return 0;
}
I am working with OpenCV and C++ for a project and I found the following problem: after initializing a mat with the following statement
Mat or_mat=Mat(img->height,img->width,CV_32FC1);
check the following value
or_mat.at <float> (i, j) = atan (fy / fx) / 2 +1.5707963;
After completing returning the mat for the output of the function but when I go to read there are many values that do not correspond to the output. Precise in incorrect values for the I-4.31602e +008 is inserted and if I make a cout the value of the expression is correct. What could be the error??
relevant Code:
Mat or_mat=Mat(img->height,img->width,CV_32FC1);
to angle
if(fx > 0){
or_mat.at<float>(i,j) = atan(fy/fx)/2+1.5707963;
}
else if(fx<0 && fy >0){
or_mat.at<float>(i,j) = atan(fy/fx)/2+3.1415926;
}
else if(fx<0 && fy <0){
or_mat.at<float>(i,j) = atan(fy/fx)/2;
}
else if(fy!=0 && fx==0){
or_mat.at<float>(i,j) = 1.5707963;
}
I have to calculate the local orientation of the fingerprint image, the following code I have omitted several statements and calculations that do not have errors.
I would triple check that you are indexing correctly. The following code shows my initialising a matrix full of zeros, and then filling it with some float using at .at operator. It compiles and runs nicely:
int main()
{
int height = 10;
int width = 3;
// Initialise or_mat to with every element set to zero
cv::Mat or_mat = cv::Mat::zeros(height, width, CV_32FC1);
std::cout << "Original or_mat:\n" << or_mat << std::endl;
// Loop through and set each element equal to some float
float value = 10.254;
for (int i = 0; i < or_mat.rows; ++i)
{
for (int j = 0; j < or_mat.cols; ++j)
{
or_mat.at<float>(i,j) = value;
}
}
std::cout << "Final or_mat:\n" << or_mat << std::endl;
return 0;
}