I am attempting to write a naive implementation of the Short-Time Fourier Transform using consecutive FFT frames in time, calculated using the FFTW library, but I am getting a Segmentation fault and cannot work out why.
My code is as below:
// load in audio
AudioFile<double> audioFile;
audioFile.load ("assets/example-audio/file_example_WAV_1MG.wav");
int N = audioFile.getNumSamplesPerChannel();
// make stereo audio mono
double fileDataMono[N];
if (audioFile.isStereo())
for (int i = 0; i < N; i++)
fileDataMono[i] = ( audioFile.samples[0][i] + audioFile.samples[1][i] ) / 2;
// setup stft
// (test transform, presently unoptimized)
int stepSize = 512;
int M = 2048; // fft size
int noOfFrames = (N-(M-stepSize))/stepSize;
// create Hamming window vector
double w[M];
for (int m = 0; m < M; m++) {
w[m] = 0.53836 - 0.46164 * cos( 2*M_PI*m / M );
}
double* input;
// (pads input array if necessary)
if ( (N-(M-stepSize))%stepSize != 0) {
noOfFrames += 1;
int amountOfZeroPadding = stepSize - (N-(M-stepSize))%stepSize;
double ipt[N + amountOfZeroPadding];
for (int i = 0; i < N; i++) // copy values from fileDataMono into input
ipt[i] = fileDataMono[i];
for (int i = 0; i < amountOfZeroPadding; i++)
ipt[N + i] = 0;
input = ipt;
} else {
input = fileDataMono;
}
// compute stft
fftw_complex* stft[noOfFrames];
double frames[noOfFrames][M];
fftw_plan fftPlan;
for (int i = 0; i < noOfFrames; i++) {
stft[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * M);
for (int m = 0; m < M; m++)
frames[i][m] = input[i*stepSize + m] * w[m];
fftPlan = fftw_plan_dft_r2c_1d(M, frames[i], stft[i], FFTW_ESTIMATE);
fftw_execute(fftPlan);
}
// compute istft
double* outputFrames[noOfFrames];
double output[N];
for (int i = 0; i < noOfFrames; i++) {
outputFrames[i] = (double*)fftw_malloc(sizeof(double) * M);
fftPlan = fftw_plan_dft_c2r_1d(M, stft[i], outputFrames[i], FFTW_ESTIMATE);
fftw_execute(fftPlan);
for (int m = 0; i < M; m++) {
output[i*stepSize + m] += outputFrames[i][m];
}
}
fftw_destroy_plan(fftPlan);
for (int i = 0; i < noOfFrames; i++) {
fftw_free(stft[i]);
fftw_free(outputFrames[i]);
}
// output audio
AudioFile<double>::AudioBuffer outputBuffer;
outputBuffer.resize (1);
outputBuffer[0].resize(N);
outputBuffer[0].assign(output, output+N);
bool ok = audioFile.setAudioBuffer(outputBuffer);
audioFile.setAudioBufferSize (1, N);
audioFile.setBitDepth (16);
audioFile.setSampleRate (8000);
audioFile.save ("out/audioOutput.wav");
The segfault seems to be being raised by the first fftw_malloc when computing the forward STFT.
Thanks in advance!
The relevant bit of code is:
double* input;
if ( (N-(M-stepSize))%stepSize != 0) {
double ipt[N + amountOfZeroPadding];
//...
input = ipt;
}
//...
input[i*stepSize + m];
Your input pointer points at memory that exists only inside the if statement. The closing brace denotes the end of the lifetime of the ipt array. When dereferencing the pointer later, you are addressing memory that no longer exists.
I am trying to FFT an image using the library from http://www.fftw.org/. basically i want to do a forward transform and then the backward transform to get the input image i have chosen. Then I would like to get my input back with the backward FFT, but it doesn't work. Here is my code :
double n[w][h][2];
double im[w][h][2];
const int Lx = w;
const int Lt = h;
int var_x;
int var_t;
fftw_complex *in, *out, *result;
fftw_plan p;
fftw_plan inv_p;
in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*Lx*Lt);
out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*Lx*Lt);
result = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *Lx *Lt);
p = fftw_plan_dft_2d(Lx, Lt, in, out, FFTW_FORWARD, FFTW_MEASURE);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
in[t + Lt*x][0] = n[x][t][0];
in[t + Lt*x][1] = 0;
}
}
fftw_execute(p);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
n[x][t][0] = out[t + Lt*x][0];
n[x][t][1] = out[t + Lt*x][1];
}
}
inv_p = fftw_plan_dft_2d(Lx, Lt, out, result, FFTW_BACKWARD, FFTW_MEASURE);
fftw_execute(inv_p);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
im[x][t][0] = result[t + Lt*x][0];
im[x][t][1] = result[t + Lt*x][1];
std::cout<<im[x][t][0]<<std::endl;
}
}
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
As you can see, I just try to perform a normal FFT, then to reverse it. The problem is that my output 'im' is just full of 0, instead of 1 and 0...
So what's wrong with my code ?
Thank you :)
Your original image and transform matrices are declared as int. Try defining them as double:
double n[w][h][2];
double im[w][h][2];
The line below, for example, is destroying data because result[i][j] is of type double (as of fftw_complex definition). So that if result[i][j] == 0.99, it'll be converted to 0.
im[x][t][0] = result[t + Lt*x][0]; //A value of 0.99 may be converted to 0
This is the corrected version of the code which is now working - thank you to everybody who helped me to fix all the problems.
double n[w][h][2];
double im[w][h][2];
const int Lx = w;
const int Lt = h;
int var_x;
int var_t;
fftw_complex *in, *out, *result;
fftw_plan p;
fftw_plan inv_p;
in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*Lx*Lt);
out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*Lx*Lt);
result = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) *Lx *Lt);
p = fftw_plan_dft_2d(Lx, Lt, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
in[t + Lt*x][0] = n[x][t][0];
in[t + Lt*x][1] = 0;
}
}
fftw_execute(p);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
n[x][t][0] = out[t + Lt*x][0];
n[x][t][1] = out[t + Lt*x][1];
}
}
inv_p = fftw_plan_dft_2d(Lx, Lt, out, result, FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(inv_p);
for (int x = 0; x < Lx; x++)
{
for (int t = 0; t < Lt; t++)
{
im[x][t][0] = result[t + Lt*x][0];
im[x][t][1] = result[t + Lt*x][1];
std::cout<<im[x][t][0]<<std::endl;
}
}
fftw_destroy_plan(p);
fftw_destroy_plan(inv_p);
fftw_free(in);
fftw_free(out);
fftw_free(result);
I am new to c++ and I have no real idea why my program crashes only guesses.
The following program suddenly started to crash on line 49 at the void saveSig(cv::Mat *frame) line it self
without even steping in to the function it self.
It ran fine before.
The program soposed to track a person in a video under certain circumstances which I will not go over since they haven't been impllementet yet.
I can only guess that I have ran out of stack and I'm not sure why, again it might be a leek that I missed or maybe I just ran out of stack space or maybe it's completely something else and very stupid.
PS: sorry if the code is not "pretty" I'm really new to C++ and OpenCV and I will appreciate any comments about "bad coding practice".
#include "myCVFunctions.h"
#include <vector>
#define LOADING_VIDEO_ERROR -1
#define LOADING_BACKGROUND_IMAGE_ERROR -2
#define FRAME_BUFFER_SIZE 10
#define SIG_BUFFER_SIZE 6
const cv::string g_c_videoFilePath = "res/tmp.mp4";
const cv::string g_c_bgFilePath = "res/bg.jpg";
const cv::Mat g_c_bg = cv::imread(g_c_bgFilePath);
const cv::Rect g_c_entranceROIRect(869, 999, 345, 80);
const cv::Rect g_c_largeEntranceROIRect(869, 340, 345, 740);
const cv::Rect g_c_sigROI(869,539,345,541);
cv::Mat g_currFrameBackup;
cv::Point g_clickCoords(-1,-1);
cv::Rect g_markedROI;
bool g_trace = false;
bool g_personInside = false;
bool g_useSig = false;
char g_sigCount = 0;
double g_sig[SIG_BUFFER_SIZE];
double g_newSig[SIG_BUFFER_SIZE];
cv::Point g_inSigHeadCoords[SIG_BUFFER_SIZE];
cv::Point g_inNewSigHeadCoords[SIG_BUFFER_SIZE];
long double av1 = 0;
long double av2 = 0;
double minDiff = 9999999999.999999;
void onMouse(int event, int x, int y, int flags, void* userdata){
if(event == CV_EVENT_LBUTTONDOWN){
g_clickCoords.x = x;
g_clickCoords.y = y;
}
if(event == CV_EVENT_MOUSEMOVE && g_clickCoords.x>=0){
g_markedROI = cv::Rect(g_clickCoords, cv::Point(x,y));
g_currFrameBackup.copyTo(*((cv::Mat*)userdata));
cv::rectangle(*((cv::Mat*)userdata), g_markedROI, cv::Scalar(0,255,0));
}
if(event == CV_EVENT_LBUTTONUP){
g_trace = true;
g_useSig = true;
g_clickCoords = cv::Point(-1,-1);
}
}
void saveSig(cv::Mat *frame){ //the crash occurs here
double fftData[512*512];
cv::Mat sigROI, sigHSV, resized;
sigROI = (*frame)(g_c_sigROI);
cv::cvtColor(sigROI, sigHSV, CV_BGR2HSV);
resized = my_cv::resize_zeros(sigHSV, cv::Size(512,512));
cv::MatIterator_<cv::Vec3b> m_it = resized.begin<cv::Vec3b>();
for(int i=0; m_it!=resized.end<cv::Vec3b>(); m_it++, i++){
fftData[i] = (*m_it)[2];
}
my_cv::FFTR fft = my_cv::createFFTR<double>(fftData, 512, 512, FFT_TYPE_2D);
//cv::flip(sigHSV, sigHSV, -1);
//cv::transpose(sigHSV, sigHSV);
//cv::flip(sigHSV, sigHSV, 0);
//cv::imshow("1", sigROI);
//cv::imshow("", sigHSV);
//cv::waitKey();
//resized = my_cv::resize_zeros(sigHSV, cv::Size(512,512));
//m_it = resized.begin<cv::Vec3b>();
//for(int i=0; m_it!=resized.end<cv::Vec3b>(); m_it++, i++){
// fftData[i] = (*m_it)[2];
//}
//my_cv::FFTR fft180 = my_cv::createFFTR<double>(fftData, 512, 512, FFT_TYPE_2D);
my_cv::FFTR multFFT = my_cv::multFFT(fft, fft);
my_cv::m_reverseFFTR(multFFT, FFT_TYPE_2D);
if(g_useSig){
g_newSig[g_sigCount] = my_cv::getFFTAverege(multFFT);
}else{
g_sig[g_sigCount] = my_cv::getFFTAverege(multFFT);
}
g_sigCount++;
if(g_sigCount>=SIG_BUFFER_SIZE&&g_useSig){
av1 = ((g_sig[0]+g_sig[1]+g_sig[2]+g_sig[3]+g_sig[4]+g_sig[5])/6)/1000000.0;
av2 = ((g_newSig[0]+g_newSig[1]+g_newSig[2]+g_newSig[3]+g_newSig[4]+g_newSig[5])/6)/1000000.0;
/*for(int i=0; i<SIG_BUFFER_SIZE; i++){
for(int j=0; j<SIG_BUFFER_SIZE; j++){
double diff = abs(g_newSig[i]-g_sig[j]);
minDiff = (diff<minDiff ? diff : minDiff);
}
}*/
my_cv::deleteFFTR(fft);
//my_cv::deleteFFTR(fft180);
my_cv::deleteFFTR(multFFT);
}
}
void proccesFrame(cv::Mat *frame){
cv::Mat grayFrame, negativeFrame, bwFrame, entranceROI;
negativeFrame = g_c_bg - *frame;
cv::cvtColor(negativeFrame, grayFrame, CV_BGR2GRAY);
cv::threshold(grayFrame, bwFrame, 30, 255, cv::THRESH_BINARY);
cv::Mat erode = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(7,7));
cv::Mat dilate = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(10,10));
cv::erode(bwFrame, bwFrame, erode);
cv::dilate(bwFrame, bwFrame, dilate);
entranceROI = bwFrame(g_c_largeEntranceROIRect);
cv::MatIterator_<uchar> m_it = entranceROI.begin<uchar>();
for(g_personInside = false; m_it!=entranceROI.end<uchar>(); m_it++){
if(*m_it==255){
g_personInside = true;
break;
}
}
if(!g_personInside){
g_trace = false;
g_sigCount = 0;
av1 = 0;
av2 = 0;
minDiff = 9999999999.999999;
}else{
if(g_sigCount<SIG_BUFFER_SIZE){
cv::Mat ROI = bwFrame(g_c_entranceROIRect);
cv::MatIterator_<uchar> bw_it = bwFrame.begin<uchar>();
if(!g_useSig){
for(int i=0; bw_it!=bwFrame.end<uchar>(); bw_it++, i++){
if(*bw_it==255){
g_inSigHeadCoords[g_sigCount] = cv::Point(i%bwFrame.cols, i/bwFrame.cols);
break;
}
}
}else{
for(int i=0; bw_it!=bwFrame.end<uchar>(); bw_it++, i++){
if(*bw_it==255){
g_inNewSigHeadCoords[g_sigCount] = cv::Point(i%bwFrame.cols, i/bwFrame.cols);
break;
}
}
}
saveSig(frame);
}
cv::putText(*frame, "Person inside", cv::Point(20,120), CV_FONT_HERSHEY_PLAIN, 3.0, cv::Scalar(0,255,0), 2);
if(g_useSig&&g_sigCount>=SIG_BUFFER_SIZE){
g_sig;
g_newSig;
g_sigCount++;
//g_trace = true;
}
if(g_trace){
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
findContours(bwFrame, contours, hierarchy, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
std::vector<std::vector<cv::Point>>::iterator o_it = contours.begin();
for(; o_it!=contours.end(); o_it++){
std::vector<cv::Point>::iterator i_it = (*o_it).begin();
for(; i_it!=(*o_it).end()-1; i_it++){
cv::line(*frame, *i_it, *(i_it+1), cv::Scalar(0,255,0) , 3);
}
}
}
}
}
int main(int argc, char* argv[]){
//init//////////////////////////////////////////////////////////////////////
cv::VideoCapture videoBuffer(g_c_videoFilePath);
if(!videoBuffer.isOpened()){
std::cerr << "Can't load video please check the paths\n";
return LOADING_VIDEO_ERROR;
}
if(!g_c_bg.data){
std::cerr << "Can't load background image please check the paths\n";
return LOADING_BACKGROUND_IMAGE_ERROR;
}
std::vector<cv::Mat> frameBuffer;
frameBuffer.resize(FRAME_BUFFER_SIZE);
const std::vector<cv::Mat>::iterator currFrame = frameBuffer.begin();
const cv::string mainWindow = "Object Tracker";
cv::namedWindow(mainWindow, CV_WINDOW_AUTOSIZE);
cv::setMouseCallback(mainWindow, onMouse, (void*)&(*currFrame));
//init end/////////////////////////////////////////////////////////////////////////////
//video loop///////////////////////////////////////////////////////////////////////////
for(char paused = 0;;){
paused = (cv::waitKey(20)==' ' ? 1 : 0);
while(paused){
cv::resize(*currFrame, *currFrame, cv::Size(900, 540));
cv::imshow(mainWindow, *currFrame);
paused = (cv::waitKey(20)==' ' ? 0 : 1);
}
cv::Mat frame;
videoBuffer.read(frame);
frame.copyTo(g_currFrameBackup);
frameBuffer.pop_back();
frameBuffer.insert(frameBuffer.begin(), frame);
std::stringstream ss;
ss << "Frame: " << videoBuffer.get(CV_CAP_PROP_POS_FRAMES);
cv::putText(*currFrame, ss.str().c_str(), cv::Point(20,70), CV_FONT_HERSHEY_PLAIN, 3.0, cv::Scalar(0,255,0), 2);
proccesFrame(&(*currFrame));
/*if(g_personInside){
cv::resize(*currFrame, *currFrame, cv::Size(900, 540));
while(cv::waitKey(40)!=' ')
cv::imshow(mainWindow, *currFrame);
}*/
cv::resize(*currFrame, *currFrame, cv::Size(900, 540));
cv::imshow(mainWindow, *currFrame);
}
//video loop end///////////////////////////////////////////////////////////////////////
return 0;
}
and the "myCVFunctions.h" file:
#pragma once
#include "opencv\cv.h"
#include "opencv\highgui.h"
#include "fftw3.h"
#define FFT_TYPE_1D 1
#define FFT_TYPE_2D 2
namespace my_cv{
struct myComplex{
double real;
double imag;
};
struct FFTR{
myComplex** data;
int cols;
int rows;
};
struct ENTROPR{
double** data;
int cols;
int rows;
};
void printFFTR(FFTR fft);
FFTR createFFTR(cv::Mat mGrey, int type){
FFTR result;
result.rows = mGrey.rows, result.cols = mGrey.cols;
result.data = new myComplex*[result.cols];
for(int i = 0; i<result.cols; i++)
result.data[i] = new myComplex[result.rows];
fftw_complex *in, *out;
fftw_plan p;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * result.rows * result.cols);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * result.rows * result.cols);
switch(type){
case FFT_TYPE_1D:
p = fftw_plan_dft_1d(result.rows*result.cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
break;
case FFT_TYPE_2D:
p = fftw_plan_dft_2d(result.rows, result.cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
break;
}
cv::MatIterator_<uchar> mGrey_it = mGrey.begin<uchar>();
for(int i=0; mGrey_it != mGrey.end<uchar>(); mGrey_it++, i++){
in[i][0] = *mGrey_it;
in[i][1] = 0;
}
fftw_execute(p);
for(int i=0; i<result.rows*result.cols; i++){
int x = i%result.cols, y = i/result.cols;
result.data[x][y].real = out[i][0];
result.data[x][y].imag = out[i][1];
}
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
return result;
}
template<class T> FFTR createFFTR(const T* const mat, int cols, int rows, int type){
FFTR result;
result.rows = rows, result.cols = cols;
result.data = new myComplex*[result.cols];
for(int i = 0; i<result.cols; i++)
result.data[i] = new myComplex[result.rows];
fftw_complex *in, *out;
fftw_plan p;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * result.rows * result.cols);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * result.rows * result.cols);
switch(type){
case FFT_TYPE_1D:
p = fftw_plan_dft_1d(result.rows*result.cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
break;
case FFT_TYPE_2D:
p = fftw_plan_dft_2d(result.rows, result.cols, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
break;
}
for(int i=0; i<cols*rows; i++){
in[i][0] = mat[i];
in[i][1] = 0;
}
fftw_execute(p);
for(int i=0; i<result.rows*result.cols; i++){
int x = i%result.cols, y = i/result.cols;
result.data[x][y].real = out[i][0];
result.data[x][y].imag = out[i][1];
}
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
return result;
}
void m_reverseFFTR(FFTR fft, int type){
fftw_complex *in, *out;
fftw_plan p;
int scaleFactor = fft.cols*fft.rows;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * fft.rows * fft.cols);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * fft.rows * fft.cols);
switch(type){
case FFT_TYPE_1D:
p = fftw_plan_dft_1d(fft.rows*fft.cols, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
break;
case FFT_TYPE_2D:
p = fftw_plan_dft_2d(fft.rows, fft.cols, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
break;
}
for(int j=0; j<fft.rows; j++)
for(int i=0; i<fft.cols; i++){
int index = j*fft.cols+i;
in[index][0] = fft.data[i][j].real;
in[index][1] = fft.data[i][j].imag;
}
fftw_execute(p);
for(int i=0; i<fft.rows*fft.cols; i++){
int x = i%fft.cols, y = i/fft.cols;
fft.data[x][y].real = out[i][0]/scaleFactor;
fft.data[x][y].imag = out[i][1]/scaleFactor;
}
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
}
FFTR multFFT(const FFTR fft1, const FFTR fft2){
FFTR result;
result.cols = fft1.cols;
result.rows = fft1.rows;
result.data = new myComplex*[result.cols];
for(int i=0; i<result.cols; i++)
result.data[i] = new myComplex[result.rows];
for(int i=0; i<result.cols; i++){
for(int j=0; j<result.rows; j++){
result.data[i][j].real = (fft1.data[i][j].real*fft2.data[i][j].real)-(fft1.data[i][j].imag*fft2.data[i][j].imag);
result.data[i][j].imag = (fft1.data[i][j].real*fft2.data[i][j].imag)+(fft1.data[i][j].imag*fft2.data[i][j].real);
}
}
return result;
}
long double getFFTAverege(FFTR fft){
long double result = 0;
for(int i=0; i<fft.cols; i++){
long double sum=0;
for(int j=0; j<fft.rows; j++){
sum += fft.data[i][j].real;
}
result += sum/fft.rows;
}
return result/fft.rows;
}
void deleteFFTR(FFTR fftr){
for(int i=0; i<fftr.cols; i++)
if(fftr.data[i]) delete [] fftr.data[i];
if(fftr.data) delete [] fftr.data;
}
void printFFTR(FFTR fft){
for(int j=0; j<fft.rows; j++){
for(int i=0; i<fft.cols; i++){
printf("%f%si%f\n", fft.data[i][j].real, (fft.data[i][j].imag<0 ? "-" : "+"), abs(fft.data[i][j].imag));
}
}
}
cv::Mat resize_zeros(const cv::Mat src, cv::Size newSize){
cv::Mat srcROI, result, resultROI;
result.create(newSize, src.type());
srcROI = src(cv::Rect(0,0,(src.cols>result.cols ? result.cols : src.cols), (src.rows>result.rows ? result.rows : src.rows)));
result = 0;
resultROI = result(cv::Rect(0,0, srcROI.cols, srcROI.rows));
srcROI.copyTo(resultROI);
return result;
}
//otsu's threshhold
template<class T> T getThreshold(cv::Mat mGrey){
uchar* image = mGrey.data;
int columns = mGrey.cols;
int rows = mGrey.rows;
const T SIGMA = 0.000001;
const int num_bins = 257;
int counts[num_bins] = {0};
T p[num_bins] = {0};
T mu[num_bins] = {0};
T omega[num_bins] = {0};
T sigma_b_squared[num_bins] = {0};
int sumC;
// calculate histogram
for(int i = 0; i < rows*columns; i++)
counts[image[i]]++;
sumC = 0;
for(int i = 0; i < num_bins; i++)
sumC += counts[i];
for(int i = 0; i < num_bins; i++)
p[i] = ((T)counts[i])/sumC;
mu[0] = omega[0] = p[0];
for(int i = 1; i < num_bins; i++){
omega[i] = omega[i-1] + p[i];
mu[i] = mu[i-1] + p[i]*(i+1);
}
T mu_t = mu[num_bins-1];
T maxval = -1.0;
for(int i = 0; i < num_bins; i++){
T v = mu_t * omega[i] - mu[i];
if (omega[i] > SIGMA && abs(1.0-omega[i]) > SIGMA){
sigma_b_squared[i] = v*v/(omega[i]* (1.0 - omega[i]));
maxval = std::max(maxval,sigma_b_squared[i]);
}
}
// Find the location of the maximum value of sigma_b_squared.
// The maximum may extend over several bins, so average together the
// locations.
// If maxval == -1, sigma_b_squared is not defined, then return 0.
T level = 0;
if (maxval > 0){
T idx = 0;
int maxNumbers = 0;
for(int i = 0; i < num_bins; i++){
if (sigma_b_squared[i] == maxval){
idx += i;
maxNumbers++;
}
}
if (maxNumbers >= 0){
idx /= maxNumbers;
// Normalize the threshold to the range [0, 1].
// level = (idx - 1) / (num_bins - 1);
level = idx / (num_bins - 1);
}
}
return level;
}
}
double fftData[512*512];
That's (probably) 2MB of data, which is (probably) too big to fit on the stack. The simplest fix is to use a dynamic array instead:
std::vector<double> fftData(512*512);
Alternatively, if dynamic allocation is too expensive, you could use a static or global array. This is usually a bad idea, since it makes the function non-reentrant and awkward to use in a multi-threaded program; however, you already have so many globals that one more probably won't hurt.
I am implementing a Kuwahara filter in C++, with OpenCV to help opening and displaying images. The idea is quite straight forward but somehow I got weird result from it. Here' the cose:
#include "opencv2/opencv.hpp"
#include <iostream>
#include <iomanip>
#include <cmath>
using namespace std;
using namespace cv;
//This class is essentially a struct of 4 Kuwahara regions surrounding a pixel, along with each one's mean, sum and variance.
class Regions{
int* Area[4];
int Size[4];
unsigned long long Sum[4];
double Var[4];
int kernel;
public:
Regions(int _kernel) : kernel(_kernel) {
for (int i = 0; i<4; i++) {
Area[i] = new int[kernel*kernel];
Size[i] = 0;
Sum[i] = 0;
Var[i] = 0.0;
}
}
//Update data, increase the size of the area, update the sum
void sendData(int area, int data){
Area[area][Size[area]] = data;
Sum[area] += data;
Size[area]++;
}
//Calculate the variance of each area
double var(int area) {
int __mean = Sum[area]/Size[area];
double temp = 0;
for (int i = 0; i<Size[area]; i++) {
temp+= (Area[area][i] - __mean) * (Area[area][i] - __mean);
}
if (Size[area]==1) return 1.7e38; //If there is only one pixel inside the region then return the maximum of double
//So that with this big number, the region will never be considered in the below minVar()
return sqrt(temp/(Size[area]-1));
}
//Call the above function to calc the variances of all 4 areas
void calcVar() {
for (int i = 0; i<4; i++) {
Var[i] = var(i);
}
}
//Find out which regions has the least variance
int minVar() {
calcVar();
int i = 0;
double __var = Var[0];
if (__var > Var[1]) {__var = Var[1]; i = 1;}
if (__var > Var[2]) {__var = Var[2]; i = 2;}
if (__var > Var[3]) {__var = Var[3]; i = 3;}
return i;
}
//Return the mean of that regions
uchar result(){
int i = minVar();
return saturate_cast<uchar> ((double) (Sum[i] *1.0 / Size[i]));
}
};
class Kuwahara{
private:
int wid, hei, pad, kernel;
Mat image;
public:
Regions getRegions(int x, int y){
Regions regions(kernel);
uchar *data = image.data;
//Update data for each region, pixels that are outside the image's boundary will be ignored.
//Area 1 (upper left)
for (int j = (y-pad >=0)? y-pad : 0; j>= 0 && j<=y && j<hei; j++)
for (int i = ((x-pad >=0) ? x-pad : 0); i>= 0 && i<=x && i<wid; i++) {
regions.sendData(1,data[(j*wid)+i]);
}
//Area 2 (upper right)
for (int j = (y-pad >=0)? y-pad : 0; j<=y && j<hei; j++)
for (int i = x; i<=x+pad && i<wid; i++) {
regions.sendData(2,data[(j*wid)+i]);
}
//Area 3 (bottom left)
for (int j = y; j<=y+pad && j<hei; j++)
for (int i = ((x-pad >=0) ? x-pad : 0); i<=x && i<wid; i++) {
regions.sendData(3,data[(j*wid)+i]);
}
//Area 0 (bottom right)
for (int j = y; j<=y+pad && j<hei; j++)
for (int i = x; i<=x+pad && i<wid; i++) {
regions.sendData(0,data[(j*wid)+i]);
}
return regions;
}
//Constructor
Kuwahara(const Mat& _image, int _kernel) : kernel(_kernel) {
image = _image.clone();
wid = image.cols; hei = image.rows;
pad = kernel-1;
}
//Create new image and replace its pixels by the results of Kuwahara filter on the original pixels
Mat apply(){
Mat temp;
temp.create(image.size(), CV_8U);
uchar* data = temp.data;
for (int j= 0; j<hei; j++) {
for (int i = 0; i<wid; i++)
data[j*wid+i] = getRegions(i,j).result();
}
return temp;
}
};
int main() {
Mat img = imread("limes.tif", 1);
Mat gray, dest;
int kernel = 15;
gray.create(img.size(), CV_8U);
cvtColor(img, gray, CV_BGR2GRAY);
Kuwahara filter(gray, kernel);
dest = filter.apply();
imshow("Result", dest);
imwrite("result.jpg", dest);
waitKey();
}
And here's the result:
As you can see it's different from the correct result, the borders of those limes seem to be duplicated and moved upward. If I apply a 15x15 filter, it gives me a complete mess like this:
I've spent my whole day to debug, but so far nothing is found. I even did the calculation on small images by hand and compare with the result and see no differences.
Could anyone help me find out what did I do wrong?
Many many thanks.
It turns out that there's nothing wrong with my code, but the way I defined a kernel was the source of problem. My kernel is actually one of four small kuwahara sections, while the correct definition of a kernel is the whole area where data is calculated for each pixel, therefore the area that contains all four sections is actually the kernel. So when talked about a 7x7 "kernel", I actually applied a 15x15 one, and the horrible result came not from a 15x15 kernel as I thought, but from a 31x31. At that size, Kuwahara filter simply doesn't make sense and bizarre results are inevitable.
I'm using this fftw library.
Currently I'm trying to plot a 2D Gaussian in the form e^(-(x^2+y^2)/a^2).
Here is the code:
using namespace std;
int main(int argc, char** argv ){
fftw_complex *in, *out, *data;
fftw_plan p;
int i,j;
int w=16;
int h=16;
double a = 2;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*w*h);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*w*h);
for(i=0;i<w;i++){
for(j=0;j<h;j++){
in[i*h+j][0] = exp(- (i*i+j*j)/(a*a));
in[i*h+j][1] = 0;
}
}
p = fftw_plan_dft_2d(w, h, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(p);
//This is something that print what's in the matrix
print_2d(out,w,h);
fftw_destroy_plan(p);
fftw_free(in);
fftw_free(out);
return 0;
}
Turns out negative numbers shows up. I thought Fourier transform of a Gaussian is another Gaussian, which shouldn't include any negative numbers.
Also, the current origin is at in[0]
EDIT: the previous answer is wrong, shifting the center of the Gaussian won't help as it introduces another phase shift. The right solution is to wrap high indices to negative ones:
double x = (i < w*0.5) ? i : (i - w);
double y = (j < h*0.5) ? j : (j - h);
in[i*h+j][0] = exp(-(x*x+y*y)/(a*a));
This allows the input to cover the entire Gaussian instead of a quarter of it. The entire code is attached below.
#include <stdio.h>
#include <math.h>
#include <fftw3.h>
int main(int argc, char** argv)
{
fftw_complex *in, *out;
fftw_plan p;
int i, j, w = 16, h = 16;
double a = 2, x, y;
in = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * w * h);
out = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * w * h);
for (i = 0; i < w; i++) {
x = (i < w*0.5) ? i : (i - w);
for (j = 0; j < h; j++) {
y = (j < h*0.5) ? j : (j - h);
in[i*h+j][0] = exp(-1.*(x*x+y*y)/(a*a));
in[i*h+j][1] = 0;
}
}
p = fftw_plan_dft_2d(w, h, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(p);
for (i = 0; i < w; i++) {
for (j = 0; j < h; j++) {
printf("%4d %4d %+9.4f %+9.4f\n", i, j, out[i*h+j][0], out[i*h+j][1]);
}
}
fftw_destroy_plan(p);
fftw_cleanup();
fftw_free(in);
fftw_free(out);
return 0;
}