OpenCV + FFTW - magnitude image - C++

Hello again.
Today I'm working on extending my simple OpenCV image processing application. I would like to calculate the phase and magnitude of a loaded cv::Mat. I have to use the FFTW C++ library for this purpose (I know about dft in OpenCV).
My work is based on this tutorial: http://www.admindojo.com/discrete-fourier-transform-in-c-with-fftw/
What's my problem
So according to the tutorial, my output magnitude should look like this:
Unfortunately, my output is quite different:
On the other hand, the phase image is almost the same as in the tutorial, so that part is fine.
Code and My thoughts
Take a look at the most important code (what I'm doing there is porting the tutorial as-is to work with OpenCV):
EDITED: (Both posts merged)
OK, so I changed the code a bit, but the output is still different from the tutorial.
Take a look at the code:
void Processing::fft_moc(cv::Mat &pixels, cv::Mat &outMag, cv::Mat outPhase, int mode)
{
int squareSize = pixels.cols;
fftw_plan planR, planG, planB;
fftw_complex *inR, *inG, *inB, *outR, *outG, *outB;
// allocate input arrays
inB = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
inG = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
inR = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
// allocate output arrays
outB = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
outG = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
outR = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * squareSize * squareSize);
if (mode == FFT)
{
// create plans
planB = fftw_plan_dft_2d(squareSize, squareSize, inR, outB, FFTW_FORWARD, FFTW_ESTIMATE);
planG = fftw_plan_dft_2d(squareSize, squareSize, inG, outG, FFTW_FORWARD, FFTW_ESTIMATE);
planR = fftw_plan_dft_2d(squareSize, squareSize, inB, outR, FFTW_FORWARD, FFTW_ESTIMATE);
}
// assign values to real parts (values between 0 and MaxRGB)
for( int x = 0; x < pixels.rows; x++ )
{
for( int y = 0; y < pixels.cols; y++ )
{
double blue = pixels.at<cv::Vec3b>(x,y)[0];
double green = pixels.at<cv::Vec3b>(x,y)[1];
double red = pixels.at<cv::Vec3b>(x,y)[2];
// save as real numbers
inB[squareSize*x+y][0] = blue;
inG[squareSize*x+y][0] = green;
inR[squareSize*x+y][0] = red;
}
}
// perform FORWARD fft
fftw_execute(planB);
fftw_execute(planG);
fftw_execute(planR);
double ***outMagF=new double**[pixels.rows];
for(int i = 0 ; i < pixels.rows ; i++)
{
outMagF[i]=new double *[pixels.cols];
for(int j = 0 ; j < pixels.cols ; j++)
{
outMagF[i][j]= new double[3];
}
}
//calculate magnitude
//find min and max for each channel
double n_minG = 0.0;
double n_maxG = 0.0;
double n_minB = 0.0;
double n_maxB = 0.0;
double n_minR = 0.0;
double n_maxR = 0.0;
for( int x = 0; x < pixels.rows; x++ )
{
for( int y = 0; y < pixels.cols; y++ )
{
int i = squareSize*x+y;
// normalize values
double realB = outB[i][0] / (double)(squareSize * squareSize);
double imagB = outB[i][1] / (double)(squareSize * squareSize);
double realG = outG[i][0] / (double)(squareSize * squareSize);
double imagG = outG[i][1] / (double)(squareSize * squareSize);
double realR = outR[i][0] / (double)(squareSize * squareSize);
double imagR = outR[i][1] / (double)(squareSize * squareSize);
// magnitude
double magB = log(1+sqrt((realB * realB) + (imagB * imagB)));
double magG = log(1+sqrt((realG * realG) + (imagG * imagG)));
double magR = log(1+sqrt((realR * realR) + (imagR * imagR)));
n_minB = n_minB > magB ? magB : n_minB;
n_maxB = n_maxB < magB ? magB : n_maxB;
n_minG = n_minG > magG ? magG : n_minG;
n_maxG = n_maxG < magG ? magG : n_maxG;
n_minR = n_minR > magR ? magR : n_minR;
n_maxR = n_maxR < magR ? magR : n_maxR;
outMagF[x][y][0] = magB;
outMagF[x][y][1] = magG;
outMagF[x][y][2] = magR;
}
}
for( int x = 0; x < pixels.rows; x++ )
{
for( int y = 0; y < pixels.cols; y++ )
{
int i = squareSize*x+y;
double realB = outB[i][0] / (double)(squareSize * squareSize);
double imagB = outB[i][1] / (double)(squareSize * squareSize);
double realG = outG[i][0] / (double)(squareSize * squareSize);
double imagG = outG[i][1] / (double)(squareSize * squareSize);
double realR = outR[i][0] / (double)(squareSize * squareSize);
double imagR = outR[i][1] / (double)(squareSize * squareSize);
// write normalized to output = (value-min)/(max-min)
outMag.at<cv::Vec3f>(x,y)[0] = (double)(outMagF[x][y][0]-n_minB)/(n_maxB-n_minB);
outMag.at<cv::Vec3f>(x,y)[1] = (double)(outMagF[x][y][1]-n_minG)/(n_maxG-n_minG);
outMag.at<cv::Vec3f>(x,y)[2] = (double)(outMagF[x][y][2]-n_minR)/(n_maxR-n_minR);
// std::complex for arg()
std::complex<double> cB(realB, imagB);
std::complex<double> cG(realG, imagG);
std::complex<double> cR(realR, imagR);
// phase
double phaseB = arg(cB) + M_PI;
double phaseG = arg(cG) + M_PI;
double phaseR = arg(cR) + M_PI;
// scale and write to output
outPhase.at<cv::Vec3f>(x,y)[0] = (phaseB / (double)(2 * M_PI)) * 1;
outPhase.at<cv::Vec3f>(x,y)[1] = (phaseG / (double)(2 * M_PI)) * 1;
outPhase.at<cv::Vec3f>(x,y)[2] = (phaseR / (double)(2 * M_PI)) * 1;
}
}
// move zero frequency to (squareSize/2, squareSize/2)
swapQuadrants(squareSize, outMag);
swapQuadrants(squareSize, outPhase);
// free memory
fftw_destroy_plan(planR);
fftw_destroy_plan(planG);
fftw_destroy_plan(planB);
fftw_free(inR); fftw_free(outR);
fftw_free(inG); fftw_free(outG);
fftw_free(inB); fftw_free(outB);
}
I store the final output in a cv::Mat of type CV_32FC3. And yes, the way I normalize the magnitude is quite ugly, but I just wanted to be sure that everything works the way I expect.
Take a look at my output again:
So as you can see, I still need help with that.

FFT planes usually show a very large difference between the 0th element (the DC component), which is huge, and the rest of the elements, which are usually close to zero.
When displaying the magnitude it is common practice to show the log of the magnitude, so that large values are reduced more strongly than small ones.
The tutorial states this explicitly: "The magnitude appears to be black but isn’t. To make the information visible we scale the image logarithmically."
You need to display the log of the values to see a similar image.
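For example, a minimal sketch with OpenCV (the names are placeholders, not taken from the code above; mag is assumed to already hold the raw magnitude of one channel as a CV_32F Mat):

#include <opencv2/opencv.hpp>

void showLogMagnitude(const cv::Mat& mag)
{
    cv::Mat disp = mag + cv::Scalar::all(1.0);            // +1 so log(0) is avoided
    cv::log(disp, disp);                                  // the log compresses the huge DC peak
    cv::normalize(disp, disp, 0.0, 1.0, cv::NORM_MINMAX); // stretch to [0, 1] for imshow
    cv::imshow("log magnitude", disp);
}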

You assign the computed values to uchar variables, so you lose precision, and all negative values and values above 255 are lost as well.
Try doing your computations in real-valued variables, then normalize the final result to the range 0-255, and only then assign it to a result image of type CV_8U.
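Something along these lines (the name magF is made up; it is assumed to be a CV_32F Mat holding one channel's magnitudes computed in floating point):

#include <opencv2/opencv.hpp>

cv::Mat toDisplayable8U(const cv::Mat& magF)
{
    cv::Mat scaled, mag8u;
    cv::normalize(magF, scaled, 0.0, 255.0, cv::NORM_MINMAX); // rescale the float values to 0..255
    scaled.convertTo(mag8u, CV_8U);                           // only now drop to 8-bit
    return mag8u;
}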

Related

Interpreting the Visual Studio profiler: is this subtraction slow? Can I make all this any faster?

I'm using the Visual Studio profiler for the first time and I'm trying to interpret the results. Looking at the percentages on the left, I found this subtraction's time cost a bit strange:
Other parts of the code contain more complex expressions, like:
Even a simple multiplication seems way faster than the subtraction:
Other multiplications take way longer and I really don't get why, like this:
So, I guess my question is whether there is anything weird going on here.
Complex expressions take longer than that subtraction, and some expressions take way longer than other, similar ones. I ran the profiler several times and the distribution of the percentages is always like this. Am I just interpreting this wrong?
Update:
I was asked to give the profile for the whole function, so here it is, even though it's a bit big. I ran the function inside a for loop for 1 minute and got 50k samples. The function contains a double loop. I include the text first for ease of reading, followed by the profiling screenshots. Note that the code in text form is slightly updated.
for (int i = 0; i < NUMBER_OF_CONTOUR_POINTS; i++) {
vec4 contourPointV(contour3DPoints[i], 1);
float phi = angles[i];
float xW = pose[0][0] * contourPointV.x + pose[1][0] * contourPointV.y + contourPointV.z * pose[2][0] + pose[3][0];
float yW = pose[0][1] * contourPointV.x + pose[1][1] * contourPointV.y + contourPointV.z * pose[2][1] + pose[3][1];
float zW = pose[0][2] * contourPointV.x + pose[1][2] * contourPointV.y + contourPointV.z * pose[2][2] + pose[3][2];
float x = -G_FU_STRICT * xW / zW;
float y = -G_FV_STRICT * yW / zW;
x = (x + 1) * G_WIDTHo2;
y = (y + 1) * G_HEIGHTo2;
y = G_HEIGHT - y;
phi -= extraTheta;
if (phi < 0)phi += CV_PI2;
int indexForTable = phi * oneKoverPI;
//vec2 ray(cos(phi), sin(phi));
vec2 ray(cos_pre[indexForTable], sin_pre[indexForTable]);
vec2 ray2(-ray.x, -ray.y);
float outerStepX = ray.x * step;
float outerStepY = ray.y * step;
cv::Point2f outerPoint(x + outerStepX, y + outerStepY);
cv::Point2f innerPoint(x - outerStepX, y - outerStepY);
cv::Point2f contourPointCV(x, y);
cv::Point2f contourPointCVcopy(x, y);
bool cut = false;
if (!isInView(outerPoint.x, outerPoint.y) || !isInView(innerPoint.x, innerPoint.y)) {
cut = true;
}
bool outside2 = true; bool outside1 = true;
if (cut) {
outside2 = myClipLine(contourPointCV.x, contourPointCV.y, outerPoint.x, outerPoint.y, G_WIDTH - 1, G_HEIGHT - 1);
outside1 = myClipLine(contourPointCVcopy.x, contourPointCVcopy.y, innerPoint.x, innerPoint.y, G_WIDTH - 1, G_HEIGHT - 1);
}
myIterator innerRayMine(contourPointCVcopy, innerPoint);
myIterator outerRayMine(contourPointCV, outerPoint);
if (!outside1) {
innerRayMine.end = true;
innerRayMine.prob = true;
}
if (!outside2) {
outerRayMine.end = true;
innerRayMine.prob = true;
}
vec2 normal = -ray;
float dfdxTerm = -normal.x;
float dfdyTerm = normal.y;
vec3 point3D = vec3(xW, yW, zW);
cv::Point contourPoint((int)x, (int)y);
float Xc = point3D.x; float Xc2 = Xc * Xc; float Yc = point3D.y; float Yc2 = Yc * Yc; float Zc = point3D.z; float Zc2 = Zc * Zc;
float XcYc = Xc * Yc; float dfdxFu = dfdxTerm * G_FU; float dfdyFv = dfdyTerm * G_FU; float overZc2 = 1 / Zc2; float overZc = 1 / Zc;
pixelJacobi[0] = (dfdyFv * (Yc2 + Zc2) + dfdxFu * XcYc) * overZc2;
pixelJacobi[1] = (-dfdxFu * (Xc2 + Zc2) - dfdyFv * XcYc) * overZc2;
pixelJacobi[2] = (-dfdyFv * Xc + dfdxFu * Yc) * overZc;
pixelJacobi[3] = -dfdxFu * overZc;
pixelJacobi[4] = -dfdyFv * overZc;
pixelJacobi[5] = (dfdyFv * Yc + dfdxFu * Xc) * overZc2;
float commonFirstTermsSum = 0;
float commonFirstTermsSquaredSum = 0;
int test = 0;
while (!innerRayMine.end) {
test++;
cv::Point xy = innerRayMine.pos(); innerRayMine++;
int x = xy.x;
int y = xy.y;
float dx = x - contourPoint.x;
float dy = y - contourPoint.y;
vec2 dxdy(dx, dy);
float raw = -glm::dot(dxdy, normal);
float heavisideTerm = heaviside_pre[(int)raw * 100 + 1000];
float deltaTerm = delta_pre[(int)raw * 100 + 1000];
const Vec3b rgb = ante[y * 640 + x];
int red = rgb[0]; int green = rgb[1]; int blue = rgb[2];
red = red >> 3; red = red << 10; green = green >> 3; green = green << 5; blue = blue >> 3;
int colorIndex = red + green + blue;
pF = pFPointer[colorIndex];
pB = pBPointer[colorIndex];
float denAsMul = 1 / (pF + pB + 0.000001);
pF = pF * denAsMul;
float pfMinusPb = 2 * pF - 1;
float denominator = heavisideTerm * (pfMinusPb)+pB + 0.000001;
float commonFirstTerm = -pfMinusPb / denominator * deltaTerm;
commonFirstTermsSum += commonFirstTerm;
commonFirstTermsSquaredSum += commonFirstTerm * commonFirstTerm;
}
}
Visual Studio profiles by sampling: it interrupts execution often and records the value of the instruction pointer; it then maps it to the source and calculates the frequency of hitting that line.
There are a few issues with that: it's not always possible to figure out which line produced a specific assembly instruction in optimized code.
One trick I use is to move the code of interest into a separate function and declare it with __declspec(noinline).
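A sketch of that trick (the function name is invented for illustration); moving the expression into its own non-inlined function makes the sampler attribute its hits to that function instead of to whatever instructions the optimizer mixed it with:

__declspec(noinline)
float subtractExtraTheta(float phi, float extraTheta)
{
    return phi - extraTheta;
}

// inside the loop:
// phi = subtractExtraTheta(phi, extraTheta);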
In your example, are you sure the subtraction was performed as many times as the multiplication? I would be more puzzled by the difference between the subsequent multiplications (0.39% and 0.53%).
Update:
I believe that the following lines:
float phi = angles[i];
and
phi -= extraTheta;
got merged in the generated assembly, and the time spent fetching angles[i] was attributed to that subtraction line.

Why isn't my 4 thread implementation faster than the single thread one?

I don't know much about multi-threading and I have no idea why this is happening so I'll just get to the point.
I'm processing an image: I divide it into 4 parts and pass each part to a thread (essentially I pass the indices of the first and last pixel rows of each part). For example, if the image has 1000 rows, each thread will process 250 of them. I can go into more detail about my implementation and what I'm trying to achieve if it helps. For now I provide the code executed by the threads, in case you can spot why this is happening. I don't know if it's relevant, but in both cases (1 thread or 4 threads) the process takes around 15 ms, and pfUMap and pbUMap are unordered maps.
void jacobiansThread(int start, int end,vector<float> &sJT,vector<float> &sJTJ) {
uchar* rgbPointer;
float* depthPointer;
float* sdfPointer;
float* dfdxPointer; float* dfdyPointer;
float fov = radians(45.0);
float aspect = 4.0 / 3.0;
float focal = 1 / (glm::tan(fov / 2));
float fu = focal * cols / 2 / aspect;
float fv = focal * rows / 2;
float strictFu = focal / aspect;
float strictFv = focal;
vector<float> pixelJacobi(6, 0);
for (int y = start; y <end; y++) {
rgbPointer = sceneImage.ptr<uchar>(y);
depthPointer = depthBuffer.ptr<float>(y);
dfdxPointer = dfdx.ptr<float>(y);
dfdyPointer = dfdy.ptr<float>(y);
sdfPointer = sdf.ptr<float>(y);
for (int x = roiX.x; x <roiX.y; x++) {
float deltaTerm;// = deltaPointer[x];
float raw = sdfPointer[x];
if (raw > 8.0)continue;
float dirac = (1.0f / float(CV_PI)) * (1.2f / (raw * 1.44f * raw + 1.0f));
deltaTerm = dirac;
vec3 rgb(rgbPointer[x * 3], rgbPointer[x * 3+1], rgbPointer[x * 3+2]);
vec3 bin = rgbToBin(rgb, numberOfBins);
int indexOfColor = bin.x * numberOfBins * numberOfBins + bin.y * numberOfBins + bin.z;
float s3 = glfwGetTime();
float pF = pfUMap[indexOfColor];
float pB = pbUMap[indexOfColor];
float heavisideTerm;
heavisideTerm = HEAVISIDE(raw);
float denominator = (heavisideTerm * pF + (1 - heavisideTerm) * pB) + 0.000001;
float commonFirstTerm = -(pF - pB) / denominator * deltaTerm;
if (pF == pB)continue;
vec3 pixel(x, y, depthPointer[x]);
float dfdxTerm = dfdxPointer[x];
float dfdyTerm = -dfdyPointer[x];
if (pixel.z == 1) {
cv::Point c = findClosestContourPoint(cv::Point(x, y), dfdxTerm, -dfdyTerm, abs(raw));
if (c.x == -1)continue;
pixel = vec3(c.x, c.y, depthBuffer.at<float>(cv::Point(c.x, c.y)));
}
vec3 point3D = pixel;
pixelToViewFast(point3D, cols, rows, strictFu, strictFv);
float Xc = point3D.x; float Xc2 = Xc * Xc; float Yc = point3D.y; float Yc2 = Yc * Yc; float Zc = point3D.z; float Zc2 = Zc * Zc;
pixelJacobi[0] = dfdyTerm * ((fv * Yc2) / Zc2 + fv) + (dfdxTerm * fu * Xc * Yc) / Zc2;
pixelJacobi[1] = -dfdxTerm * ((fu * Xc2) / Zc2 + fu) - (dfdyTerm * fv * Xc * Yc) / Zc2;
pixelJacobi[2] = -(dfdyTerm * fv * Xc) / Zc + (dfdxTerm * fu * Yc) / Zc;
pixelJacobi[3] = -(dfdxTerm * fu) / Zc;
pixelJacobi[4] = -(dfdyTerm * fv) / Zc;
pixelJacobi[5] = (dfdyTerm * fv * Yc) / Zc2 + (dfdxTerm * fu * Xc) / Zc2;
float weightingTerm = -1.0 / log(denominator);
for (int i = 0; i < 6; i++) {
pixelJacobi[i] *= commonFirstTerm;
sJT[i] += pixelJacobi[i];
}
for (int i = 0; i < 6; i++) {
for (int j = i; j < 6; j++) {
sJTJ[i * 6 + j] += weightingTerm * pixelJacobi[i] * pixelJacobi[j];
}
}
}
}
}
This is the part where I call each thread:
vector<std::thread> myThreads;
float step = (roiY.y - roiY.x) / numberOfThreads;
vector<vector<float>> tsJT(numberOfThreads, vector<float>(6, 0));
vector<vector<float>> tsJTJ(numberOfThreads, vector<float>(36, 0));
for (int i = 0; i < numberOfThreads; i++) {
int start = roiY.x+i * step;
int end = start + step;
if (end > roiY.y)end = roiY.y;
myThreads.push_back(std::thread(&pwp3dV2::jacobiansThread, this,start,end,std::ref(tsJT[i]), std::ref(tsJTJ[i])));
}
vector<float> sJT(6, 0);
vector<float> sJTJ(36, 0);
for (int i = 0; i < numberOfThreads; i++)myThreads[i].join();
Other Notes
To measure time I used glfwGetTime() before and right after the second code snippet. The measurements vary, but as mentioned the average is about 15 ms for both implementations.
Starting a thread has significant overhead, which might not be worth the time if you have only 15 milliseconds worth of work.
The common solution is to keep threads running in the background and send them data when you need them, instead of calling the std::thread constructor to create a new thread every time you have some work to do.
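A minimal sketch of that idea (illustrative only, not a drop-in replacement for the code in the question): the threads are created once, and jobs are pushed to them afterwards, so no std::thread constructor runs inside the timed 15 ms region. You would still need a way to wait for the submitted jobs each frame, e.g. futures or an atomic counter.

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

class WorkerPool {
public:
    explicit WorkerPool(unsigned n) {
        for (unsigned i = 0; i < n; ++i)
            workers.emplace_back([this] { loop(); });
    }
    ~WorkerPool() {
        { std::lock_guard<std::mutex> lock(m); stop = true; }
        cond.notify_all();
        for (auto& w : workers) w.join();
    }
    // push one job; completion tracking is left to the caller
    void submit(std::function<void()> job) {
        { std::lock_guard<std::mutex> lock(m); jobs.push(std::move(job)); }
        cond.notify_one();
    }
private:
    void loop() {
        for (;;) {
            std::function<void()> job;
            {
                std::unique_lock<std::mutex> lock(m);
                cond.wait(lock, [this] { return stop || !jobs.empty(); });
                if (stop && jobs.empty()) return;
                job = std::move(jobs.front());
                jobs.pop();
            }
            job();
        }
    }
    std::vector<std::thread> workers;
    std::queue<std::function<void()>> jobs;
    std::mutex m;
    std::condition_variable cond;
    bool stop = false;
};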
Pure speculation, but two things might be preventing you from getting the full benefit of parallelization:
Processing speed is limited by the memory bus. Cores will wait until data is loaded before continuing.
Data sharing between cores. Some caches are core-specific. If memory is shared between cores, data must travel through the shared cache before it can be loaded.
On Linux you can use perf to check for cache misses (for example, perf stat -e cache-misses,cache-references ./your_program).
If you want better timing, you need to decouple the loop iterations from a shared counter, and that requires some preprocessing: something quick like an array of structures with a header for each segment, or, if you can't think of anything better, just a std::vector<int> filled with the counter values. Then run for_each(std::execution::par, ...) over that (see the sketch after the timing snippet below); it is much faster.
For timings there's:
auto t2 = std::chrono::system_clock::now();
std::chrono::milliseconds f = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
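As a rough sketch of the for_each(std::execution::par, ...) suggestion above (C++17, needs <execution>; the row range and the loop body are stand-ins for the code in the question):

#include <algorithm>
#include <execution>
#include <numeric>
#include <vector>

void processRowsInParallel(int rowStart, int rowEnd)
{
    std::vector<int> rows(rowEnd - rowStart);
    std::iota(rows.begin(), rows.end(), rowStart);      // the precomputed counter values
    std::for_each(std::execution::par, rows.begin(), rows.end(), [&](int y) {
        // process one image row here (the body of jacobiansThread's outer loop);
        // shared accumulators like sJT / sJTJ would need per-thread copies or atomics
        (void)y;
    });
}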

C++ Kinect v2 & freenect2: how to convert depth data to real world coordinates

I am trying to compute real-world xyz coordinates using a Kinect v2 camera (on Linux), but my computation gives me wrong results.
Here is the code:
cv::Point3f xyzWorld={0.0f};
xyzWorld.z = pointDepth;
xyzWorld.x = (float) ((float)x -(depthcx)) * xyzWorld.z / depthfx;
xyzWorld.y = (float) ((float)y - (depthcy)) * xyzWorld.z / depthfy;
xyzWorld.z = pointDepth;
return xyzWorld;
I think the problem is with the depth camera's values of fx, fy, cx and cy.
Can someone help me?
I am using freenect2.
Why not just use the OpenNI implementation?
OniStatus VideoStream::convertDepthToWorldCoordinates(float depthX, float depthY, float depthZ, float* pWorldX, float* pWorldY, float* pWorldZ)
{
if (m_pSensorInfo->sensorType != ONI_SENSOR_DEPTH)
{
m_errorLogger.Append("convertDepthToWorldCoordinates: Stream is not from DEPTH\n");
return ONI_STATUS_NOT_SUPPORTED;
}
float normalizedX = depthX / m_worldConvertCache.resolutionX - .5f;
float normalizedY = .5f - depthY / m_worldConvertCache.resolutionY;
OniVideoMode videoMode;
int size = sizeof(videoMode);
getProperty(ONI_STREAM_PROPERTY_VIDEO_MODE, &videoMode, &size);
float const convertToMillimeters = (videoMode.pixelFormat == ONI_PIXEL_FORMAT_DEPTH_100_UM) ? 10.f : 1.f;
*pWorldX = (normalizedX * depthZ * m_worldConvertCache.xzFactor) / convertToMillimeters;
*pWorldY = (normalizedY * depthZ * m_worldConvertCache.yzFactor) / convertToMillimeters;
*pWorldZ = depthZ / convertToMillimeters;
return ONI_STATUS_OK;
}
and
OniStatus VideoStream::convertWorldToDepthCoordinates(float worldX, float worldY, float worldZ, float* pDepthX, float* pDepthY, float* pDepthZ)
{
if (m_pSensorInfo->sensorType != ONI_SENSOR_DEPTH)
{
m_errorLogger.Append("convertWorldToDepthCoordinates: Stream is not from DEPTH\n");
return ONI_STATUS_NOT_SUPPORTED;
}
*pDepthX = m_worldConvertCache.coeffX * worldX / worldZ + m_worldConvertCache.halfResX;
*pDepthY = m_worldConvertCache.halfResY - m_worldConvertCache.coeffY * worldY / worldZ;
*pDepthZ = worldZ;
return ONI_STATUS_OK;
}
and the world conversion cache:
void VideoStream::refreshWorldConversionCache()
{
if (m_pSensorInfo->sensorType != ONI_SENSOR_DEPTH)
{
return;
}
OniVideoMode videoMode;
int size = sizeof(videoMode);
getProperty(ONI_STREAM_PROPERTY_VIDEO_MODE, &videoMode, &size);
size = sizeof(float);
float horizontalFov;
float verticalFov;
getProperty(ONI_STREAM_PROPERTY_HORIZONTAL_FOV, &horizontalFov, &size);
getProperty(ONI_STREAM_PROPERTY_VERTICAL_FOV, &verticalFov, &size);
m_worldConvertCache.xzFactor = tan(horizontalFov / 2) * 2;
m_worldConvertCache.yzFactor = tan(verticalFov / 2) * 2;
m_worldConvertCache.resolutionX = videoMode.resolutionX;
m_worldConvertCache.resolutionY = videoMode.resolutionY;
m_worldConvertCache.halfResX = m_worldConvertCache.resolutionX / 2;
m_worldConvertCache.halfResY = m_worldConvertCache.resolutionY / 2;
m_worldConvertCache.coeffX = m_worldConvertCache.resolutionX / m_worldConvertCache.xzFactor;
m_worldConvertCache.coeffY = m_worldConvertCache.resolutionY / m_worldConvertCache.yzFactor;
}
struct WorldConversionCache
{
float xzFactor;
float yzFactor;
float coeffX;
float coeffY;
int resolutionX;
int resolutionY;
int halfResX;
int halfResY;
} m_worldConvertCache;
All of this is taken from the OpenNI GitHub repository.
You can get the horizontal and vertical FOV directly from the description of each frame.
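Condensed into the terms of the question, the conversion above boils down to something like the following sketch (fovH/fovV are the depth camera's horizontal/vertical FOV in radians and resX/resY its resolution; with freenect2, make sure these describe the depth stream, not the color stream):

#include <cmath>
#include <opencv2/opencv.hpp>

cv::Point3f depthToWorld(float px, float py, float depth,
                         float fovH, float fovV, float resX, float resY)
{
    // normalized image coordinates in [-0.5, 0.5], y flipped as in the OpenNI code
    float normX = px / resX - 0.5f;
    float normY = 0.5f - py / resY;
    cv::Point3f p;
    p.x = normX * depth * 2.0f * std::tan(fovH / 2.0f);  // xzFactor = 2 * tan(hFov / 2)
    p.y = normY * depth * 2.0f * std::tan(fovV / 2.0f);  // yzFactor = 2 * tan(vFov / 2)
    p.z = depth;                                         // same units as the input depth
    return p;
}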

C++ FFTW forward/backward DFT values get wrapped

Hello StackOverflow community,
I have a problem with the DFT algorithm of the FFTW library.
All I want to do is transform a certain pattern forward and backward and get the input pattern back; of course, there will be some sort of filtering between the transformations later on.
So, what my program does at the moment is:
Create a test signal
Filter or "window" the test signal with a value of 1.0 or 0.5
Copy the test signal to a fftw_complex data type
Perform a forward and backward dft
Calculate the magnitude, which is called phase here
Copy and adjust data for display purposes, and finally display the images via OpenCV
My problem is that when I use no filtering, my backward-transformed image is wrapped somehow and I can't calculate the correct magnitude, which should be identical to my input image / test signal.
When I set the filter/"window" to a value of 0.5, the backward transformation works fine, but my input image is just half as bright as it should be.
The following image illustrates my problem (from top left to bottom right):
1. Input signal, 2. Real part of the backward transformation, 3. Magnitude calculated from the backward-transformed data, 4. Input signal multiplied by 0.5, 5. Real part of the backward transformation, 6. Magnitude calculated from the backward-transformed data.
http://imageshack.com/a/img538/5426/nbL9YZ.png
Does anybody have an idea why the DFT behaves that way? It's kind of strange...
My code currently looks like this:
/***** parameters **************************************************************************/
int imSize = 256;
int imN = imSize * imSize;
char* interferogram = new char[imN];
double* spectrumReal = new double[imN];
double* spectrumImaginary = new double[imN];
double* outputReal = new double[imN];
double* outputImaginary = new double[imN];
double* phase = new double[imN];
char* spectrumRealChar = new char[imN];
char* spectrumImaginaryChar = new char[imN];
char* outputRealChar = new char[imN];
char* outputImaginaryChar = new char[imN];
char* phaseChar = new char[imN];
Mat interferogramMat = Mat(imSize, imSize, CV_8U, interferogram);
Mat spectrumRealCharMat = Mat(imSize, imSize, CV_8U, spectrumRealChar);
Mat spectrumImaginaryCharMat = Mat(imSize, imSize, CV_8U, spectrumImaginaryChar);
Mat outputRealCharMat = Mat(imSize, imSize, CV_8U, outputRealChar);
Mat outputImaginaryCharMat = Mat(imSize, imSize, CV_8U, outputImaginaryChar);
Mat phaseCharMat = Mat(imSize, imSize, CV_8U, phaseChar);
/***** compute interferogram ****************************************************************/
fill_n(interferogram, imN, 0);
double value = 0;
double window = 0;
for (int y = 0; y < imSize; y++)
{
for (int x = 0; x < imSize; x++)
{
value = 127.5 + 127.5 * cos((2*PI) / 10000 * (pow(double(x - imSize/2), 2) + pow(double(y - imSize/2), 2)));
window = 1;
value *= window;
interferogram[y * imSize + x] = (unsigned char)value;
}
}
/***** create fftw arays and plans **********************************************************/
fftw_complex* input;
fftw_complex* spectrum;
fftw_complex* output;
fftw_plan p_fw;
fftw_plan p_bw;
input = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imN);
spectrum = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imN);
output = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * imN);
p_fw = fftw_plan_dft_2d(imSize, imSize, input, spectrum, FFTW_FORWARD, FFTW_ESTIMATE);
p_bw = fftw_plan_dft_2d(imSize, imSize, spectrum, output, FFTW_BACKWARD, FFTW_ESTIMATE);
/***** copy data ****************************************************************************/
for (int i = 0; i < imN; i++)
{
input[i][0] = double(interferogram[i]) / 255.;
input[i][1] = 0.;
spectrum[i][0] = 0.;
spectrum[i][1] = 0.;
output[i][0] = 0.;
output[i][1] = 0.;
}
/***** FPS algorithm ************************************************************************/
fftw_execute(p_fw);
fftw_execute(p_bw);
for (int i = 0; i < imN; i++)
{
phase[i] = sqrt(pow(output[i][0], 2) + pow(output[i][1], 2));
}
/***** copy data ****************************************************************************/
for (int i = 0; i < imN; i++)
{
spectrumReal[i] = spectrum[i][0];
spectrumImaginary[i] = spectrum[i][1];
outputReal[i] = output[i][0] / imN;
outputImaginary[i] = output[i][1];
}
SaveCharImage(interferogram, imN, "01_interferogram_512px_8bit.raw");
SaveDoubleImage(spectrumReal, imN, "02_spectrum_real_512px_64bit.raw");
SaveDoubleImage(spectrumImaginary, imN, "03_spectrum_imaginary_512px_64bit.raw");
SaveDoubleImage(outputReal, imN, "03_output_real_512px_64bit.raw");
DoubleToCharArray(spectrumReal, spectrumRealChar, imSize);
DoubleToCharArray(spectrumImaginary, spectrumImaginaryChar, imSize);
DoubleToCharArray(outputReal, outputRealChar, imSize);
DoubleToCharArray(outputImaginary, outputImaginaryChar, imSize);
DoubleToCharArray(phase, phaseChar, imSize);
/***** show images **************************************************************************/
imshow("interferogram", interferogramMat);
imshow("spectrum real", spectrumRealCharMat);
imshow("spectrum imaginary", spectrumImaginaryCharMat);
imshow("out real", outputRealCharMat);
imshow("out imaginary", outputImaginaryCharMat);
imshow("phase", phaseCharMat);
int key = waitKey(0);
Here are some lines of your code:
char* interferogram = new char[imN];
...
double value = 0;
double window = 0;
for (int y = 0; y < imSize; y++)
{
for (int x = 0; x < imSize; x++)
{
value = 127.5 + 127.5 * cos((2*PI) / 10000 * (pow(double(x - imSize/2), 2) + pow(double(y - imSize/2), 2)));
window = 1;
value *= window;
interferogram[y * imSize + x] = (unsigned char)value;
}
}
The problem is that a char (signed on your platform) ranges from -128 to 127, while an unsigned char ranges from 0 to 255. In interferogram[y * imSize + x] = (unsigned char)value; there is an implicit conversion back to char.
It does not affect the output if window = 0.5, but it does change things if window = 1, because value becomes higher than 127. This is exactly the problem you noticed in your question!
It does not affect the first displayed image, since CV_8U corresponds to unsigned char: interferogram is therefore cast back into an unsigned char*. Take a look at "Can I turn unsigned char into char and vice versa?" to learn more about char / unsigned char conversions.
The problem occurs at input[i][0] = double(interferogram[i]) / 255.;: if window = 1, interferogram[i] may be negative, and input[i][0] becomes negative.
Change all char to unsigned char and it should solve the problem.
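That is, roughly (only the declarations and lines that change are shown):

unsigned char* interferogram = new unsigned char[imN];
// ...the same change for spectrumRealChar, spectrumImaginaryChar, outputRealChar,
//    outputImaginaryChar and phaseChar...
interferogram[y * imSize + x] = (unsigned char)value;  // now really stores 0..255
input[i][0] = double(interferogram[i]) / 255.;         // can no longer become negative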
You may also change
outputReal[i] = output[i][0] / imN;
outputImaginary[i] = output[i][1];
to
outputReal[i] = output[i][0];
outputImaginary[i] = output[i][1];
The calls to FFTW seem to be fine.

Skin Detection with Gaussian Mixture Models

I'm implementing a skin detection algorithm according to this article. There are two models on page 21: a Mixture-of-Gaussians skin color model and a non-skin color model.
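For reference, the per-pixel likelihood I am trying to evaluate is, as I read the article, the usual mixture of 16 Gaussians with diagonal covariances:

P(\mathbf{x}) = \sum_{k=1}^{16} w_k \, \frac{1}{(2\pi)^{3/2} \lvert \Sigma_k \rvert^{1/2}} \exp\!\left( -\tfrac{1}{2} (\mathbf{x} - \boldsymbol{\mu}_k)^{\top} \Sigma_k^{-1} (\mathbf{x} - \boldsymbol{\mu}_k) \right)

where x = (R, G, B), and a pixel is classified by comparing P(x) (times 100 in my code) against probValue.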
The first model, for skin detection, works excellently.
Here are some examples:
1) Original image:
2) Skin mask
But the non-skin model gives wrong results:
Here is my code:
ipl_image_wrapper NudityDetector::filterPixelsWithGMM(const float covarinceMatrix[][3], const float meanMatrix[][3], const float weightVector[], const float probValue) const
{
ipl_image_wrapper mask = cvCreateImage(cvGetSize(m_image.get()), IPL_DEPTH_8U, 1);
double probability = 0.0;
float x[3] = { 0, 0, 0};
for(int i = 0; i < m_image.get()->height; ++i)
{
for(int j = 0; j < m_image.get()->width; ++j)
{
if (m_image.get()->nChannels == 3)
{
x[0] = (reinterpret_cast<uchar*>(m_image.get()->imageData + i * m_image.get()->widthStep))[j * 3 + 2];
x[1] = (reinterpret_cast<uchar*>(m_image.get()->imageData + i * m_image.get()->widthStep))[j * 3 + 1];
x[2] = (reinterpret_cast<uchar*>(m_image.get()->imageData + i * m_image.get()->widthStep))[j * 3];
double cov_det = 0.0;
double power = 0.0;
double A1 = 0.0;
double A2 = 0.0;
double A3 = 0.0;
probability = 0;
for (int k = 0; k < 16; ++k)
{
cov_det = covarinceMatrix[k][0] * covarinceMatrix[k][1] * covarinceMatrix[k][2];
A1 = covarinceMatrix[k][1] * covarinceMatrix[k][2];
A2 = covarinceMatrix[k][0] * covarinceMatrix[k][2];
A3 = covarinceMatrix[k][0] * covarinceMatrix[k][1];
power =(std::pow((x[0] - meanMatrix[k][0]), 2) * A1 +
std::pow((x[1] - meanMatrix[k][1]), 2) * A2 +
std::pow((x[2] - meanMatrix[k][2]), 2) * A3 ) / (2 * cov_det);
probability += 100 * weightVector[k] *std::exp(-power) / (std::pow(2 * M_PI, 3/2) * std::pow(cov_det, 1/2));
}
if ( probability < probValue)
{
(reinterpret_cast<uchar*>(mask.get()->imageData + i * mask.get()->widthStep))[j] = 0;
}
else
{
(reinterpret_cast<uchar*>(mask.get()->imageData + i * mask.get()->widthStep))[j] = 255;
}
}
}
}
cvDilate(mask.get(), mask.get(), NULL, 2);
cvErode(mask.get(), mask.get(), NULL, 1);
return mask;
}
ipl_image_wrapper NudityDetector::detectSkinWithGMM(const float probValue) const
{
//matrices are from article
ipl_image_wrapper mask = filterPixelsWithGMM(COVARIANCE_SKIN_MATRIX, MEAN_SKIN_MATRIX, SKIN_WEIGHT_VECTOR, probValue);
return mask;
}
ipl_image_wrapper NudityDetector::detectNonSkinWithGMM(const float probValue) const
{
//matrices are from article
ipl_image_wrapper mask = filterPixelsWithGMM(COVARIANCE_NON_SKIN_MATRIX, MEAN_NON_SKIN_MATRIX, NON_SKIN_WEIGHT_VECTOR, probValue);
return mask;
}
What am I doing wrong? Did I misunderstand the meaning of the article, or did I translate the formula into code incorrectly?
Thank you in advance!
In fact, there seems to be nothing wrong with the results: the non-skin model correctly identifies non-skin regions as 255 and skin regions as 0. You may just need to tune the probValue parameter to a lower value to get rid of some false negatives (small non-skin regions).
A GMM alone may not be an effective approach for skin detection; you may want to employ some edge-intensity information as a regularization term so that the detected regions do not end up fragmented.