I am acquiring multi-spectral data from specim camera, and can save it in 8-bit format without any issue. But when I am trying to save the same image in 16-bit format, the image is shrinking to half in one axis. I am using OpenCV on Linux. See my code and images (both 8 and 16 bits) below.
Why is this happening, and how do I fix it?
Mat frame(FX17_HEIGHT,FX17_WIDTH, CV_16UC1); //CV_8UC1 CV_16U
int i,j;
unsigned char *dest, *src;
src = (unsigned char*) pBase;
for( i = 0; i < FX17_HEIGHT; ++i) {
dest = frame.ptr<uchar>(i);
for ( j = 0; j < FX17_WIDTH; j++)
*(dest+j) = *(src + i*FX17_WIDTH+j);
}
ostringstream name;
QString filename = QFileDialog::getSaveFileName();
name << filename.toUtf8().constData() << "png";
imwrite(name.str() , frame);
I need to read pixels from two parts (with same width and height) of image ( e.g. squares ([0,0], [300, 300]) and ([400,0], [700,300])) and make difference for each pixel.
This is C (pseudo)code:
/**
* #param img Input image
* #param pos Integer position of top left corner of the second square (in this case 400)
*/
double getSum(Image& img, int pos)
{
const int width_of_cut = 300;
int right_bottom = pos + width;
Rgb first, second;
double ret_val = 0.0;
for(int i=0; i < width_of_cut; i++)
{
for(int j=0; j < width_of_cut; j++)
{
first = img.getPixel( i, j );
second = img.getPixel( i + pos, j );
ret_val += ( first.R - second.R ) +
( first.G - second.G ) +
( first.B - second.B );
}
}
return ret_val;
}
But my kernel (with same arguments and the __global float* output is set to 0.0 in host code) is giving me completely different values:
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST;
__kernel void getSum( __read_only image2d_t input,
const int x_coord,
__global float* output )
{
int width = get_image_width( input );
int height = get_image_height( input );
int2 pixelcoord = (int2) (get_global_id(0), get_global_id(1)); // image coordinates
const int width_of_cut = 300;
const int right_bottom = x_coord + width_of_cut;
int a,b;
a = (int)(pixelcoord.x + x_coord);
b = pixelcoord.y;
if( a < right_bottom && b < width_of_cut )
{
float4 first = read_imagef(input, sampler, pixelcoord);
float4 second = read_imagef(input, sampler, (int2)(a,b));
output[get_global_id(0)] += ((first.x - second.x) +
(first.y - second.y) +
(first.z - second.z));
}
}
I am new to OpenCL and I have no idea what am I doing wrong.
Update (1d image):
I changed the kernel code. Now I'm reading an 1d image in one loop, but I'm still not getting the correct values. I'm not sure that I know, how to read pixels from 1d image correctly.
__kernel void getSum( __read_only image1d_t input,
const int x_coord,
__global float* output,
const int img_width )
{
const int width_of_cut = 300;
int i = (int)(get_global_id(0));
for(int j=0; j < width_of_cut; j++)
{
int f = ( img_width*i + j );
int s = f + x_coord;
float4 first = read_imagef( input, sampler, f ); //pixel from 1st sq.
float4 second = read_imagef( input, sampler, s ); //pixel from 2nd sq.
output[get_global_id(0)] += ((first.x - second.x) +
(first.y - second.y) +
(first.z - second.z));
}
}
Race condition.
All vertical work items are accessing the same output memory (output[get_global_id(0)] +=) and not atomically. Therefore the result are likely incorrect (e.g., two threads read the same value, add something to it, and write it back. Only one wins).
If your device supports it, you could make this an atomic operation, but it would be slow. You'd be better off running a 1D kernel that has a loop accumulating these vertically (so, the j loop from your C example).
I have to convert a 24bpp image to a 1bpp image or 8bpp image based on color table. The caller expects a unsigned char* in either case (which would be further processed or maybe for now debug output by sending the BITMAPINFOHEADER.biBitCount to its proper value, 8 or 1).
I have code to extract the color index into the palette (colorIndexArray is from color conversion or dithering algorithms)... I can get the info for an 8bpp bitmap...
But my problem is, I don't know how to put this info into a 1bpp bitmap
typedef struct {
unsigned int size;
unsigned char* pixels;
} ColorIndexArray;
unsigned char* convertImage(const ColorIndexArray& colorIndexArray, unsigned int paletteSize)
{
unsigned char* outputImage;
if (paleteSize > 2)
{
outputImage = (unsigned char*)LocalAlloc(LPTR, colorIndexArray.size);
for (int i=0; i<colorIndexArray.size; i++)
*(outputImage+i) = colorIndexArray.pixels[i];
// this works great
}
else // monochrome, caller has palette colors likely b/w (or purple/magenta or anything), must be 1bpp
{
outputImage = (unsigned char*)LocalAlloc(LPTR, colorIndexArray.size / 8);
// how can i place the unsigned char* info (which is already
// determined based on desired algorithm, representing index in
// color table) into the output image inside a single bit ?
// (obviously its value for a monochrome image would be 0 or 1 but
// it is saved as unsigned char* at the algorithm output)
// And how do I advance the pointer ?
// Will it be type safe ? Aligned to byte ? or do I have to fill
// with something at the end to make multiple of 8 bits ?
}
return outputImage;
}
Trying this after comment suggestion:
#include <GdiPlus.h>
....
else {
Gdiplus::Bitmap monoBitmap(w, h, PixelFormat1bppIndexed);
Gdiplus::BitmapData monoBitmapData;
Gdiplus::Rect rect(0, 0, w, h);
monoBitmap.LockBits(&rect, Gdiplus::ImageLockModeWrite, PixelFormat1bppIndexed, &monoBitmapData);
outputImage = (unsigned char*)monoBitmapData.Scan0;
for (unsigned int y = 0; y < h; y++)
{
for (unsigned int x = 0; x < w; x++)
{
if (colorIndexArray.pixels[x + y * w])
outputImage[y*monoBitmapData.Stride + x / 8] |= (unsigned char)(0x80 >> (x % 8));
}
}
monoBitmap.UnlockBits(&monoBitmapData);
}
return outputImage;
(Also need to allocate the memory for outputImage)
Based on the example suggested by Hans Passant (thank you also for pointing out how important the stride is), I wrote this little conversion
unsigned long stride = (((w + 31) & ~31) >> 3);
outputImage = (unsigned char*)LocalAlloc(LPTR, stride * h);
for (unsigned int y = 0; y < h; y++)
{
unsigned char* b = (unsigned char*)LocalAlloc(LPTR, stride);
for (unsigned int x = 0; x < w; x++)
if (colorIndexArray.pixels[x + y * w])
b[x / 8] |= (unsigned char)(0x80 >> (x % 8));
CopyMemory(outputImage + stride * y, b, stride);
}
I am making an application that uses OCR and I am using OpenCV to threshold the image to improve the OCR results, I have gotten pretty good results but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample Images:
After:
One of the best algorithms for thresholding problem in the OCR field is sauvola method.You can use the below code.
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethod{OTSU,NIBLACK,SAUVOLA,WOLFJOLION};
class BhThresholder
{
public :
void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
double m,s,max_s, sum, sum_sq, foo;
int wxh = win_x / 2;
int wyh = win_y / 2;
int x_firstth = wxh;
int y_lastth = im.rows-wyh-1;
int y_firstth= wyh;
double winarea = win_x*win_y;
max_s = 0;
for (int j = y_firstth ; j<=y_lastth; j++)
{
// Calculate the initial window at the beginning of the line
sum = sum_sq = 0;
for (int wy=0 ; wy<win_y; wy++)
for (int wx=0 ; wx<win_x; wx++) {
foo = im.uget(wx,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(x_firstth, j, m);
map_s.fset(x_firstth, j, s);
// Shift the window, add and remove new/old values to the histogram
for (int i=1 ; i <= im.cols -win_x; i++) {
// Remove the left old column and add the right new column
for (int wy=0; wy<win_y; ++wy) {
foo = im.uget(i-1,j-wyh+wy);
sum -= foo;
sum_sq -= foo*foo;
foo = im.uget(i+win_x-1,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(i+wxh, j, m);
map_s.fset(i+wxh, j, s);
}
}
return max_s;
}
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
int mx, my;
// Create local statistics and store them in a double matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++) {
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++) {
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version) {
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
}
thsurf.fset(i+wxh,j,th);
if (i==0) {
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
}
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
}
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
}
cerr << "surface created" << endl;
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
{
if (src.uget(x,y) >= thsurf.fget(x,y))
{
dst.uset(x,y,255);
}
else
{
dst.uset(x,y,0);
}
}
}
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
Mat src = _src.getMat();
int winx = 0;
int winy = 0;
float optK=0.5;
if (winx==0 || winy==0) {
winy = (int) (2.0 * src.rows - 1)/3;
winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
// if the window is too big, than we asume that the image
// is not a single text box, but a document page: set
// the window size to a fixed constant.
if (winx > 100)
winx = winy = 40;
}
// Threshold
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
//medianBlur(src,dst,5);
GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
//medianBlur(src,dst,7);
switch (method)
{
case BhThresholdMethod::OTSU :
threshold(dst,dst,128,255,CV_THRESH_OTSU);
break;
case BhThresholdMethod::SAUVOLA :
case BhThresholdMethod::WOLFJOLION :
NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
}
bitwise_not(dst,dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
Here is comparsion table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
Try to sample with better resolution.
I'm trying to code a FFT/IFFT function with FFTW 3.3 and OpenCV 2.1 using the C++ interface. I've seen a lot of examples using the old OpenCV formats and I did a direct conversion, but something doesn't work.
The objective of my function is to return a Mat object with the real part and the imaginary part of the FFT, like dft default OpenCV function does. Here is the code of the function. Program gets blocked with memory problem in the lines that copy im_data to data_in.
Does somebody know what am I doing wrong? Thank you
Mat fft_sr(Mat& I)
{
double *im_data;
double *realP_data;
double *imP_data;
fftw_complex *data_in;
fftw_complex *fft;
fftw_plan plan_f;
int width = I.cols;
int height = I.rows;
int step = I.step;
int i, j, k;
Mat realP=Mat::zeros(height,width,CV_64F); // Real Part FFT
Mat imP=Mat::zeros(height,width,CV_64F); // Imaginary Part FFT
im_data = ( double* ) I.data;
realP_data = ( double* ) realP.data;
imP_data = ( double* ) imP.data;
data_in = ( fftw_complex* )fftw_malloc( sizeof( fftw_complex ) * width * height );
fft = ( fftw_complex* )fftw_malloc( sizeof( fftw_complex ) * width * height );
// Problem Here
for( i = 0, k = 0 ; i < height ; i++ ) {
for( j = 0 ; j < width ; j++ ) {
data_in[k][0] = ( double )im_data[i * step + j];
data_in[k][1] = ( double )0.0;
k++;
}
}
plan_f = fftw_plan_dft_2d( height, width, data_in, fft, FFTW_FORWARD, FFTW_ESTIMATE );
fftw_execute( plan_f );
// Copy real and imaginary data
for( i = 0, k = 0 ; i < height ; i++ ) {
for( j = 0 ; j < width ; j++ ) {
realP_data[i * step + j] = ( double )fft[k][0];
imP_data[i * step + j] = ( double )fft[k][1];
k++;
}
}
Mat fft_I(I.size(),CV_64FC2);
Mat fftplanes[] = {Mat_<double>(realP), Mat_<double>(imP)};
merge(fftplanes, 2, fft_I);
fftw_destroy_plan(plan_f);
fftw_free(data_in);
fftw_free(fft);
return fft_I;
}
You are using step wrong. It is meant to index into Mat::data. Since you already casted Mat::data to double* when assigning it to im_data, you can index into im_data "normally":
data_in[k][0] = im_data[i * width + j];
When using step the correct way to index is:
data_in[k][0] = ( double )I.data[i * step + j];
Update:
Try to access your images row-wise. That way you avoid running into problems with stride/step, while still exploiting fast access:
for (int i = 0; i < I.rows; i++)
{
double* row = I.ptr<double>(i);
for (int j = 0; j < I.cols; j++)
{
// Do something with the current pixel.
double someValue = row[j];
}
}
I know this is old but when you are using fftw you need to initialize fftw_complex *data_in
only after creating the plan for the fft, if i recall correctly when you create the plan it sets all the
*data_in values to 0.
so allocate before the plan and initialize after!
Statement
im_data = ( double* ) I.data;
defines im_data as double pointer to image data.
I think that should be mandatory that I was a double values image.