I'm working on a C++ code for image manipulation that works pixel-by-pixel (using Magick++), and I want to use it with OpenMP, but I have the following issue:
Magick: Semaphore operation failed (unable to destroy semaphore) [Device or resource busy].
img_test: magick/pixel_cache.c:2765: ModifyCache: Assertion `image->cache != (Cache) ((void *)0)' failed.
Also, the program gets stuck in an infinite loop.
Here is the code snippet:
int main(int argc, char **argv)
{
    InitializeMagick(*argv);
    Image img1, img2;
    img1.read(argv[1]);
    img2.read(argv[2]);
    int sx = img1.columns();
    int sy = img1.rows();
    Image out;
    out.size(Geometry(sx, sy));
    cout << "Processing pictures..." << endl;
    int iy;
    #pragma omp for private(iy)
    for (iy = 0; iy < sy; iy++)
    {
        #pragma omp parallel for
        for (int ix = 0; ix < sx; ix++)
        {
            double _r = 0.0, _g = 0.0, _b = 0.0;
            ColorRGB ppix1(img1.pixelColor(ix, iy));
            ColorRGB ppix2(img2.pixelColor(ix, iy));
            // do some image processing...
            ColorRGB opix(_r * MaxRGB, _g * MaxRGB, _b * MaxRGB);
            out.pixelColor(ix, iy, opix);
        }
    }
    out.write("Output.png");
}
Is there a way to solve this?
For this example, you would want to use schedule(static) with the ordered clause.
cout << "Processing pictures..." << endl;
int iy;
#pragma omp parallel for schedule(static) ordered
for (iy = 0; iy < sy; iy++)
{
    #pragma omp ordered
    for (int ix = 0; ix < sx; ix++)
    {
        double _r = 0.0, _g = 0.0, _b = 0.0;
        ColorRGB ppix1(img1.pixelColor(ix, iy));
        ColorRGB ppix2(img2.pixelColor(ix, iy));
        // do some image processing...
        ColorRGB opix(_r * MaxRGB, _g * MaxRGB, _b * MaxRGB);
        out.pixelColor(ix, iy, opix);
    }
}
out.write("Output.png");
Edit
If you do want to work with low-level pixel information in parallel, @NoseKnowsAll is correct for a single region over iy. However, you'll run into issues calling out.pixelColor, as the internal cache may fall out of sync. I would suggest exporting the pixel data, performing the work in parallel, and importing the final results.
// Allocate three buffers the total size of x * y * RGB
double * buffer1 = new double[sx * sy * 3];
double * buffer2 = new double[sx * sy * 3];
double * buffer3 = new double[sx * sy * 3];
// Write pixel data to first two buffers
img1.write(0, 0, sx, sy, "RGB", DoublePixel, buffer1);
img2.write(0, 0, sx, sy, "RGB", DoublePixel, buffer2);
cout << "Processing pictures..." << endl;
int iy;
#pragma omp parallel for
for (iy = 0; iy < sy; iy++)
{
    for (int ix = 0; ix < sx; ix++)
    {
        // Find where in the buffer the current pixel is located
        size_t idx = (iy * sx + ix) * 3;
        // For fun, let's alternate which source to assign to the
        // third buffer.
        if ((iy % 2 && ix % 2) || (!(iy % 2) && !(ix % 2))) {
            buffer3[idx+0] = buffer1[idx+0]; // R
            buffer3[idx+1] = buffer1[idx+1]; // G
            buffer3[idx+2] = buffer1[idx+2]; // B
        } else {
            buffer3[idx+0] = buffer2[idx+0]; // R
            buffer3[idx+1] = buffer2[idx+1]; // G
            buffer3[idx+2] = buffer2[idx+2]; // B
        }
    }
}
// Import the third buffer into the out Image
out.read(sx, sy, "RGB", DoublePixel, buffer3);
out.write("Output.png");
// Free the buffers once the image owns the data
delete[] buffer1;
delete[] buffer2;
delete[] buffer3;
YMMV
Below is the current code that I am working with. When I comment out the code that runs the progress_bar function, the program works perfectly as expected, with the Mandelbrot written out to a separate image file. Yet for whatever reason, when I try to include the function, everything screeches to a halt once the rest of the program has finished up. How do I include the progress_bar function without the program locking into a pseudo-deadlock state? Any and all help is appreciated.
#include <cstdint>
#include <cstdlib>
#include <complex>
#include <fstream>
#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include <chrono>
#include <condition_variable>
// Import things we need from the standard library
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::complex;
using std::cout;
using std::endl;
using std::ofstream;
// Define the alias "the_clock" for the clock type we're going to use.
typedef std::chrono::steady_clock the_clock;
// The size of the image to generate.
const int WIDTH = 1920;
const int HEIGHT = 1200;
// The number of times to iterate before we assume that a point isn't in the
// Mandelbrot set.
// (You may need to turn this up if you zoom further into the set.)
const int MAX_ITERATIONS = 500;
// The image data.
// Each pixel is represented as 0xRRGGBB.
uint32_t image[HEIGHT][WIDTH];
double progress;
bool progressDone = false;
std::mutex locking;
std::condition_variable conditionMet;
int partsDone = 0;
void progress_bar() {
    std::unique_lock<std::mutex> lock(locking);
    while (!progressDone) {
        //std::this_thread::sleep_for(std::chrono::nanoseconds(100));
        cout << "Current Progress is at: " << progress << "%\n";
        conditionMet.wait(lock);
    }
    cout << "Mandelbrot is finished! Take a look.";
}
// Write the image to a TGA file with the given name.
// Format specification: http://www.gamers.org/dEngine/quake3/TGA.txt
void write_tga(const char *filename)
{
    ofstream outfile(filename, ofstream::binary);
    uint8_t header[18] = {
        0, // no image ID
        0, // no colour map
        2, // uncompressed 24-bit image
        0, 0, 0, 0, 0, // empty colour map specification
        0, 0, // X origin
        0, 0, // Y origin
        WIDTH & 0xFF, (WIDTH >> 8) & 0xFF, // width
        HEIGHT & 0xFF, (HEIGHT >> 8) & 0xFF, // height
        24, // bits per pixel
        0, // image descriptor
    };
    outfile.write((const char *)header, 18);
    for (int y = 0; y < HEIGHT; ++y)
    {
        for (int x = 0; x < WIDTH; ++x)
        {
            uint8_t pixel[3] = {
                image[y][x] & 0xFF, // blue channel
                (image[y][x] >> 8) & 0xFF, // green channel
                (image[y][x] >> 16) & 0xFF, // red channel
            };
            outfile.write((const char *)pixel, 3);
        }
    }
    outfile.close();
    if (!outfile)
    {
        // An error has occurred at some point since we opened the file.
        cout << "Error writing to " << filename << endl;
        exit(1);
    }
}
// Render the Mandelbrot set into the image array.
// The parameters specify the region on the complex plane to plot.
void compute_mandelbrot(double left, double right, double top, double bottom, double start, double finish)
{
    for (int y = start; y < finish; ++y)
    {
        for (int x = 0; x < WIDTH; ++x)
        {
            // Work out the point in the complex plane that
            // corresponds to this pixel in the output image.
            complex<double> c(left + (x * (right - left) / WIDTH),
                top + (y * (bottom - top) / HEIGHT));
            // Start off z at (0, 0).
            complex<double> z(0.0, 0.0);
            // Iterate z = z^2 + c until z moves more than 2 units
            // away from (0, 0), or we've iterated too many times.
            int iterations = 0;
            while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
            {
                z = (z * z) + c;
                ++iterations;
            }
            if (iterations == MAX_ITERATIONS)
            {
                // z didn't escape from the circle.
                // This point is in the Mandelbrot set.
                image[y][x] = 0x000000; // black
            }
            else if (iterations == 0) {
                image[y][x] = 0xFFFFFF;
            }
            else
            {
                // z escaped within less than MAX_ITERATIONS
                // iterations. This point isn't in the set.
                image[y][x] = 0xFFFFFF; // white
                image[y][x] = 16711680 | iterations << 8 | iterations;
            }
            std::unique_lock<std::mutex> lock(locking);
            progress += double((1.0 / (WIDTH*HEIGHT)) * 100.0);
            conditionMet.notify_one();
        }
    }
    partsDone += 1;
}
int main(int argc, char *argv[])
{
    cout << "Please wait..." << endl;
    // Start timing
    std::vector<std::thread*> threads;
    the_clock::time_point start = the_clock::now();
    std::thread progressive(progress_bar);
    for (int slice = 0; slice < 2; slice++) {
        // This shows the whole set.
        threads.push_back(new std::thread(compute_mandelbrot, -2.0, 1.0, 1.125, -1.125, HEIGHT * (slice / 2), HEIGHT * ((slice + 1) / 2)));
        // This zooms in on an interesting bit of detail.
        //compute_mandelbrot(-0.751085, -0.734975, 0.118378, 0.134488, 0, HEIGHT/16);
    }
    // Stop timing
    for (std::thread* t : threads) {
        t->join();
        delete t;
    }
    if (partsDone == 2) {
        progressDone = true;
    }
    progressive.join();
    the_clock::time_point end = the_clock::now();
    // Compute the difference between the two times in milliseconds
    auto time_taken = duration_cast<milliseconds>(end - start).count();
    cout << "Computing the Mandelbrot set took " << time_taken << " ms." << endl;
    write_tga("output.tga");
    std::this_thread::sleep_for(milliseconds(3000));
    return 0;
}
The reason for your non-termination is that no one notifies the progress bar thread after all the work has finished. Add conditionMet.notify_one(); before the call to progressive.join(). I've omitted I/O to be able to run it in an online compiler in the following demo. Also (as @GoswinvonBrederlow mentions in the comments), make sure to turn partsDone into a std::atomic, because if more than one thread calls partsDone += 1 you'll end up with undefined results, and in turn you won't be able to tell whether your program is finished.
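A minimal sketch of that fix, keeping the rest of the question's code as-is (only partsDone changes type, and the end of main gains the notification):
#include <atomic>
std::atomic<int> partsDone{0}; // was: int partsDone = 0;
// ...at the end of main(), after the worker threads have been joined:
for (std::thread* t : threads) {
    t->join();
    delete t;
}
if (partsDone == 2) {
    // Flip the flag under the lock so the progress thread can't miss the
    // update between testing progressDone and calling wait().
    std::lock_guard<std::mutex> guard(locking);
    progressDone = true;
}
conditionMet.notify_one(); // wake the progress thread one final time
progressive.join();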
This would all look simpler if you changed progress to std::atomic and had your progress printer just load the variable at, say, 100 ms intervals (and print on top of the previous line). Then all you'd need would be the progressDone flag, instead of locking and printing on every modification of the progress value. You can see in the following demo that this runs with zero thread-sanitizer warnings. Make sure to adjust the printing interval. This change drops the runtime from ~10.7 s to ~7 s, though that is just an indication: it's not kosher to time your programs with the thread sanitizer on.
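A sketch of that polling version. Here an atomic pixel counter stands in for the atomic double, since += on std::atomic<double> only arrived in C++20; all other names come from the question:
#include <atomic>
// Each worker now just does ++pixelsDone per pixel instead of
// locking the mutex and updating progress.
std::atomic<uint64_t> pixelsDone{0};
std::atomic<bool> progressDone{false};
void progress_bar()
{
    while (!progressDone) {
        double percent = 100.0 * pixelsDone / (double(WIDTH) * HEIGHT);
        // '\r' reprints on top of the previous line instead of scrolling.
        cout << "\rCurrent progress is at: " << percent << "%" << std::flush;
        std::this_thread::sleep_for(milliseconds(100));
    }
    cout << "\nMandelbrot is finished! Take a look." << endl;
}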
I have tried adding #pragma omp parallel for to this raytracing program, and I am measuring the same (or very similar) processing time with and without the pragma statement.
This is the function:
void Scene::SaveImage(
    const char *outPngFileName,
    int pixelsWide,
    int pixelsHigh,
    double zoom,
    int antiAliasFactor) const
{
    // Oversample the image using the anti-aliasing factor.
    const int largePixelsWide = antiAliasFactor * pixelsWide;
    const int largePixelsHigh = antiAliasFactor * pixelsHigh;
    const int smallerDim =
        ((pixelsWide < pixelsHigh) ? pixelsWide : pixelsHigh);
    const double largeZoom = antiAliasFactor * zoom * smallerDim;
    ImageBuffer buffer(largePixelsWide, largePixelsHigh, backgroundColor);
    // The camera is located at the origin.
    Vector camera(0.0, 0.0, 0.0);
    // The camera faces in the -z direction.
    // This allows the +x direction to be to the right,
    // and the +y direction to be upward.
    Vector direction(0.0, 0.0, -1.0);
    const Color fullIntensity(1.0, 1.0, 1.0);
    // We keep a list of (i,j) screen coordinates for pixels
    // we are not able to trace definitive rays for.
    // Later we will come back and fix these pixels.
    PixelList ambiguousPixelList;
    #pragma omp parallel for
    for (int i=0; i < largePixelsWide; ++i)
    {
        direction.x = (i - largePixelsWide/2.0) / largeZoom;
        for (int j = 0; j < largePixelsHigh; ++j)
        {
            direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;
#if RAYTRACE_DEBUG_POINTS
            {
                using namespace std;
                // Assume no active debug point unless we find one below.
                activeDebugPoint = NULL;
                DebugPointList::const_iterator iter = debugPointList.begin();
                DebugPointList::const_iterator end = debugPointList.end();
                for (; iter != end; ++iter)
                {
                    if ((iter->iPixel == i) && (iter->jPixel == j))
                    {
                        cout << endl;
                        cout << "Hit breakpoint at (";
                        cout << i << ", " << j << ")" << endl;
                        activeDebugPoint = &(*iter);
                        break;
                    }
                }
            }
#endif
            PixelData& pixel = buffer.Pixel(i,j);
            try
            {
                // Trace a ray from the camera toward the given direction
                // to figure out what color to assign to this pixel.
                pixel.color = TraceRay(
                    camera,
                    direction,
                    ambientRefraction,
                    fullIntensity,
                    0);
            }
            catch (AmbiguousIntersectionException)
            {
                // Getting here means that somewhere in the recursive
                // code for tracing rays, there were multiple
                // intersections that had minimum distance from a
                // vantage point. This can be really bad,
                // for example causing a ray of light to reflect
                // inward into a solid.
                // Mark the pixel as ambiguous, so that any other
                // ambiguous pixels nearby know not to use it.
                pixel.isAmbiguous = true;
                // Keep a list of all ambiguous pixel coordinates
                // so that we can rapidly enumerate through them
                // in the disambiguation pass.
                ambiguousPixelList.push_back(PixelCoordinates(i, j));
            }
        }
    }
#if RAYTRACE_DEBUG_POINTS
    // Leave no chance of a dangling pointer into debug points.
    activeDebugPoint = NULL;
#endif
    // Go back and "heal" ambiguous pixels as best we can.
    PixelList::const_iterator iter = ambiguousPixelList.begin();
    PixelList::const_iterator end = ambiguousPixelList.end();
    for (; iter != end; ++iter)
    {
        const PixelCoordinates& p = *iter;
        ResolveAmbiguousPixel(buffer, p.i, p.j);
    }
    // We want to scale the arbitrary range of
    // color component values to the range 0..255
    // allowed by PNG format. We therefore find
    // the maximum red, green, or blue value anywhere
    // in the image.
    const double max = buffer.MaxColorValue();
    // Downsample the image buffer to an integer array of RGBA
    // values that LodePNG understands.
    const unsigned char OPAQUE_ALPHA_VALUE = 255;
    const unsigned BYTES_PER_PIXEL = 4;
    // The number of bytes in buffer to be passed to LodePNG.
    const unsigned RGBA_BUFFER_SIZE =
        pixelsWide * pixelsHigh * BYTES_PER_PIXEL;
    std::vector<unsigned char> rgbaBuffer(RGBA_BUFFER_SIZE);
    unsigned rgbaIndex = 0;
    const double patchSize = antiAliasFactor * antiAliasFactor;
    //#pragma parallel for collapse(3)
    for (int j=0; j < pixelsHigh; ++j)
    {
        for (int i=0; i < pixelsWide; ++i)
        {
            Color sum(0.0, 0.0, 0.0);
            for (int di=0; di < antiAliasFactor; ++di)
            {
                //#pragma parallel omp for reduction(+:sum)
                for (int dj=0; dj < antiAliasFactor; ++dj)
                {
                    sum += buffer.Pixel(
                        antiAliasFactor*i + di,
                        antiAliasFactor*j + dj).color;
                }
            }
            sum /= patchSize;
            // Convert to integer red, green, blue, alpha values,
            // all of which must be in the range 0..255.
            rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.red, max);
            rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.green, max);
            rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.blue, max);
            rgbaBuffer[rgbaIndex++] = OPAQUE_ALPHA_VALUE;
        }
    }
    // Write the PNG file
    const unsigned error = lodepng::encode(
        outPngFileName,
        rgbaBuffer,
        pixelsWide,
        pixelsHigh);
    // If there was an encoding error, throw an exception.
    if (error != 0)
    {
        std::string message = "PNG encoder error: ";
        message += lodepng_error_text(error);
        throw ImagerException(message.c_str());
    }
}
// The following function searches through all solid objects
// for the first solid (if any) that contains the given point.
// In the case of ties, the solid that was inserted into the
// scene first wins. This arbitrary convention allows the
// composer of a scene to decide which of multiple overlapping
// objects should control the index of refraction for any
// overlapping volumes of space.
const SolidObject* Scene::PrimaryContainer(const Vector& point) const
{
    SolidObjectList::const_iterator iter = solidObjectList.begin();
    SolidObjectList::const_iterator end = solidObjectList.end();
    for (; iter != end; ++iter)
    {
        const SolidObject* solid = *iter;
        if (solid->Contains(point))
        {
            return solid;
        }
    }
    return NULL;
}
This is the snippet I added it to:
#pragma omp parallel for
for (int i=0; i < largePixelsWide; ++i)
{
    direction.x = (i - largePixelsWide/2.0) / largeZoom;
    for (int j = 0; j < largePixelsHigh; ++j)
    {
        direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;
I have made sure all of my VS2017 settings are correct and that -fopenmp main.cpp is in the command arguments. Additional information: this is in another file called scene.cpp, and the omp header file is included in scene.cpp.
My questions are: how can I get this to work, and if there are other parts where I can add it, where and how would I go about doing it? Thank you.
I want to implement a 2D convolution function in C++ by myself, without using filter2D(). I'm trying to iterate over all pixels of the input image and the kernel, then assign a new value to each pixel of dst.
However, I get this error:
Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)
I found that this error means I'm accessing a null pointer, but I could not solve the problem. Here is my C++ code.
cv::Mat_<float> spatialConvolution(const cv::Mat_<float>& src, const cv::Mat_<float>& kernel)
{
    // declare variables
    Mat_<float> dst;
    Mat_<float> flipped_kernel;
    float tmp = 0.0;
    // flip kernel
    flip(kernel, flipped_kernel, -1);
    // multiply and integrate
    // input rows
    for (int i=0; i<src.rows; i++) {
        // input columns
        for (int j=0; j<src.cols; j++) {
            // kernel rows
            for (int k=0; k<flipped_kernel.rows; k++) {
                // kernel columns
                for (int l=0; l<flipped_kernel.cols; l++) {
                    tmp += src.at<float>(i,j) * flipped_kernel.at<float>(k,l);
                }
            }
            dst.at<float>(i,j) = tmp;
        }
    }
    return dst.clone();
}
To simplify, let's suppose you have a 3x3 kernel:
k(0,0) k(0,1) k(0,2)
k(1,0) k(1,1) k(1,2)
k(2,0) k(2,1) k(2,2)
To calculate the convolution, you scan the input image (marked as I) from left to right and from top to bottom, and for every pixel of the input image you assign one value, calculated from the formula below:
newValue(y,x) = I(y-1,x-1) * k(0,0) + I(y-1,x) * k(0,1) + I(y-1,x+1) * k(0,2)
              + I(y,x-1)   * k(1,0) + I(y,x)   * k(1,1) + I(y,x+1)   * k(1,2)
              + I(y+1,x-1) * k(2,0) + I(y+1,x) * k(2,1) + I(y+1,x+1) * k(2,2)
------------------x------------>
|
|
| [k(0,0) k(0,1) k(0,2)]
y [k(1,0) k(1,1) k(1,2)]
| [k(2,0) k(2,1) k(2,2)]
|
(y,x) of the input image (I) is the anchor point of the kernel. To assign a new value to I(y,x), you need to multiply every k coefficient by the corresponding point of I; your code doesn't do this.
First you need to create the dst matrix with the same dimensions as the original image, and the same pixel type.
Then you need to rewrite your loops to reflect the formula described above:
cv::Mat_<float> spatialConvolution(const cv::Mat_<float>& src, const cv::Mat_<float>& kernel)
{
    Mat dst(src.rows, src.cols, src.type());
    Mat_<float> flipped_kernel;
    flip(kernel, flipped_kernel, -1);
    const int dx = kernel.cols / 2;
    const int dy = kernel.rows / 2;
    for (int i = 0; i < src.rows; i++)
    {
        for (int j = 0; j < src.cols; j++)
        {
            float tmp = 0.0f;
            for (int k = 0; k < flipped_kernel.rows; k++)
            {
                for (int l = 0; l < flipped_kernel.cols; l++)
                {
                    int x = j - dx + l;
                    int y = i - dy + k;
                    if (x >= 0 && x < src.cols && y >= 0 && y < src.rows)
                        tmp += src.at<float>(y, x) * flipped_kernel.at<float>(k, l);
                }
            }
            dst.at<float>(i, j) = saturate_cast<float>(tmp);
        }
    }
    return dst.clone();
}
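A hypothetical usage snippet (the file name and kernel are just examples, not from the question): blurring a grayscale image with a 3x3 box kernel:
Mat img = imread("input.png", 0); // 0 = load as grayscale
Mat_<float> src;
img.convertTo(src, CV_32F, 1.0 / 255.0); // the convolution works on floats
Mat_<float> kernel = Mat_<float>::ones(3, 3) / 9.0f;
Mat_<float> blurred = spatialConvolution(src, kernel);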
Your memory access error is presumably happening due to the line:
dst.at<float>(i,j) = tmp;
because dst is not initialized. You can't assign a value at that index of the matrix if it has no size/data. Mat_<float> dst; is only a declaration, not an initialization, so initialize the matrix first, using one of the Mat constructors that takes a cv::Size or the number of rows/columns (see the docs). For example, you can initialize dst with:
Mat dst{src.size(), src.type()};
I am attempting to find pedestrians/people in images with the help of a cascade classifier which uses HOG features.
The problem I'm trying to solve is in the initial stage, feature generation: the HOG values in certain areas of the images are too low, and hence the classifier fails.
The images below were captured using a Basler acA640-100gc camera.
The visualization of the HOG was borrowed from code found on a webpage; that code is also attached at the end of the question.
This first image here and its HOG are what I'm trying to achieve: a realistic outdoor scene which can be used to generate features and, hopefully, find people. This is not what I have captured using my camera.
Captured Outdoor Images results
The images below are what I have captured with the camera. I have tried all the basic variations, playing with the brightness and focus, but this still yields a poor result in an outdoor scene, where I am inside the car and the camera is attached close to the windscreen.
On the contrary, when the same camera was used to record an indoor scene, it works fine. Why it works in an indoor situation and not in an outdoor scene is something I can't understand.
Captured Indoor Images results
As seen in the images below, the same configuration works for an indoor scene.
Desired results
Ideally, I would like the results of the outdoor recordings to look like so.
Could anyone give me insight into why this happens?
Or how can I overcome this issue to generate reliable HOGs for detection?
Code to visualize HOG
Mat img_raw = imread("C:\\testimg.png", 1); // load as color image
resize(img_raw, img_raw, Size(64,128) );
Mat img;
cvtColor(img_raw, img, CV_RGB2GRAY);
HOGDescriptor d;
// Size(128,64), //winSize
// Size(16,16), //blocksize
// Size(8,8), //blockStride,
// Size(8,8), //cellSize,
// 9, //nbins,
// 0, //derivAper,
// -1, //winSigma,
// 0, //histogramNormType,
// 0.2, //L2HysThresh,
// 0 //gammal correction,
// //nlevels=64
//);
// void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
// Size winStride, Size padding,
// const vector<Point>& locations) const
vector<float> descriptorsValues;
vector<Point> locations;
d.compute( img, descriptorsValues, Size(8,8), Size(8,8), locations);
cout << "HOG descriptor size is " << d.getDescriptorSize() << endl;
cout << "img dimensions: " << img.cols << " width x " << img.rows << " height" << endl;
cout << "Found " << descriptorsValues.size() << " descriptor values" << endl;
cout << "Nr of locations specified : " << locations.size() << endl;
Mat get_hogdescriptor_visual_image(Mat& origImg,
    vector<float>& descriptorValues,
    Size winSize,
    Size cellSize,
    int scaleFactor,
    double viz_factor)
{
    Mat visual_image;
    resize(origImg, visual_image, Size(origImg.cols*scaleFactor, origImg.rows*scaleFactor));
    int gradientBinSize = 9;
    // dividing 180° into 9 bins, how large (in rad) is one bin?
    float radRangeForOneBin = 3.14/(float)gradientBinSize;
    // prepare data structure: 9 orientation / gradient strengths for each cell
    int cells_in_x_dir = winSize.width / cellSize.width;
    int cells_in_y_dir = winSize.height / cellSize.height;
    int totalnrofcells = cells_in_x_dir * cells_in_y_dir;
    float*** gradientStrengths = new float**[cells_in_y_dir];
    int** cellUpdateCounter = new int*[cells_in_y_dir];
    for (int y=0; y<cells_in_y_dir; y++)
    {
        gradientStrengths[y] = new float*[cells_in_x_dir];
        cellUpdateCounter[y] = new int[cells_in_x_dir];
        for (int x=0; x<cells_in_x_dir; x++)
        {
            gradientStrengths[y][x] = new float[gradientBinSize];
            cellUpdateCounter[y][x] = 0;
            for (int bin=0; bin<gradientBinSize; bin++)
                gradientStrengths[y][x][bin] = 0.0;
        }
    }
    // nr of blocks = nr of cells - 1
    // since there is a new block on each cell (overlapping blocks!) but the last one
    int blocks_in_x_dir = cells_in_x_dir - 1;
    int blocks_in_y_dir = cells_in_y_dir - 1;
    // compute gradient strengths per cell
    int descriptorDataIdx = 0;
    for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
    {
        for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
        {
            // 4 cells per block ...
            for (int cellNr=0; cellNr<4; cellNr++)
            {
                // compute corresponding cell nr
                int cellx = blockx;
                int celly = blocky;
                if (cellNr==1) celly++;
                if (cellNr==2) cellx++;
                if (cellNr==3)
                {
                    cellx++;
                    celly++;
                }
                for (int bin=0; bin<gradientBinSize; bin++)
                {
                    float gradientStrength = descriptorValues[descriptorDataIdx];
                    descriptorDataIdx++;
                    gradientStrengths[celly][cellx][bin] += gradientStrength;
                } // for (all bins)
                // note: overlapping blocks lead to multiple updates of this sum!
                // we therefore keep track how often a cell was updated,
                // to compute average gradient strengths
                cellUpdateCounter[celly][cellx]++;
            } // for (all cells)
        } // for (all block x pos)
    } // for (all block y pos)
    // compute average gradient strengths
    for (int celly=0; celly<cells_in_y_dir; celly++)
    {
        for (int cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
            // compute average gradient strengths for each gradient bin direction
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
            }
        }
    }
    cout << "descriptorDataIdx = " << descriptorDataIdx << endl;
    // draw cells
    for (int celly=0; celly<cells_in_y_dir; celly++)
    {
        for (int cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            int drawX = cellx * cellSize.width;
            int drawY = celly * cellSize.height;
            int mx = drawX + cellSize.width/2;
            int my = drawY + cellSize.height/2;
            rectangle(visual_image,
                Point(drawX*scaleFactor, drawY*scaleFactor),
                Point((drawX+cellSize.width)*scaleFactor,
                    (drawY+cellSize.height)*scaleFactor),
                CV_RGB(100,100,100),
                1);
            // draw in each cell all 9 gradient strengths
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                float currentGradStrength = gradientStrengths[celly][cellx][bin];
                // no line to draw?
                if (currentGradStrength==0)
                    continue;
                float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
                float dirVecX = cos(currRad);
                float dirVecY = sin(currRad);
                float maxVecLen = cellSize.width/2;
                float scale = viz_factor; // just a visualization scale,
                                          // to see the lines better
                // compute line coordinates
                float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
                float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
                float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
                float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
                // draw the gradient visualization
                line(visual_image,
                    Point(x1*scaleFactor, y1*scaleFactor),
                    Point(x2*scaleFactor, y2*scaleFactor),
                    CV_RGB(0,0,255),
                    1);
            } // for (all bins)
        } // for (cellx)
    } // for (celly)
    // don't forget to free memory allocated by helper data structures!
    for (int y=0; y<cells_in_y_dir; y++)
    {
        for (int x=0; x<cells_in_x_dir; x++)
        {
            delete[] gradientStrengths[y][x];
        }
        delete[] gradientStrengths[y];
        delete[] cellUpdateCounter[y];
    }
    delete[] gradientStrengths;
    delete[] cellUpdateCounter;
    return visual_image;
}
I am making an application that uses OCR, and I am using OpenCV to threshold the image to improve the OCR results. I have gotten pretty good results, but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample images: (before and after screenshots omitted)
One of the best algorithms for the thresholding problem in the OCR field is the Sauvola method. You can use the code below.
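For reference, the per-pixel threshold that the code below computes in the Sauvola case is
t(x,y) = m(x,y) * (1 + k * (s(x,y)/R - 1))
where m(x,y) and s(x,y) are the mean and standard deviation of a local window around the pixel, R is the dynamic range of the standard deviation (dR = 128 below), and k is a tuning constant (optK = 0.5 below). Pixels at or above their local threshold become white and the rest black, before the final bitwise_not.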
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethod { OTSU, NIBLACK, SAUVOLA, WOLFJOLION };
class BhThresholder
{
public:
    void doThreshold(InputArray src, OutputArray dst, const BhThresholdMethod &method);
private:
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#include <cmath>    // sqrt
#include <iostream> // cerr, endl
// (also include the BhThresholder header shown above)
using namespace std;
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats(Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
    double m, s, max_s, sum, sum_sq, foo;
    int wxh = win_x / 2;
    int wyh = win_y / 2;
    int x_firstth = wxh;
    int y_lastth = im.rows-wyh-1;
    int y_firstth = wyh;
    double winarea = win_x*win_y;
    max_s = 0;
    for (int j = y_firstth; j<=y_lastth; j++)
    {
        // Calculate the initial window at the beginning of the line
        sum = sum_sq = 0;
        for (int wy=0; wy<win_y; wy++)
            for (int wx=0; wx<win_x; wx++) {
                foo = im.uget(wx, j-wyh+wy);
                sum += foo;
                sum_sq += foo*foo;
            }
        m = sum / winarea;
        s = sqrt((sum_sq - (sum*sum)/winarea)/winarea);
        if (s > max_s)
            max_s = s;
        map_m.fset(x_firstth, j, m);
        map_s.fset(x_firstth, j, s);
        // Shift the window, add and remove new/old values to the histogram
        for (int i=1; i <= im.cols-win_x; i++) {
            // Remove the left old column and add the right new column
            for (int wy=0; wy<win_y; ++wy) {
                foo = im.uget(i-1, j-wyh+wy);
                sum -= foo;
                sum_sq -= foo*foo;
                foo = im.uget(i+win_x-1, j-wyh+wy);
                sum += foo;
                sum_sq += foo*foo;
            }
            m = sum / winarea;
            s = sqrt((sum_sq - (sum*sum)/winarea)/winarea);
            if (s > max_s)
                max_s = s;
            map_m.fset(i+wxh, j, m);
            map_s.fset(i+wxh, j, s);
        }
    }
    return max_s;
}
void NiblackSauvolaWolfJolion(InputArray _src, OutputArray _dst, const BhThresholdMethod &version, int winx, int winy, double k, double dR) {
    Mat src = _src.getMat();
    Mat dst = _dst.getMat();
    double m, s, max_s;
    double th = 0;
    double min_I, max_I;
    int wxh = winx/2;
    int wyh = winy/2;
    int x_firstth = wxh;
    int x_lastth = src.cols-wxh-1;
    int y_lastth = src.rows-wyh-1;
    int y_firstth = wyh;
    // Create local statistics and store them in double matrices
    Mat map_m = Mat::zeros(src.size(), CV_32FC1);
    Mat map_s = Mat::zeros(src.size(), CV_32FC1);
    max_s = calcLocalStats(src, map_m, map_s, winx, winy);
    minMaxLoc(src, &min_I, &max_I);
    Mat thsurf(src.size(), CV_32FC1);
    // Create the threshold surface, including border processing
    // ----------------------------------------------------
    for (int j = y_firstth; j<=y_lastth; j++) {
        // NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
        for (int i=0; i <= src.cols-winx; i++) {
            m = map_m.fget(i+wxh, j);
            s = map_s.fget(i+wxh, j);
            // Calculate the threshold
            switch (version) {
            case BhThresholdMethod::NIBLACK:
                th = m + k*s;
                break;
            case BhThresholdMethod::SAUVOLA:
                th = m * (1 + k*(s/dR-1));
                break;
            case BhThresholdMethod::WOLFJOLION:
                th = m + k * (s/max_s-1) * (m-min_I);
                break;
            default:
                cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
                exit(1);
            }
            thsurf.fset(i+wxh, j, th);
            if (i==0) {
                // LEFT BORDER
                for (int i=0; i<=x_firstth; ++i)
                    thsurf.fset(i, j, th);
                // LEFT-UPPER CORNER
                if (j==y_firstth)
                    for (int u=0; u<y_firstth; ++u)
                        for (int i=0; i<=x_firstth; ++i)
                            thsurf.fset(i, u, th);
                // LEFT-LOWER CORNER
                if (j==y_lastth)
                    for (int u=y_lastth+1; u<src.rows; ++u)
                        for (int i=0; i<=x_firstth; ++i)
                            thsurf.fset(i, u, th);
            }
            // UPPER BORDER
            if (j==y_firstth)
                for (int u=0; u<y_firstth; ++u)
                    thsurf.fset(i+wxh, u, th);
            // LOWER BORDER
            if (j==y_lastth)
                for (int u=y_lastth+1; u<src.rows; ++u)
                    thsurf.fset(i+wxh, u, th);
        }
        // RIGHT BORDER
        for (int i=x_lastth; i<src.cols; ++i)
            thsurf.fset(i, j, th);
        // RIGHT-UPPER CORNER
        if (j==y_firstth)
            for (int u=0; u<y_firstth; ++u)
                for (int i=x_lastth; i<src.cols; ++i)
                    thsurf.fset(i, u, th);
        // RIGHT-LOWER CORNER
        if (j==y_lastth)
            for (int u=y_lastth+1; u<src.rows; ++u)
                for (int i=x_lastth; i<src.cols; ++i)
                    thsurf.fset(i, u, th);
    }
    cerr << "surface created" << endl;
    for (int y=0; y<src.rows; ++y)
        for (int x=0; x<src.cols; ++x)
        {
            if (src.uget(x,y) >= thsurf.fget(x,y))
            {
                dst.uset(x,y,255);
            }
            else
            {
                dst.uset(x,y,0);
            }
        }
}
void BhThresholder::doThreshold(InputArray _src, OutputArray _dst, const BhThresholdMethod &method)
{
    Mat src = _src.getMat();
    int winx = 0;
    int winy = 0;
    float optK = 0.5;
    if (winx==0 || winy==0) {
        winy = (int) (2.0 * src.rows - 1)/3;
        winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
        // if the window is too big, then we assume that the image
        // is not a single text box, but a document page: set
        // the window size to a fixed constant.
        if (winx > 100)
            winx = winy = 40;
    }
    // Threshold
    _dst.create(src.size(), CV_8UC1);
    Mat dst = _dst.getMat();
    //medianBlur(src,dst,5);
    GaussianBlur(src, dst, Size(5,5), 0);
    //#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
    //medianBlur(src,dst,7);
    switch (method)
    {
    case BhThresholdMethod::OTSU:
        threshold(dst, dst, 128, 255, CV_THRESH_OTSU);
        break;
    case BhThresholdMethod::SAUVOLA:
    case BhThresholdMethod::WOLFJOLION:
        NiblackSauvolaWolfJolion(src, dst, method, winx, winy, optK, 128);
    }
    bitwise_not(dst, dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
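A minimal usage sketch, assuming the two files above are part of the project (the header name and image paths are placeholders):
#include "thresholder.h" // or whatever you name the header above
#include <highgui.h>     // imread/imwrite in the old-style API used here
int main()
{
    Mat gray = imread("scan.png", 0); // 0 = load as grayscale
    Mat bin;
    BhThresholder thresholder;
    thresholder.doThreshold(gray, bin, BhThresholdMethod::SAUVOLA);
    imwrite("scan_bin.png", bin);
    return 0;
}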
Here is a comparison table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right-angle corners (see the sketch after this list).
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization works well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters).
Try to sample with better resolution.
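As mentioned above, here is a rough sketch of the warp-plus-adaptive-threshold idea. The corner coordinates are hypothetical (in practice you would detect them from the card outline), and a full perspective warp is used as the general form of the affine correction:
#include <opencv2/opencv.hpp>
int main()
{
    cv::Mat src = cv::imread("card.jpg");
    // Hypothetical detected corners: top-left, top-right, bottom-right, bottom-left.
    std::vector<cv::Point2f> corners = {
        {112, 80}, {540, 95}, {560, 370}, {95, 350}};
    // Map them onto an upright rectangle.
    const float W = 480, H = 300;
    std::vector<cv::Point2f> upright = {{0, 0}, {W, 0}, {W, H}, {0, H}};
    cv::Mat M = cv::getPerspectiveTransform(corners, upright);
    cv::Mat warped;
    cv::warpPerspective(src, warped, M, cv::Size((int)W, (int)H));
    // Grayscale + adaptive threshold instead of one global threshold.
    cv::Mat gray, bin;
    cv::cvtColor(warped, gray, cv::COLOR_BGR2GRAY);
    cv::adaptiveThreshold(gray, bin, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C,
                          cv::THRESH_BINARY, 31, 10);
    cv::imwrite("card_bin.png", bin);
    return 0;
}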