I am making an application that uses OCR and I am using OpenCV to threshold the image to improve the OCR results, I have gotten pretty good results but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample Images:
After:
One of the best algorithms for thresholding problem in the OCR field is sauvola method.You can use the below code.
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethod{OTSU,NIBLACK,SAUVOLA,WOLFJOLION};
class BhThresholder
{
public :
void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
double m,s,max_s, sum, sum_sq, foo;
int wxh = win_x / 2;
int wyh = win_y / 2;
int x_firstth = wxh;
int y_lastth = im.rows-wyh-1;
int y_firstth= wyh;
double winarea = win_x*win_y;
max_s = 0;
for (int j = y_firstth ; j<=y_lastth; j++)
{
// Calculate the initial window at the beginning of the line
sum = sum_sq = 0;
for (int wy=0 ; wy<win_y; wy++)
for (int wx=0 ; wx<win_x; wx++) {
foo = im.uget(wx,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(x_firstth, j, m);
map_s.fset(x_firstth, j, s);
// Shift the window, add and remove new/old values to the histogram
for (int i=1 ; i <= im.cols -win_x; i++) {
// Remove the left old column and add the right new column
for (int wy=0; wy<win_y; ++wy) {
foo = im.uget(i-1,j-wyh+wy);
sum -= foo;
sum_sq -= foo*foo;
foo = im.uget(i+win_x-1,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(i+wxh, j, m);
map_s.fset(i+wxh, j, s);
}
}
return max_s;
}
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
int mx, my;
// Create local statistics and store them in a double matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++) {
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++) {
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version) {
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
}
thsurf.fset(i+wxh,j,th);
if (i==0) {
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
}
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
}
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
}
cerr << "surface created" << endl;
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
{
if (src.uget(x,y) >= thsurf.fget(x,y))
{
dst.uset(x,y,255);
}
else
{
dst.uset(x,y,0);
}
}
}
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
Mat src = _src.getMat();
int winx = 0;
int winy = 0;
float optK=0.5;
if (winx==0 || winy==0) {
winy = (int) (2.0 * src.rows - 1)/3;
winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
// if the window is too big, than we asume that the image
// is not a single text box, but a document page: set
// the window size to a fixed constant.
if (winx > 100)
winx = winy = 40;
}
// Threshold
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
//medianBlur(src,dst,5);
GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
//medianBlur(src,dst,7);
switch (method)
{
case BhThresholdMethod::OTSU :
threshold(dst,dst,128,255,CV_THRESH_OTSU);
break;
case BhThresholdMethod::SAUVOLA :
case BhThresholdMethod::WOLFJOLION :
NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
}
bitwise_not(dst,dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
Here is comparsion table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
Try to sample with better resolution.
Related
I have tried adding #pragma parallel omp for to this raytracing program and am measuring the same/very similar amount of processing time with and without the pragma statement.
This is the function:
void Scene::SaveImage(
const char *outPngFileName,
int pixelsWide,
int pixelsHigh,
double zoom,
int antiAliasFactor) const
{
// Oversample the image using the anti-aliasing factor.
const int largePixelsWide = antiAliasFactor * pixelsWide;
const int largePixelsHigh = antiAliasFactor * pixelsHigh;
const int smallerDim =
((pixelsWide < pixelsHigh) ? pixelsWide : pixelsHigh);
const double largeZoom = antiAliasFactor * zoom * smallerDim;
ImageBuffer buffer(largePixelsWide, largePixelsHigh, backgroundColor);
// The camera is located at the origin.
Vector camera(0.0, 0.0, 0.0);
// The camera faces in the -z direction.
// This allows the +x direction to be to the right,
// and the +y direction to be upward.
Vector direction(0.0, 0.0, -1.0);
const Color fullIntensity(1.0, 1.0, 1.0);
// We keep a list of (i,j) screen coordinates for pixels
// we are not able to trace definitive rays for.
// Later we will come back and fix these pixels.
PixelList ambiguousPixelList;
#pragma omp parallel for
for (int i=0; i < largePixelsWide; ++i)
{
direction.x = (i - largePixelsWide/2.0) / largeZoom;
for (int j = 0; j < largePixelsHigh; ++j)
{
direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;
#if RAYTRACE_DEBUG_POINTS
{
using namespace std;
// Assume no active debug point unless we find one below.
activeDebugPoint = NULL;
DebugPointList::const_iterator iter = debugPointList.begin();
DebugPointList::const_iterator end = debugPointList.end();
for(; iter != end; ++iter)
{
if ((iter->iPixel == i) && (iter->jPixel == j))
{
cout << endl;
cout << "Hit breakpoint at (";
cout << i << ", " << j <<")" << endl;
activeDebugPoint = &(*iter);
break;
}
}
}
#endif
PixelData& pixel = buffer.Pixel(i,j);
try
{
// Trace a ray from the camera toward the given direction
// to figure out what color to assign to this pixel.
pixel.color = TraceRay(
camera,
direction,
ambientRefraction,
fullIntensity,
0);
}
catch (AmbiguousIntersectionException)
{
// Getting here means that somewhere in the recursive
// code for tracing rays, there were multiple
// intersections that had minimum distance from a
// vantage point. This can be really bad,
// for example causing a ray of light to reflect
// inward into a solid.
// Mark the pixel as ambiguous, so that any other
// ambiguous pixels nearby know not to use it.
pixel.isAmbiguous = true;
// Keep a list of all ambiguous pixel coordinates
// so that we can rapidly enumerate through them
// in the disambiguation pass.
ambiguousPixelList.push_back(PixelCoordinates(i, j));
}
}
}
#if RAYTRACE_DEBUG_POINTS
// Leave no chance of a dangling pointer into debug points.
activeDebugPoint = NULL;
#endif
// Go back and "heal" ambiguous pixels as best we can.
PixelList::const_iterator iter = ambiguousPixelList.begin();
PixelList::const_iterator end = ambiguousPixelList.end();
for (; iter != end; ++iter)
{
const PixelCoordinates& p = *iter;
ResolveAmbiguousPixel(buffer, p.i, p.j);
}
// We want to scale the arbitrary range of
// color component values to the range 0..255
// allowed by PNG format. We therefore find
// the maximum red, green, or blue value anywhere
// in the image.
const double max = buffer.MaxColorValue();
// Downsample the image buffer to an integer array of RGBA
// values that LodePNG understands.
const unsigned char OPAQUE_ALPHA_VALUE = 255;
const unsigned BYTES_PER_PIXEL = 4;
// The number of bytes in buffer to be passed to LodePNG.
const unsigned RGBA_BUFFER_SIZE =
pixelsWide * pixelsHigh * BYTES_PER_PIXEL;
std::vector<unsigned char> rgbaBuffer(RGBA_BUFFER_SIZE);
unsigned rgbaIndex = 0;
const double patchSize = antiAliasFactor * antiAliasFactor;
//#pragma parallel for collapse(3)
for (int j=0; j < pixelsHigh; ++j)
{
for (int i=0; i < pixelsWide; ++i)
{
Color sum(0.0, 0.0, 0.0);
for (int di=0; di < antiAliasFactor; ++di)
{
//#pragma parallel omp for reduction(+:sum)
for (int dj=0; dj < antiAliasFactor; ++dj)
{
sum += buffer.Pixel(
antiAliasFactor*i + di,
antiAliasFactor*j + dj).color;
}
}
sum /= patchSize;
// Convert to integer red, green, blue, alpha values,
// all of which must be in the range 0..255.
rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.red, max);
rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.green, max);
rgbaBuffer[rgbaIndex++] = ConvertPixelValue(sum.blue, max);
rgbaBuffer[rgbaIndex++] = OPAQUE_ALPHA_VALUE;
}
}
// Write the PNG file
const unsigned error = lodepng::encode(
outPngFileName,
rgbaBuffer,
pixelsWide,
pixelsHigh);
// If there was an encoding error, throw an exception.
if (error != 0)
{
std::string message = "PNG encoder error: ";
message += lodepng_error_text(error);
throw ImagerException(message.c_str());
}
}
// The following function searches through all solid objects
// for the first solid (if any) that contains the given point.
// In the case of ties, the solid that was inserted into the
// scene first wins. This arbitrary convention allows the
// composer of a scene to decide which of multiple overlapping
// objects should control the index of refraction for any
// overlapping volumes of space.
const SolidObject* Scene::PrimaryContainer(const Vector& point) const
{
SolidObjectList::const_iterator iter = solidObjectList.begin();
SolidObjectList::const_iterator end = solidObjectList.end();
for (; iter != end; ++iter)
{
const SolidObject* solid = *iter;
if (solid->Contains(point))
{
return solid;
}
}
return NULL;
}
This is the snippet I added it to:
#pragma omp parallel for
for (int i=0; i < largePixelsWide; ++i)
{
direction.x = (i - largePixelsWide/2.0) / largeZoom;
for (int j = 0; j < largePixelsHigh; ++j)
{
direction.y = (largePixelsHigh / 2.0 - j) / largeZoom;
I have made sure all of my VS17 settings are correct and -fopenmp main.cpp is on command arguments. Additional information: This is in another file called scene.cpp, omp header file is added to scene.cpp.
My questions are how can I get this to work and if there are other parts where I can add it where/how would I go about doing it. Thank you.
I have written a code (in c++) to add noise in the image using CImg library. Now i want to load the image with noise and remove those noise inside the image using median filter algorithm.
Below is my code.
int main()
{
int x;
cout<<"Welcome to my app\n";
cout<<"Choose options below\n";
cout<<"1. Remove pepper 2. Add pepper\n";
cin>>x;
if (x==1)
{
cout<<"Needs help";
/*
* i tried to change the noise level to 0 but it did not work like below
* image.noise(0,2);
*
*/
}
else if(x==2)
{
//image file
CImg<unsigned char> image("new.bmp");
const unsigned char red[] = { 255,0,0 }, green[] = { 0,255,0 }, blue[] = { 0,0,255 };
image.noise(100,2);
image.save("new2.bmp");
CImgDisplay main_disp(image, "Image with Pepper noise");
while (!main_disp.is_closed())
{
main_disp.wait();
}
}
getchar();
return 0;
}
If there is another way of doing this using CImg library, I will be thankful!!
According to this tutorial and the definition of the median function you will get something like this:
#include <algorithm>
using namespace std;
// ...
int ksize = 3; // 5, 7, N and so on... for NxN kernel
int ksize2 = ksize/2;
vector<uchar> kernel(ksize*ksize, 0);
for (int i=ksize2;i<image.dimx() - ksize2;i++)
for (int j=ksize2;j<image.dimy() - ksize2;j++)
for (int k=0;k<3;k++) {
// prepare kernel
int n = 0;
for(int l = -ksize2; l <= ksize2; l++)
for(int m = -ksize2; m <= ksize2; m++)
kernel[n++] = image(i + l,j + m,0,k);
// using std::algorithm to find median
sort(kernel.begin(), kernel.end());
// simple assign median value to created empty image
medianFilteredImage(i, j, 0, k) = kernel[kernel.size()/2]; // median is here now
}
}
}
I am doing a homework where we need to write a function which gets an image and a kernel and we have to calculate the 2d spatial convolution.
Using a gaussian kernel I get the expected result (a blurred image) but if I use instead for example an edge detection kernel (taken from here) I see that something isn't working properly (the image becomes very greyish).
I guess the problem is either the border handling, which should be a zero-padding but I am not totally sure if implemented it correctly or the normalization at the end.
Is there a way to display a float image (e.g. one pixel of the float has a value for 25000), because I think it always gets capped at 255 (white) if I don't use the normalization.
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
int main(int argc, char *argv[])
{
cv::Mat img = cv::imread("orig.jpg",0); // load image as grayscale
img.convertTo(img,CV_32FC1); // convert to float
cv::Mat_<float> output(img.rows,img.cols); // create new mat with same size as source image
output = 0;
// creating a kernel (here Gaussian blur)
cv::Mat_<float> kernel(5,5);
kernel << 1,4,6,4,1,4,16,24,16,4,6,24,36,24,6,4,16,24,16,4,1,4,6,4,1;
int kCenterX = kernel.cols/2;
int kCenterY = kernel.rows/2;
for (int i = 0; i < img.rows; i++){ // for every row in image
for (int j = 0; j < img.cols; j++){ // for every column in image
for (int m = 0; m < kernel.rows; m++){ // for every row of kernel
int mm = kernel.rows - 1 -m; // row index of flipped kernel
for (int n = 0; n < kernel.cols; n++){ // for every column of kernel
int nn = kernel.cols - 1 -n; // column index of flipped kernel
// index for border handling
int ii = i + (m - kCenterY);
int jj = j + (n - kCenterX);
// checking if sample is still in bound of input image
// and if not, treat those pixels as 0 (because they won't get added to sum)
if (ii >= 0 && ii < img.rows && jj >= 0 && jj < img.cols)
output.at<float>(i,j) += img.at<float>(ii,jj) * kernel.at<float>(mm,nn);
}
}
}
}
// normalize input and output image (might be wrong, but I don't know how else I can see float images
cv::normalize(output, output, 0, 1, cv::NORM_MINMAX);
cv::normalize(img, img, 0, 1, cv::NORM_MINMAX);
// display images
cv::imshow("Original", img);
cv::imshow("Convolution", output);
cv::waitKey(0);
return 0;
}
I am attempting to find Pedestriants/People in images with the help of a cascade classifier which uses HOG as features.
The problem I'm trying to solve is in the initial stage, feature generation.
Where the HOG values in certain areas of the images are too low and hence the classifier fails.
The images below were captured using a Basler aca640-100gc Camera.
The visualization of the HOG was borrowed from the code in the webpage. Code also attached in the end of the question.
This first image here and its HOG is what I'm trying to achieve.
A realistic outdoor scene which can be used to generate features and hopefully find people. This is not what I have captured using my camera.
Captured Outdoor Images results
The images below are what I have created with the camera. I have tried all basic variations where I have played with the brightness and Focus But this still yeilds a poor result in an outdoor scene. Where I am inside the car and the camera is attached close to the windscreen.
But on the Contrary when the same camera was used to record indoor scene It works fine. Why it works when its in an indoor situtation and why not in an outdoor scene is something I can't understand.
Captured Indoor Images results
As seen in the images below same configuration works for an indoor scene.
Desired results
Ideally I would like results of the out door recordings to look like so.
Could anyone give me insight why this happens?
or How I can over come this issue to generate reliable HOGs for detection?
Code to visualize HOG
Mat img_raw = imread("C:\\testimg.png", 1); // load as color image
resize(img_raw, img_raw, Size(64,128) );
Mat img;
cvtColor(img_raw, img, CV_RGB2GRAY);
HOGDescriptor d;
// Size(128,64), //winSize
// Size(16,16), //blocksize
// Size(8,8), //blockStride,
// Size(8,8), //cellSize,
// 9, //nbins,
// 0, //derivAper,
// -1, //winSigma,
// 0, //histogramNormType,
// 0.2, //L2HysThresh,
// 0 //gammal correction,
// //nlevels=64
//);
// void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
// Size winStride, Size padding,
// const vector<Point>& locations) const
vector<float> descriptorsValues;
vector<Point> locations;
d.compute( img, descriptorsValues, Size(8,8), Size(8,8), locations);
cout << "HOG descriptor size is " << d.getDescriptorSize() << endl;
cout << "img dimensions: " << img.cols << " width x " << img.rows << "height" << endl;
cout << "Found " << descriptorsValues.size() << " descriptor values" << endl;
cout << "Nr of locations specified : " << locations.size() << endl;
Mat get_hogdescriptor_visual_image(Mat& origImg,
vector<float>& descriptorValues,
Size winSize,
Size cellSize,
int scaleFactor,
double viz_factor)
{
Mat visual_image;
resize(origImg, visual_image, Size(origImg.cols*scaleFactor, origImg.rows*scaleFactor));
int gradientBinSize = 9;
// dividing 180° into 9 bins, how large (in rad) is one bin?
float radRangeForOneBin = 3.14/(float)gradientBinSize;
// prepare data structure: 9 orientation / gradient strenghts for each cell
int cells_in_x_dir = winSize.width / cellSize.width;
int cells_in_y_dir = winSize.height / cellSize.height;
int totalnrofcells = cells_in_x_dir * cells_in_y_dir;
float*** gradientStrengths = new float**[cells_in_y_dir];
int** cellUpdateCounter = new int*[cells_in_y_dir];
for (int y=0; y<cells_in_y_dir; y++)
{
gradientStrengths[y] = new float*[cells_in_x_dir];
cellUpdateCounter[y] = new int[cells_in_x_dir];
for (int x=0; x<cells_in_x_dir; x++)
{
gradientStrengths[y][x] = new float[gradientBinSize];
cellUpdateCounter[y][x] = 0;
for (int bin=0; bin<gradientBinSize; bin++)
gradientStrengths[y][x][bin] = 0.0;
}
}
// nr of blocks = nr of cells - 1
// since there is a new block on each cell (overlapping blocks!) but the last one
int blocks_in_x_dir = cells_in_x_dir - 1;
int blocks_in_y_dir = cells_in_y_dir - 1;
// compute gradient strengths per cell
int descriptorDataIdx = 0;
int cellx = 0;
int celly = 0;
for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
{
for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
{
// 4 cells per block ...
for (int cellNr=0; cellNr<4; cellNr++)
{
// compute corresponding cell nr
int cellx = blockx;
int celly = blocky;
if (cellNr==1) celly++;
if (cellNr==2) cellx++;
if (cellNr==3)
{
cellx++;
celly++;
}
for (int bin=0; bin<gradientBinSize; bin++)
{
float gradientStrength = descriptorValues[ descriptorDataIdx ];
descriptorDataIdx++;
gradientStrengths[celly][cellx][bin] += gradientStrength;
} // for (all bins)
// note: overlapping blocks lead to multiple updates of this sum!
// we therefore keep track how often a cell was updated,
// to compute average gradient strengths
cellUpdateCounter[celly][cellx]++;
} // for (all cells)
} // for (all block x pos)
} // for (all block y pos)
// compute average gradient strengths
for (int celly=0; celly<cells_in_y_dir; celly++)
{
for (int cellx=0; cellx<cells_in_x_dir; cellx++)
{
float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
// compute average gradient strenghts for each gradient bin direction
for (int bin=0; bin<gradientBinSize; bin++)
{
gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
}
}
}
cout << "descriptorDataIdx = " << descriptorDataIdx << endl;
// draw cells
for (int celly=0; celly<cells_in_y_dir; celly++)
{
for (int cellx=0; cellx<cells_in_x_dir; cellx++)
{
int drawX = cellx * cellSize.width;
int drawY = celly * cellSize.height;
int mx = drawX + cellSize.width/2;
int my = drawY + cellSize.height/2;
rectangle(visual_image,
Point(drawX*scaleFactor,drawY*scaleFactor),
Point((drawX+cellSize.width)*scaleFactor,
(drawY+cellSize.height)*scaleFactor),
CV_RGB(100,100,100),
1);
// draw in each cell all 9 gradient strengths
for (int bin=0; bin<gradientBinSize; bin++)
{
float currentGradStrength = gradientStrengths[celly][cellx][bin];
// no line to draw?
if (currentGradStrength==0)
continue;
float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
float dirVecX = cos( currRad );
float dirVecY = sin( currRad );
float maxVecLen = cellSize.width/2;
float scale = viz_factor; // just a visual_imagealization scale,
// to see the lines better
// compute line coordinates
float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
// draw gradient visual_imagealization
line(visual_image,
Point(x1*scaleFactor,y1*scaleFactor),
Point(x2*scaleFactor,y2*scaleFactor),
CV_RGB(0,0,255),
1);
} // for (all bins)
} // for (cellx)
} // for (celly)
// don't forget to free memory allocated by helper data structures!
for (int y=0; y<cells_in_y_dir; y++)
{
for (int x=0; x<cells_in_x_dir; x++)
{
delete[] gradientStrengths[y][x];
}
delete[] gradientStrengths[y];
delete[] cellUpdateCounter[y];
}
delete[] gradientStrengths;
delete[] cellUpdateCounter;
return visual_image;
}
I am new at OpenCV and I am trying to write a simple code to get the mean of a block size in an image. I wrote the following code, the build is ok, however, the debug is giving me an unhandled exception at memory location. This exception is at the following line:
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
So, I will be grateful if anyone give me some hints. Thanks in advance and here is the whole code:
#include "opencv2/highgui/highgui.hpp" // Include Libs for OpenCV and Image Processing
#include <opencv2/opencv.hpp> // check that
#include "opencv2/core/core.hpp" // check that
#include <iostream> // Include Libs for C++
#include "opencv2/imgproc/imgproc.hpp" // Include Libs for OpenCV and Image Processing
#include <math.h>
using namespace cv; // namespace parameters not important in OpenCV2.4.6
using namespace std; // namespace parameters not important in OpenCV2.4.6
int main( int argc, const char** argv )
{
/*This part is to compute the parameters(block size, resize parameter) of the new_img*/
int resize_parameter; // resize parameter must be multiplication of 2
resize_parameter = 500;
int block_size; // block parameter must be divisable by of block size
block_size = 50;
if ((resize_parameter % 2) != 0) resize_parameter = resize_parameter - (resize_parameter % 2);
while ((resize_parameter % block_size) != 0) block_size = block_size - 1;
int mean_size = resize_parameter/block_size; // this is the size of the mean matrix
int mean = block_size * block_size; // this no is ti get the mean of every element in the matrix
//int mean_img [mean_size][mean_size] = {}; // the mean image matrix initialized by zero
/*This part is to allocate the array with dynamic size*/
//int** mean_img = new int*[mean_size];
//for(int x = 0; x < mean_size; x++)
//mean_img[x] = new int[mean_size];
/*Then we can use the array*/
/*This part is to fill all the elements of the mean matrix with zeros*/
//memset(mean_img, 0, sizeof(mean_img[0][0]) * mean_size * mean_size);
/*This part is the definition of the matrices that are used for the images*/
Mat mean_img = Mat(mean_size,mean_size,CV_64FC4, cv::Scalar(0)); // define a new matrix with meansize*meansize elements to compute the mean
Mat mean_img_full = Mat(resize_parameter,resize_parameter,CV_64FC4, cv::Scalar(0)); // define a new matrix with resizeparameter*resizeparameter elements to compute the mean
Mat new_img = Mat(resize_parameter,resize_parameter,CV_64FC4); // define a new matrix with resize_parameter*resize_parameter elements
Mat original_img = imread("Desert.JPG", CV_LOAD_IMAGE_GRAYSCALE); //define a new matrix and read the image data in the file "Desert.JPG" and store it in 'original_img'
// notes: the location of the image must be in the same directory of the C++ file
if (original_img.empty()) //check whether the image is loaded or not
{
cout << "Error : Image cannot be loaded..!!" << endl;
//system("pause"); //wait for a key press
return -1;
}
// explicitly specify dsize=dst.size(); fx and fy will be computed from that.
// resize( src matrix, dst matrix, dst.size to get the size of the dst matrix, 0, 0 "to deal with the dst matrix size, may be 0.5 or any fraction from the src size, "AREA,CUBIC,LINEAR")
resize(original_img, new_img, new_img.size(), 0, 0, CV_INTER_AREA);
/*This part is to compute the mean of each block*/
for ( int i = 0; i < resize_parameter; i = i + block_size) // i represents the index of the raw
{
for ( int j = 0; j < resize_parameter; j = j + block_size) // for the blocks in the same raw with different columns
{
for ( int x = 0; x < block_size; x++) // x represents the index of the raw
{
for ( int y = 0; y < block_size; y++) // y represents the index of the column
{
//cout << i ; //cout << "\n"; //cout << j ; //cout << "\n"; //cout << x ; //cout << "\n"; //cout << y ; //cout << "\n";
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
}
}
}
}
/*This is the end of the part to compute the mean of each block*/
/*This part is to fill all the resize matrix with the mean value*/
for ( int x = 0; x < resize_parameter/block_size; x++) // x represents the index of the raw in the mean matrix
{
for ( int y = 0; y < resize_parameter/block_size; y++) // y represents the index of the column in the mean matrix
{
for ( int i = 0; i < block_size; i++) // i represents the index of the raw in the mean_full matrix
{
for ( int j = 0; j < block_size; j++) // j represents the index of the column in the mean_full matrix
{
mean_img_full.at<double>((x*block_size)+i,(y*block_size)+j) = mean_img.at<double>(x,y);
}
}
}
}
//cout << cv::getBuildInformation() << endl;
/*This is the end of the part to fill all the resize matrix with the mean value*/
namedWindow("OriginalImage", CV_WINDOW_AUTOSIZE); //create a window with the name "OriginalImage"
imshow("OriginalImage", original_img); //display the image which is stored in the 'original_img' in the "OriginalImage" window
namedWindow("NewImage", CV_WINDOW_AUTOSIZE); //create a window with the name "NewImage"
imshow("NewImage", new_img); //display the image which is stored in the 'new_img' in the "NewImage" window
namedWindow("MeanImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanImage"
imshow("MeanImage", mean_img); //display the image which is stored in the 'mean_img' in the "MeanImage" window
namedWindow("MeanFullImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanFullImage"
imshow("MeanFullImage", mean_img_full); //display the image which is stored in the 'mean_img_full' in the "MeanFullImage" window
waitKey(0); //wait infinite time for a keypress
destroyWindow("OriginalImage"); //destroy the window with the name, "OriginalImage"
destroyWindow("NewImage"); //destroy the window with the name, "NewImage"
destroyWindow("MeanImage"); //destroy the window with the name, "MeanImage"
destroyWindow("MeanFullImage"); //destroy the window with the name, "MeanImage"
return 0;
}
The problem was at the definition of the type of each matrix. It has to be 8 Bits Unsigned Character. It is working now. Thanks a lot ,,,