I'm trying to implement an averaging filter with a 5x5 kernel. Although OpenCV has a built-in function for this, I need to do it without using it.
Something is wrong, and I think the problem is the uchar variables; I also tried int, float and double, but the resulting image is still not correct. I use an image with a padding of 7.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
#include "filter.h"
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
/**
 * @brief Applies a 5x5 averaging (box) filter to an 8-bit grayscale image.
 *
 * Pixels closer than 7 rows/columns to the image edge are not filtered and
 * stay 0 in the result (the input is expected to carry a 7-pixel padding).
 *
 * @param image_in Source image; must be of type CV_8UC1.
 * @return A CV_8UC1 image of the same size holding the rounded local means.
 */
cv::Mat filter::mean_filter(cv::Mat& image_in){
    // at<uchar>() below is only valid for 8-bit single-channel data.
    CV_Assert(image_in.type() == CV_8UC1);

    const int margin = 7;                        // border left untouched
    const int radius = 2;                        // 5x5 window
    const int area = (2 * radius + 1) * (2 * radius + 1);
    const int nlines = image_in.rows;
    const int ncols = image_in.cols;

    // The mean of 8-bit pixels always fits in 8 bits. A CV_32S result is
    // scaled down by imshow and therefore looks almost black.
    cv::Mat image_out = cv::Mat::zeros(nlines, ncols, CV_8UC1);

    // Signed indices: with unsigned ones, `nlines - margin` would wrap to a
    // huge value for images smaller than the margin.
    for (int j = margin; j < nlines - margin; j++){
        for (int z = margin; z < ncols - margin; z++){
            int sum = 0;
            for (int dy = -radius; dy <= radius; dy++){
                for (int dx = -radius; dx <= radius; dx++){
                    sum += image_in.at<uchar>(j + dy, z + dx);
                }
            }
            // Integer round-to-nearest; area is odd, so exact ties cannot occur.
            image_out.at<uchar>(j, z) = static_cast<uchar>((sum + area / 2) / area);
        }
    }
    return image_out;
}
I changed your code a bit and have a working solution. It is quite a primitive approach, but it works.
Possible improvements could be to reuse some of the already accumulated pixel-values by tracking which pixels leave the kernel area and which pixels enter it.
Another possibility for improvement is to parallelise the loop over the image.
/**
 * @brief Averaging (box) filter with a square kernel and zero padding.
 *
 * @param image_in Source image; must be of type CV_8UC1.
 * @param kernel   Side length of the kernel; must be odd and >= 1.
 * @return A CV_8UC1 image of the same size as image_in where each pixel is
 *         the rounded mean of the kernel x kernel neighbourhood. Pixels
 *         outside the image count as 0.
 */
cv::Mat mean_filter(cv::Mat& image_in, int kernel)
{
    // Make sure you get a grayscale image.
    assert(image_in.type() == CV_8UC1);
    // Make sure the kernel size is an odd number ...
    assert(kernel % 2 == 1);
    // ... and at least 1.
    assert(kernel >= 1);

    // Border needed so the kernel never leaves the padded image.
    const int padding = (kernel - 1) / 2;
    const int area = kernel * kernel;
    const int nlines = image_in.rows;
    const int ncols = image_in.cols;

    // Pad with zeros so border pixels can be filtered without cropping.
    cv::Mat img_temp;
    cv::copyMakeBorder(image_in, img_temp, padding, padding, padding, padding,
                       cv::BORDER_CONSTANT, cv::Scalar::all(0));

    // The output is grayscale, like the input.
    cv::Mat image_out = cv::Mat::zeros(nlines, ncols, CV_8UC1);

    for (int j = 0; j < nlines; j++){
        for (int z = 0; z < ncols; z++){
            // (j, z) in the output corresponds to the kernel whose top-left
            // corner sits at (j, z) in the padded image.
            int sum = 0;
            for (int x = 0; x < kernel; x++){
                for (int y = 0; y < kernel; y++){
                    sum += img_temp.at<uchar>(j + x, z + y);
                }
            }
            // Round to nearest instead of truncating: a plain integer
            // division would always round down. area is odd, so exact ties
            // cannot occur.
            image_out.at<uchar>(j, z) = static_cast<uchar>((sum + area / 2) / area);
        }
    }
    return image_out;
}
Related
I am doing a homework where we need to write a function which gets an image and a kernel and we have to calculate the 2d spatial convolution.
Using a gaussian kernel I get the expected result (a blurred image) but if I use instead for example an edge detection kernel (taken from here) I see that something isn't working properly (the image becomes very greyish).
I guess the problem is either the border handling, which should be a zero-padding but I am not totally sure if implemented it correctly or the normalization at the end.
Is there a way to display a float image (e.g. one pixel of the float has a value for 25000), because I think it always gets capped at 255 (white) if I don't use the normalization.
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
// Loads "orig.jpg" as grayscale, convolves it with a 5x5 kernel using
// zero padding, and shows the input and the result side by side.
int main(int argc, char *argv[])
{
    cv::Mat img = cv::imread("orig.jpg",0); // load image as grayscale
    if (img.empty()) {
        // imread returns an empty matrix on failure; imshow would abort on it.
        std::cerr << "Could not load orig.jpg" << std::endl;
        return 1;
    }
    img.convertTo(img,CV_32FC1); // convert to float

    cv::Mat_<float> output(img.rows,img.cols); // same size as the source image
    output = 0;

    // creating a kernel (here Gaussian blur)
    cv::Mat_<float> kernel(5,5);
    kernel << 1,4,6,4,1,4,16,24,16,4,6,24,36,24,6,4,16,24,16,4,1,4,6,4,1;
    const int kCenterX = kernel.cols/2;
    const int kCenterY = kernel.rows/2;

    for (int i = 0; i < img.rows; i++){ // for every row in image
        for (int j = 0; j < img.cols; j++){ // for every column in image
            float acc = 0.0f;
            for (int m = 0; m < kernel.rows; m++){ // for every row of kernel
                const int mm = kernel.rows - 1 - m; // row index of flipped kernel
                const int ii = i + (m - kCenterY);  // sampled image row
                // Rows outside the image contribute 0 (zero padding).
                if (ii < 0 || ii >= img.rows)
                    continue;
                for (int n = 0; n < kernel.cols; n++){ // for every column of kernel
                    const int nn = kernel.cols - 1 - n; // column index of flipped kernel
                    const int jj = j + (n - kCenterX);  // sampled image column
                    // Columns outside the image contribute 0 as well.
                    if (jj >= 0 && jj < img.cols)
                        acc += img.at<float>(ii,jj) * kernel.at<float>(mm,nn);
                }
            }
            output.at<float>(i,j) = acc;
        }
    }

    // imshow expects float images in [0,1]. NORM_MINMAX maps the smallest
    // value to 0 and the largest to 1, so for kernels that produce negative
    // responses (e.g. edge detection) a response of 0 lands on a mid-gray.
    cv::normalize(output, output, 0, 1, cv::NORM_MINMAX);
    cv::normalize(img, img, 0, 1, cv::NORM_MINMAX);

    // display images
    cv::imshow("Original", img);
    cv::imshow("Convolution", output);
    cv::waitKey(0);
    return 0;
}
Given a binary image, I want to return the list of indices for white pixels in it using GPU (Compute Unified Device Architecture). How to determine the index for points vector?
Here is the CUDA Kernel .
// Sketch of a kernel that should copy the locations of white (255) pixels of
// a binary image into `points`. Each thread handles one pixel, addressed by
// its 2D thread/block index.
// NOTE(review): this snippet is incomplete and does not compile as written:
//  - `input` is not declared here; presumably `bin_image` is meant.
//  - the index into `points` is a placeholder (it is the open question).
//  - `points` is declared as float* but is assigned a Point2f.
__global__ void get_white_pixels_kernel(unsigned char* bin_image,
float * points,
int width,
int height,
int grayWidthStep)
{
// Pixel coordinates handled by this thread.
int row_index = threadIdx.y+ blockIdx.y*blockDim.y;
int col_index = threadIdx.x+blockIdx.x*blockDim.x;
// Skip threads that fall outside the image.
if ((col_index < width) && (row_index < height))
{
// Linear offset of the pixel: row * grayWidthStep + column
// (presumably grayWidthStep is the row pitch in bytes - confirm with caller)
const int gray_tid = row_index * grayWidthStep + col_index;
if(input[gray_tid]==255)
points[--here is the index]= Point2f(row_index,col_index);
}
}
Following is a naive method to achieve the desired functionality:
Generate a mask of pixel indices with dummy values for pixel with zero value.
Count the number of non-zero pixels
Create an output vector with length equal to non-zero count.
Copy the non-zero pixel indices from the generated mask to the output vector (a process known as stream-compaction)
Following is a sample code for the above mentioned process.
Code
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include <thrust/count.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <opencv2/opencv.hpp>
// Aborts the program with a diagnostic when a CUDA runtime call has failed.
// `file` and `line` identify the call site and are supplied by CHECK_ERR.
static void _check_err(cudaError_t err, const char* file, int line)
{
    // A zero error code means success: nothing to report.
    if(!err)
        return;

    printf("CUDA Error: %s\nFile: %s\nLine: %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
}
// Wraps a CUDA runtime call and records the call site for the error report.
#define CHECK_ERR(err) _check_err((err), __FILE__, __LINE__)
// One thread per pixel: writes the pixel's (x, y) coordinates into `indices`
// when the pixel is non-zero and the dummy pair (-1, -1) otherwise.
// `input` is addressed as y * step + x, `indices` as y * width + x.
__global__ void kernel_find_indices(const unsigned char* input, int width, int height, int step, int2* indices)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image have nothing to do.
    if(col >= width || row >= height)
        return;

    const bool isSet = input[row * step + col] != 0;

    int2 entry;
    entry.x = isSet ? col : -1;
    entry.y = isSet ? row : -1;
    indices[row * width + col] = entry;
}
// Predicate: true for an index that does not carry the dummy value -1 in
// either coordinate, i.e. an index written for a non-zero pixel.
struct isNonZeroIndex
{
    __host__ __device__ bool operator()(const int2 &idx)
    {
        const bool isDummy = (idx.x == -1) || (idx.y == -1);
        return !isDummy;
    }
};
/**
 * @brief Returns the (x, y) coordinates of all non-zero pixels of an image,
 *        computed on the GPU.
 *
 * Steps: build a per-pixel index mask with dummy entries for zero pixels,
 * count the real entries, then compact them (stream compaction).
 *
 * @param input 8-bit single-channel image (CV_8UC1). May be a ROI view.
 * @return Coordinates of the non-zero pixels; empty for an empty image.
 */
std::vector<cv::Point> getIndicesOfNonZeroPixels(cv::Mat input)
{
    std::vector<cv::Point> output;

    // An empty image would lead to a zero-sized launch grid.
    if(input.empty())
        return output;

    // The kernel reads one byte per pixel.
    CV_Assert(input.type() == CV_8UC1);

    const size_t rowBytes = static_cast<size_t>(input.cols);
    const size_t rowCount = static_cast<size_t>(input.rows);
    const size_t pixelCount = rowBytes * rowCount;

    // device_vector releases its memory automatically, including when a
    // later thrust call throws.
    thrust::device_vector<unsigned char> image_d(pixelCount);
    thrust::device_vector<int2> index_buffer_d(pixelCount);

    // Copy row by row into a dense device buffer. Copying step * rows bytes
    // in one go would read past the end of the host buffer for ROI views.
    CHECK_ERR(cudaMemcpy2D(thrust::raw_pointer_cast(image_d.data()), rowBytes,
                           input.ptr(), input.step,
                           rowBytes, rowCount,
                           cudaMemcpyHostToDevice));

    dim3 block(16,16);
    dim3 grid;
    grid.x = (input.cols + block.x - 1) / block.x;
    grid.y = (input.rows + block.y - 1) / block.y;

    //Generate an index mask with dummy values for indices with zero pixel value.
    //The device image is dense, so its row step equals its width.
    kernel_find_indices<<<grid, block>>>(thrust::raw_pointer_cast(image_d.data()),
                                         input.cols, input.rows, input.cols,
                                         thrust::raw_pointer_cast(index_buffer_d.data()));
    // Launch errors are reported by cudaGetLastError, execution errors by
    // the synchronisation.
    CHECK_ERR(cudaGetLastError());
    CHECK_ERR(cudaDeviceSynchronize());

    const size_t nonZeroCount = static_cast<size_t>(
        thrust::count_if(index_buffer_d.begin(), index_buffer_d.end(), isNonZeroIndex()));

    //Keep only those indices whose pixel value is non-zero (stream compaction)
    thrust::device_vector<int2> compacted(nonZeroCount);
    thrust::copy_if(index_buffer_d.begin(), index_buffer_d.end(), compacted.begin(), isNonZeroIndex());

    //Copy non-zero pixel indices to host
    std::vector<int2> output_int2(nonZeroCount);
    thrust::copy(compacted.begin(), compacted.end(), output_int2.begin());

    //Convert vector<int2> to vector<cv::Point>
    output.reserve(nonZeroCount);
    for(size_t i = 0; i < nonZeroCount; i++)
        output.push_back(cv::Point(output_int2[i].x, output_int2[i].y));

    return output;
}
void run_test()
{
//Generate a sample test image
cv::Mat test = cv::Mat::zeros(100,100, CV_8UC1);
cv::rectangle(test, cv::Rect(5,5,20,20), cv::Scalar::all(255), CV_FILLED);
//Get pixel indices of non-zero pixels
std::vector<cv::Point> indices = getIndicesOfNonZeroPixels(test);
//Display those indices
for(size_t i=0; i<indices.size(); i++)
{
printf("%d, %d\n", indices[i].x, indices[i].y);
}
//Show image
cv::imshow("Sample", test);
cv::waitKey();
}
// Entry point: runs the demo; command-line arguments are not used.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    run_test();
    return 0;
}
Compilation Command
nvcc -o nz nz.cu -arch=sm_61 -L/usr/local/lib -lopencv_core
-lopencv_highgui -lopencv_imgproc
Please keep in mind that this code is for image of type 8UC1 (8 bit, single channel) only. You can easily extend it to other data-types as required.
I'm relatively new to C in general and I'm trying to make a small image filter while using pthreads. After a few hours of playing around with pointers and references, it goes through the compiler but then I get a segmentation fault, the code is the following:
#include <pthread.h>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
using namespace std;
using namespace cv;
#define WIDTH 3
#define HEIGHT 4
#define NUM_THREADS 4
struct readThreadParams{
Mat img;
Mat out;
int yStart;
int xEnd;
int yEnd;
int xRad;
int yRad;
};
/**
 * @brief Replaces one block of `out` with the average colour of the same
 *        block in `img`.
 *
 * The block starts at column x1 / row y1 and spans xRad columns and yRad
 * rows; it is clipped to the bounds of both images.
 *
 * @param img  Source image, 3 channels of 8 bits (accessed as Vec3b).
 * @param out  Destination image, same layout as img.
 * @param x1   Column of the block's top-left pixel.
 * @param y1   Row of the block's top-left pixel.
 * @param xRad Block width in pixels.
 * @param yRad Block height in pixels.
 * @return Mean of the three averaged channel values, or 0 when the clipped
 *         block is empty.
 */
uchar getAverage(Mat &img, Mat &out, const float x1, const float y1, const int xRad, const int yRad){
    const int x0 = static_cast<int>(x1);
    const int y0 = static_cast<int>(y1);

    // Clip the block so neither image is accessed out of bounds.
    int xStop = x0 + xRad;
    if (xStop > img.cols) xStop = img.cols;
    if (xStop > out.cols) xStop = out.cols;
    int yStop = y0 + yRad;
    if (yStop > img.rows) yStop = img.rows;
    if (yStop > out.rows) yStop = out.rows;

    if (x0 < 0 || y0 < 0 || x0 >= xStop || y0 >= yStop)
        return 0;

    // Wide accumulators: a uchar would overflow after a couple of pixels.
    unsigned long blue = 0;
    unsigned long green = 0;
    unsigned long red = 0;
    for (int r = y0; r < yStop; r++){
        for (int c = x0; c < xStop; c++){
            const Vec3b &intensity = img.at<Vec3b>(r, c);
            blue += intensity.val[0];
            green += intensity.val[1];
            red += intensity.val[2];
        }
    }

    // Divide by the number of pixels actually summed.
    const unsigned long count =
        static_cast<unsigned long>(xStop - x0) * static_cast<unsigned long>(yStop - y0);
    Vec3b outColor;
    outColor[0] = static_cast<uchar>(blue / count);
    outColor[1] = static_cast<uchar>(green / count);
    outColor[2] = static_cast<uchar>(red / count);

    for (int r = y0; r < yStop; r++){
        for (int c = x0; c < xStop; c++)
            out.at<Vec3b>(r, c) = outColor;
    }

    return static_cast<uchar>((outColor[0] + outColor[1] + outColor[2]) / 3);
}
void* parallel_processing_task(void * param){
//This is what each thread should do:
struct readThreadParams *input = (struct readThreadParams*)param;
Mat img = input->img;
Mat out = input->out;
const float yStart = input->yStart;
const float xEnd = input->xEnd;
const float yEnd = input->yEnd;
const float xRad = input->xRad;
const float yRad = input->yRad;
for (int c = 0; c < xEnd; c + xRad){
for (int r=yStart; r < yEnd; r + yRad){
getAverage(img, out, c, r, xRad, yRad);
}
}
}
int main(int argc, char *argv[]){
//prepare variables
pthread_t threads[NUM_THREADS];
void* return_status;
struct readThreadParams input;
int t;
Mat img = imread("image.jpg", IMREAD_COLOR);
int ROWS = img.rows;
int COLS = img.cols;
Mat out(ROWS, COLS, CV_8UC3);
input.img = img;
input.out = out;
input.xEnd = COLS;
input.xRad = WIDTH;
input.yRad = HEIGHT;
double t2 = (double) getTickCount();
for (int r = 0; r<ROWS ; ceil(ROWS/NUM_THREADS)){
input.yStart = r;
input.yEnd = r + ceil(ROWS/NUM_THREADS);
pthread_create(&threads[t], NULL, parallel_processing_task, (void *)&input);
}
for(t=0; t<NUM_THREADS; t++){
pthread_join(threads[t], &return_status);
}
t2 = ((double) getTickCount() - t2) / getTickFrequency();
//print execution time
cout << "Execution time: " << t2 << " s" << endl;
//result image
imwrite("output.png", out);
return(0);
}
I used GDB to find the culprit and managed to get as far as finding out it's on line 107:
pthread_create(&threads[t], NULL, parallel_processing_task, (void *)&input);
At this point, I tried going all over the place to find solutions, I tried the following:
Changing the way I defined the struct, making it receive pointers, which I later found out didn't work.
Changing the way I pass arguments (such as adding or removing
(void *) where it seemed proper), which ended up in a bigger mess of
errors or simply the same error at the end.
Furthermore, being new to this language doesn't really help me out when trying to read the gdb bt output:
#0 __pthread_create_2_1 (newthread=<optimized out>, attr=<optimized out>, start_routine=<optimized out>, arg=<optimized out>) at pthread_create.c:601
#1 0x00011a00 in main(argc=1, argv=0x7efff394) at file.cpp:107
A part of me wants to think the problem is related to the optimized out parts, but looking it up yields no results, or at least, I may not be looking properly.
Any thoughts as to what I may be doing wrong here? I would very much appreciate the help!
You have not initialised t prior to using it in
pthread_create(&threads[t], NULL, parallel_processing_task, (void *)&input);
So this is likely to lead to undefined behaviour, as t may hold any value, which could make &threads[t] access invalid memory.
I am making an application that uses OCR and I am using OpenCV to threshold the image to improve the OCR results, I have gotten pretty good results but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample Images:
After:
One of the best algorithms for the thresholding problem in the OCR field is the Sauvola method. You can use the code below.
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
// Binarization algorithms selectable in BhThresholder::doThreshold.
enum class BhThresholdMethod
{
    OTSU,
    NIBLACK,
    SAUVOLA,
    WOLFJOLION
};

// Thresholds (binarizes) an image with one of the BhThresholdMethod algorithms.
class BhThresholder
{
public:
    // Binarizes `src` into `dst` using the algorithm selected by `method`.
    void doThreshold(InputArray src, OutputArray dst, const BhThresholdMethod &method);
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
// Pixel access shorthands, used as `mat.uget(x, y)` etc. They take the
// column (x) first and forward to at<T>(row, column), i.e. at<T>(y, x).
// uget/uset access unsigned char elements, fget/fset float elements.
// NOTE: the expansions of uset and fset already end with a ';'.
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
// Slides a win_x x win_y window over `im` (read as unsigned char) and stores,
// for every window position, the window mean in map_m and the window
// standard deviation in map_s (both written as float) at the window centre.
// Each row starts with a full window sum; subsequent windows are obtained by
// subtracting the column that leaves and adding the column that enters.
// Returns the largest standard deviation encountered.
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
    const int halfW = win_x / 2;
    const int halfH = win_y / 2;
    const int lastRow = im.rows - halfH - 1;
    const int lastLeft = im.cols - win_x;
    const double area = win_x * win_y;

    double total = 0, totalSq = 0;   // running sum and sum of squares
    double maxStd = 0;

    // Derives mean / standard deviation from the running sums and stores
    // them at column `col`, row `row`.
    auto record = [&](int col, int row) {
        const double mean = total / area;
        const double sd = sqrt ((totalSq - (total*total)/area)/area);
        if (sd > maxStd)
            maxStd = sd;
        map_m.fset(col, row, mean);
        map_s.fset(col, row, sd);
    };

    for (int row = halfH; row <= lastRow; ++row) {
        const int top = row - halfH;

        // Full sum for the leftmost window of this row
        total = totalSq = 0;
        for (int dy = 0; dy < win_y; ++dy) {
            for (int dx = 0; dx < win_x; ++dx) {
                const double v = im.uget(dx, top + dy);
                total += v;
                totalSq += v*v;
            }
        }
        record(halfW, row);

        // Move the window one column to the right at a time
        for (int left = 1; left <= lastLeft; ++left) {
            for (int dy = 0; dy < win_y; ++dy) {
                // column leaving the window on the left
                const double gone = im.uget(left-1, top + dy);
                total -= gone;
                totalSq -= gone*gone;
                // column entering the window on the right
                const double fresh = im.uget(left+win_x-1, top + dy);
                total += fresh;
                totalSq += fresh*fresh;
            }
            record(left + halfW, row);
        }
    }
    return maxStd;
}
// Binarizes _src into _dst with a locally adaptive threshold.
// For every pixel a threshold is derived from the mean (m) and standard
// deviation (s) of the winx x winy window around it; which formula is used
// depends on `version` (NIBLACK, SAUVOLA or WOLFJOLION). Pixels >= threshold
// become 255, all others 0.
//   k  - weight of the deviation term in all three formulas
//   dR - divisor of s in the Sauvola formula
// Any other `version` (e.g. OTSU) prints a message and calls exit(1).
// NOTE(review): _dst is only read via getMat(); presumably the caller must
// have allocated it as an 8-bit image of src's size - confirm.
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
// Half window sizes and the first/last coordinates for which a full window
// fits inside the image.
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
// mx and my are not used in this function.
int mx, my;
// Create local statistics and store them in float (CV_32FC1) matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
// Only min_I is used below (Wolf-Jolion formula).
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++) {
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++) {
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version) {
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
}
thsurf.fset(i+wxh,j,th);
// The first threshold of the row is replicated into the left border.
// Note: the inner loops below declare their own `i`, which shadows the
// outer column index inside those loops only.
if (i==0) {
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
}
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
}
// From here on `th` still holds the value computed for the last column of
// the loop above; it is replicated into the right border.
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
}
cerr << "surface created" << endl;
// Apply the threshold surface: 255 where the pixel reaches its threshold,
// 0 elsewhere.
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
{
if (src.uget(x,y) >= thsurf.fget(x,y))
{
dst.uset(x,y,255);
}
else
{
dst.uset(x,y,0);
}
}
}
/**
 * @brief Binarizes an image with the selected method and inverts the result.
 *
 * @param _src   Source image (read as 8-bit single channel).
 * @param _dst   Receives the inverted binary image (CV_8UC1, size of _src).
 * @param method OTSU applies a 5x5 Gaussian blur followed by Otsu's global
 *               threshold; NIBLACK, SAUVOLA and WOLFJOLION use the local
 *               window statistics of NiblackSauvolaWolfJolion().
 */
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
    Mat src = _src.getMat();

    // Window size for the local methods, derived from the image size.
    int winy = static_cast<int>(2.0 * src.rows - 1) / 3;
    int winx = (src.cols - 1 < winy) ? src.cols - 1 : winy;
    // if the window is too big, then we assume that the image
    // is not a single text box, but a document page: set
    // the window size to a fixed constant.
    if (winx > 100)
        winx = winy = 40;

    const double optK = 0.5;   // weight of the deviation term
    const double dR = 128;     // divisor of the deviation in the Sauvola formula

    // Threshold
    _dst.create(src.size(), CV_8UC1);
    Mat dst = _dst.getMat();

//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif

    switch (method)
    {
    case BhThresholdMethod::OTSU :
        // Smooth first; only the global method works on the blurred image.
        GaussianBlur(src,dst,Size(5,5),0);
        threshold(dst,dst,128,255,CV_THRESH_OTSU);
        break;
    // NIBLACK belongs here too: without its own case label dst was never
    // thresholded for that method.
    case BhThresholdMethod::NIBLACK :
    case BhThresholdMethod::SAUVOLA :
    case BhThresholdMethod::WOLFJOLION :
        // The local methods read src directly and overwrite every pixel of dst.
        NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, dR);
        break;
    }

    bitwise_not(dst,dst);

#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
Here is a comparison table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
Try to sample with better resolution.
I am new at OpenCV and I am trying to write a simple code to get the mean of a block size in an image. I wrote the following code, the build is ok, however, the debug is giving me an unhandled exception at memory location. This exception is at the following line:
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
So, I will be grateful if anyone give me some hints. Thanks in advance and here is the whole code:
#include "opencv2/highgui/highgui.hpp" // Include Libs for OpenCV and Image Processing
#include <opencv2/opencv.hpp> // check that
#include "opencv2/core/core.hpp" // check that
#include <iostream> // Include Libs for C++
#include "opencv2/imgproc/imgproc.hpp" // Include Libs for OpenCV and Image Processing
#include <math.h>
using namespace cv; // namespace parameters not important in OpenCV2.4.6
using namespace std; // namespace parameters not important in OpenCV2.4.6
int main( int argc, const char** argv )
{
/*This part is to compute the parameters(block size, resize parameter) of the new_img*/
int resize_parameter; // resize parameter must be multiplication of 2
resize_parameter = 500;
int block_size; // block parameter must be divisable by of block size
block_size = 50;
if ((resize_parameter % 2) != 0) resize_parameter = resize_parameter - (resize_parameter % 2);
while ((resize_parameter % block_size) != 0) block_size = block_size - 1;
int mean_size = resize_parameter/block_size; // this is the size of the mean matrix
int mean = block_size * block_size; // this no is ti get the mean of every element in the matrix
//int mean_img [mean_size][mean_size] = {}; // the mean image matrix initialized by zero
/*This part is to allocate the array with dynamic size*/
//int** mean_img = new int*[mean_size];
//for(int x = 0; x < mean_size; x++)
//mean_img[x] = new int[mean_size];
/*Then we can use the array*/
/*This part is to fill all the elements of the mean matrix with zeros*/
//memset(mean_img, 0, sizeof(mean_img[0][0]) * mean_size * mean_size);
/*This part is the definition of the matrices that are used for the images*/
Mat mean_img = Mat(mean_size,mean_size,CV_64FC4, cv::Scalar(0)); // define a new matrix with meansize*meansize elements to compute the mean
Mat mean_img_full = Mat(resize_parameter,resize_parameter,CV_64FC4, cv::Scalar(0)); // define a new matrix with resizeparameter*resizeparameter elements to compute the mean
Mat new_img = Mat(resize_parameter,resize_parameter,CV_64FC4); // define a new matrix with resize_parameter*resize_parameter elements
Mat original_img = imread("Desert.JPG", CV_LOAD_IMAGE_GRAYSCALE); //define a new matrix and read the image data in the file "Desert.JPG" and store it in 'original_img'
// notes: the location of the image must be in the same directory of the C++ file
if (original_img.empty()) //check whether the image is loaded or not
{
cout << "Error : Image cannot be loaded..!!" << endl;
//system("pause"); //wait for a key press
return -1;
}
// explicitly specify dsize=dst.size(); fx and fy will be computed from that.
// resize( src matrix, dst matrix, dst.size to get the size of the dst matrix, 0, 0 "to deal with the dst matrix size, may be 0.5 or any fraction from the src size, "AREA,CUBIC,LINEAR")
resize(original_img, new_img, new_img.size(), 0, 0, CV_INTER_AREA);
/*This part is to compute the mean of each block*/
for ( int i = 0; i < resize_parameter; i = i + block_size) // i represents the index of the raw
{
for ( int j = 0; j < resize_parameter; j = j + block_size) // for the blocks in the same raw with different columns
{
for ( int x = 0; x < block_size; x++) // x represents the index of the raw
{
for ( int y = 0; y < block_size; y++) // y represents the index of the column
{
//cout << i ; //cout << "\n"; //cout << j ; //cout << "\n"; //cout << x ; //cout << "\n"; //cout << y ; //cout << "\n";
mean_img.at<double>(i/block_size, j/block_size) = mean_img.at<double>(i/block_size,j/block_size) + new_img.at<double>(i + x, j + y) / (mean);
}
}
}
}
/*This is the end of the part to compute the mean of each block*/
/*This part is to fill all the resize matrix with the mean value*/
for ( int x = 0; x < resize_parameter/block_size; x++) // x represents the index of the raw in the mean matrix
{
for ( int y = 0; y < resize_parameter/block_size; y++) // y represents the index of the column in the mean matrix
{
for ( int i = 0; i < block_size; i++) // i represents the index of the raw in the mean_full matrix
{
for ( int j = 0; j < block_size; j++) // j represents the index of the column in the mean_full matrix
{
mean_img_full.at<double>((x*block_size)+i,(y*block_size)+j) = mean_img.at<double>(x,y);
}
}
}
}
//cout << cv::getBuildInformation() << endl;
/*This is the end of the part to fill all the resize matrix with the mean value*/
namedWindow("OriginalImage", CV_WINDOW_AUTOSIZE); //create a window with the name "OriginalImage"
imshow("OriginalImage", original_img); //display the image which is stored in the 'original_img' in the "OriginalImage" window
namedWindow("NewImage", CV_WINDOW_AUTOSIZE); //create a window with the name "NewImage"
imshow("NewImage", new_img); //display the image which is stored in the 'new_img' in the "NewImage" window
namedWindow("MeanImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanImage"
imshow("MeanImage", mean_img); //display the image which is stored in the 'mean_img' in the "MeanImage" window
namedWindow("MeanFullImage", CV_WINDOW_AUTOSIZE); //create a window with the name "MeanFullImage"
imshow("MeanFullImage", mean_img_full); //display the image which is stored in the 'mean_img_full' in the "MeanFullImage" window
waitKey(0); //wait infinite time for a keypress
destroyWindow("OriginalImage"); //destroy the window with the name, "OriginalImage"
destroyWindow("NewImage"); //destroy the window with the name, "NewImage"
destroyWindow("MeanImage"); //destroy the window with the name, "MeanImage"
destroyWindow("MeanFullImage"); //destroy the window with the name, "MeanImage"
return 0;
}
The problem was in the definition of the type of each matrix. It has to be 8-bit unsigned char (CV_8U). It is working now. Thanks a lot!