Correct threshold for HoughCircles() circle detection in OpenCV C++

I want to detect circles in an image with OpenCV in C++. I have tried various values for dp, param1, and param2, but I could not find ones that work.
The radius of the circles is around 40 pixels.
I have added a link to sample images. Which values do I need to pass to HoughCircles() for correct results?
Link to sample images.

You can use the following parameter values; these settings are able to detect the circles in your images.
cv::Mat img = cv::imread("75.bmp");
cv::Mat img_gray;
cv::cvtColor(img, img_gray, cv::COLOR_BGR2GRAY);
img_gray.convertTo(img_gray, CV_8UC1);

std::vector<cv::Vec3f> circles;
double minDist = 20;
double dp = 1;
double param1 = 200;
double param2 = 10;
int minRadius = 15;
int maxRadius = 25;

cv::HoughCircles(img_gray, circles, cv::HOUGH_GRADIENT, dp, minDist, param1, param2, minRadius, maxRadius);

for (size_t current_circle = 0; current_circle < circles.size(); ++current_circle) {
    cv::Point center(cvRound(circles[current_circle][0]), cvRound(circles[current_circle][1]));
    int radius = cvRound(circles[current_circle][2]);
    cv::circle(img, center, radius, cv::Scalar(0, 255, 0), 1);
}
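For reference, param1 is the upper threshold of the internal Canny edge detector and param2 is the accumulator threshold: the lower param2 is, the more (possibly false) circles are returned. If detection is still unstable on noisy input, smoothing before the call often helps; a minimal sketch, where the kernel size is an assumption to tune:
// Hypothetical preprocessing: a median blur suppresses speckle noise
// that otherwise produces spurious accumulator votes.
cv::medianBlur(img_gray, img_gray, 5);
cv::HoughCircles(img_gray, circles, cv::HOUGH_GRADIENT, dp, minDist, param1, param2, minRadius, maxRadius);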

Related

Improve Text Binarization / OCR Preprocessing with OpenCV

I am building a scanner feature for my app, and I binarize the photo of the document with OpenCV:
// convert to greyscale
cv::Mat converted, blurred, blackAndWhite;
converted = cv::Mat(inputMatrix.rows, inputMatrix.cols, CV_8UC1);
cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY);

// remove noise
cv::GaussianBlur(converted, blurred, cv::Size(3, 3), 0);

// adaptive threshold
cv::adaptiveThreshold(blurred, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15, 9);
The result is okay, but scans from dedicated scanner apps are much better, especially for very small text:
Processed with opencv
Scanned With DropBox
What can I do, to improve my result?
The apps may be using anti-aliasing to make their binarized output look nicer. To obtain a similar effect, I first tried binarizing the image, but the result didn't look very nice with all the jagged edges. Then I applied pyramid upsampling followed by downsampling to the result, and the output was better.
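That round trip can be reproduced in a few lines; a minimal sketch, assuming bw already holds the hard-thresholded binary image:
cv::Mat smoothed;
cv::pyrUp(bw, smoothed);         // 2x upsample; pyrUp/pyrDown filter with a Gaussian
cv::pyrDown(smoothed, smoothed); // back to the original size, edges now softened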
I didn't use adaptive thresholding, however. I segmented the text-like regions and processed only those regions, then pasted them together to form the final images. It is a kind of local thresholding using the Otsu method or k-means (see the combinations of thr_roi_otsu, thr_roi_kmeans, and proc_parts in the code). Below are some results.
Apply Otsu threshold to all text regions, then upsample followed by downsample:
Some text:
Full image:
Upsample input image, apply Otsu threshold to individual text regions, downsample the result:
Some text:
Full image:
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

/*
apply Otsu threshold to the region in mask
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    vector<unsigned char> pixels(countNonZero(mask));
    int index = 0;
    // collect the masked pixels into a flat array
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = im.at<unsigned char>(r, c);
            }
        }
    }
    // threshold pixels
    threshold(pixels, pixels, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
    // paste pixels
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = pixels[index++];
            }
        }
    }
    return bw;
}
/*
apply k-means to the region in mask
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;

    vector<float> pixels(countNonZero(mask));
    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels[index++] = (float)im.at<unsigned char>(r, c);
            }
        }
    }
    // cluster pixels by gray level
    int k = 2;
    Mat data(pixels.size(), 1, CV_32FC1, &pixels[0]);
    vector<float> centers;
    vector<int> labels(countNonZero(mask));
    kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);
    // examine cluster centers to see which pixels are dark
    int label0 = centers[0] > centers[1] ? 1 : 0;
    // paste pixels
    index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
            }
        }
    }
    return bw;
}
/*
apply procfn to each connected component in the mask,
then paste the results to form the final image
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
    Mat tmp = mask.clone();
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Mat byparts = Mat::ones(im.size(), CV_8U) * 255;
    for (int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat msk = mask(rect);
        Mat img = im(rect);
        // process the rect
        Mat roi = procfn(msk, img);
        // paste it to the final image
        roi.copyTo(byparts(rect));
    }
    return byparts;
}
int main(int argc, char* argv[])
{
    Mat im = imread("1.jpg", 0);

    // detect text regions
    Mat morph;
    Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);

    // prepare a mask for text regions
    Mat bw;
    threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
    morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);

    Mat bw2x, im2x;
    pyrUp(bw, bw2x);
    pyrUp(im, im2x);

    // apply Otsu threshold to all text regions, then upsample followed by downsample
    Mat otsu1x = thr_roi_otsu(bw, im);
    pyrUp(otsu1x, otsu1x);
    pyrDown(otsu1x, otsu1x);

    // apply k-means to all text regions, then upsample followed by downsample
    Mat kmeans1x = thr_roi_kmeans(bw, im);
    pyrUp(kmeans1x, kmeans1x);
    pyrDown(kmeans1x, kmeans1x);

    // upsample input image, apply Otsu threshold to all text regions, downsample the result
    Mat otsu2x = thr_roi_otsu(bw2x, im2x);
    pyrDown(otsu2x, otsu2x);

    // upsample input image, apply k-means to all text regions, downsample the result
    Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
    pyrDown(kmeans2x, kmeans2x);

    // apply Otsu threshold to individual text regions, then upsample followed by downsample
    Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
    pyrUp(otsuparts1x, otsuparts1x);
    pyrDown(otsuparts1x, otsuparts1x);

    // apply k-means to individual text regions, then upsample followed by downsample
    Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
    pyrUp(kmeansparts1x, kmeansparts1x);
    pyrDown(kmeansparts1x, kmeansparts1x);

    // upsample input image, apply Otsu threshold to individual text regions, downsample the result
    Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
    pyrDown(otsuparts2x, otsuparts2x);

    // upsample input image, apply k-means to individual text regions, downsample the result
    Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
    pyrDown(kmeansparts2x, kmeansparts2x);

    return 0;
}

How to detect squares in video with OpenCV?

So I combined the squares.cpp and cvBoundingRect.cpp code to detect squares in video. I therefore had to convert from IplImage to Mat type so that the findSquares and drawSquares methods could run (using the cvarrToMat function). But unfortunately, after successful compilation I get this error when running:
OpenCV Error: Assertion failed (j < nsrcs && src[j].depth() == depth) in mixChannels, file /Users/Desktop/opencv-3.0.0-rc1/modules/core/src/convert.cpp, line 1205
libc++abi.dylib: terminating with uncaught exception of type cv::Exception: /Users/Desktop/opencv-3.0.0-rc1/modules/core/src/convert.cpp:1205: error: (-215) j < nsrcs && src[j].depth() == depth in function mixChannels
Abort trap: 6
Here's the code:
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
int thresh = 50, N = 11;
const char* wndname = "Square Detection Demo";
// finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2
static double angle( Point pt1, Point pt2, Point pt0 )
{
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
    squares.clear();
    Mat pyr, timg, gray0(image.size(), CV_8U), gray;

    // down-scale and upscale the image to filter out the noise
    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
    pyrUp(pyr, timg, image.size());
    vector<vector<Point> > contours;

    // find squares in every color plane of the image
    for( int c = 0; c < 3; c++ )
    {
        int ch[] = {c, 0};
        mixChannels(&timg, 1, &gray0, 1, ch, 1);

        // try several threshold levels
        for( int l = 0; l < N; l++ )
        {
            // hack: use Canny instead of zero threshold level.
            // Canny helps to catch squares with gradient shading
            if( l == 0 )
            {
                // apply Canny. Take the upper threshold from slider
                // and set the lower to 0 (which forces edges merging)
                Canny(gray0, gray, 0, thresh, 5);
                // dilate canny output to remove potential
                // holes between edge segments
                dilate(gray, gray, Mat(), Point(-1,-1));
            }
            else
            {
                // apply threshold if l!=0:
                // tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
                gray = gray0 >= (l+1)*255/N;
            }

            // find contours and store them all as a list
            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);

            vector<Point> approx;
            // test each contour
            for( size_t i = 0; i < contours.size(); i++ )
            {
                // approximate contour with accuracy proportional
                // to the contour perimeter
                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);

                // square contours should have 4 vertices after approximation,
                // relatively large area (to filter out noisy contours),
                // and be convex.
                // Note: absolute value of an area is used because
                // area may be positive or negative - in accordance with the
                // contour orientation
                if( approx.size() == 4 &&
                    fabs(contourArea(Mat(approx))) > 1000 &&
                    isContourConvex(Mat(approx)) )
                {
                    double maxCosine = 0;
                    for( int j = 2; j < 5; j++ )
                    {
                        // find the maximum cosine of the angle between joint edges
                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                        maxCosine = MAX(maxCosine, cosine);
                    }

                    // if cosines of all angles are small
                    // (all angles are ~90 degree) then write quadrangle
                    // vertices to resultant sequence
                    if( maxCosine < 0.3 )
                        squares.push_back(approx);
                }
            }
        }
    }
}
// the function draws all the squares in the image
static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
{
    for( size_t i = 0; i < squares.size(); i++ )
    {
        const Point* p = &squares[i][0];
        int n = (int)squares[i].size();
        polylines(image, &p, &n, 1, true, Scalar(255,0,0), 3, LINE_AA);
    }
    imshow(wndname, image);
}
CvRect rect;
CvSeq* contours = 0;
CvMemStorage* storage = NULL;
CvCapture *cam;
IplImage *currentFrame, *currentFrame_grey, *differenceImg, *oldFrame_grey;
bool first = true;

int main(int argc, char* argv[])
{
    //Create a new movie capture object.
    cam = cvCaptureFromCAM(0);

    //create storage for contours
    storage = cvCreateMemStorage(0);

    //capture current frame from webcam
    currentFrame = cvQueryFrame(cam);

    //Size of the image.
    CvSize imgSize;
    imgSize.width = currentFrame->width;
    imgSize.height = currentFrame->height;

    //Images to use in the program.
    currentFrame_grey = cvCreateImage( imgSize, IPL_DEPTH_8U, 1);

    namedWindow( wndname, 1 );
    vector<vector<Point> > squares;

    while(1)
    {
        currentFrame = cvQueryFrame( cam );
        if( !currentFrame ) break;

        //Convert the image to grayscale.
        cvCvtColor(currentFrame, currentFrame_grey, CV_RGB2GRAY);

        if(first) //Capturing Background for the first time
        {
            differenceImg = cvCloneImage(currentFrame_grey);
            oldFrame_grey = cvCloneImage(currentFrame_grey);
            cvConvertScale(currentFrame_grey, oldFrame_grey, 1.0, 0.0);
            first = false;
            continue;
        }

        //Subtract the current frame from the moving average.
        cvAbsDiff(oldFrame_grey, currentFrame_grey, differenceImg);

        //blurring the difference image
        cvSmooth(differenceImg, differenceImg, CV_BLUR);

        //apply threshold to discard small unwanted movements
        cvThreshold(differenceImg, differenceImg, 25, 255, CV_THRESH_BINARY);

        //find contours
        cv::Mat diffImg = cv::cvarrToMat(differenceImg);
        cv::Mat currFrame = cv::cvarrToMat(currentFrame);
        findSquares(diffImg, squares);

        //draw bounding box around each contour
        drawSquares(currFrame, squares);

        //display colour image with bounding box
        cvShowImage("Output Image", currentFrame);

        //display threshold image
        cvShowImage("Difference image", differenceImg);

        //New Background
        cvConvertScale(currentFrame_grey, oldFrame_grey, 1.0, 0.0);

        //clear memory and contours
        cvClearMemStorage( storage );
        contours = 0;

        //press Esc to exit
        char c = cvWaitKey(33);
        if( c == 27 ) break;
    }

    // Destroy the image & movie objects
    cvReleaseImage(&oldFrame_grey);
    cvReleaseImage(&differenceImg);
    cvReleaseImage(&currentFrame);
    cvReleaseImage(&currentFrame_grey);

    return 0;
}
As the error message says, your problem is in cv::mixChannels(). See documentation.
Or you could simply do something like
cv::Mat channels[3];
cv::split(multiChannelImage, channels);
and then access each channel using
cv::Mat currChannel = channels[channelNumber];
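In this particular program the assertion fires because findSquares() iterates over three color planes (mixChannels() with c = 0..2) while diffImg is single-channel. A minimal sketch of one possible fix, converting the difference image to 3 channels before the call:
// give findSquares() the 3-channel input it expects
cv::Mat diffImg3;
cv::cvtColor(diffImg, diffImg3, CV_GRAY2BGR);
findSquares(diffImg3, squares);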

Calculating skew of text OpenCV

I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.
Currently this is the function I am using:
double compute_skew(Mat &img)
{
    // Binarize
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);

    // Invert colors
    cv::bitwise_not(img, img);

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, element);

    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = img.begin<uchar>();
    cv::Mat_<uchar>::iterator end = img.end<uchar>();
    for (; it != end; ++it)
        if (*it)
            points.push_back(it.pos());

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}
When I look at the angle in the debugger I get 0.000000.
However, when I give it this image, I get a proper result: a skew of about 16 degrees:
How can I properly detect the skew in the first image?
There are a few other ways to get the skew angle: 1) the Hough transform, and 2) the horizontal projection profile, where you rotate the image through a range of angle bins and compute the horizontal projection for each; the angle with the greatest horizontal histogram value is the deskew angle.
I have provided an implementation of 1) below. I believe it is superior to the boxing method you are using, because that method requires you to completely clean the image of noise, which just isn't possible most of the time.
You should know that this method doesn't work well either if there is too much noise. You can reduce the noise in different ways, depending on what type of "line" you want to treat as the most dominant in the image. I have provided two preprocessing methods for this; be sure to play with the parameters and thresholds. A sketch of approach 2) follows.
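For completeness, here is a minimal sketch of approach 2), the projection profile. It assumes bw is a white-on-black binary image (e.g. the output of preprocess2 below), and the search range and step are guesses to tune:
// Sketch of method 2): rotate through candidate angles and score each by
// how peaked the horizontal projection (per-row pixel sums) becomes;
// rows aligned with the text lines produce the sharpest peaks.
double best_angle = 0, best_score = -1;
for (double a = -15; a <= 15; a += 0.5) // hypothetical range/step
{
    Mat rot_mat = getRotationMatrix2D(Point2f(bw.cols/2.f, bw.rows/2.f), a, 1);
    Mat rotated;
    warpAffine(bw, rotated, rot_mat, bw.size());
    Mat rowsum;
    reduce(rotated, rowsum, 1, CV_REDUCE_SUM, CV_32F); // one sum per row
    double score = norm(rowsum); // the L2 norm rewards peaked profiles
    if (score > best_score) { best_score = score; best_angle = a; }
}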
Results (all run using preprocess2, with the same parameter set):
Code:
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
void hough_transform(Mat& im, Mat& orig, double* skew)
{
    double max_r = sqrt(pow(.5*im.cols, 2) + pow(.5*im.rows, 2));
    int angleBins = 180;
    Mat acc = Mat::zeros(Size(2*max_r, angleBins), CV_32SC1);
    int cenx = im.cols/2;
    int ceny = im.rows/2;
    for(int x = 1; x < im.cols-1; x++)
    {
        for(int y = 1; y < im.rows-1; y++)
        {
            if(im.at<uchar>(y,x) == 255)
            {
                for(int t = 0; t < angleBins; t++)
                {
                    double r = (x-cenx)*cos((double)t/angleBins*CV_PI) + (y-ceny)*sin((double)t/angleBins*CV_PI);
                    r += max_r;
                    acc.at<int>(t, int(r))++;
                }
            }
        }
    }
    normalize(acc, acc, 255, 0, NORM_MINMAX);
    convertScaleAbs(acc, acc);
    /*debug
    Mat cmap;
    applyColorMap(acc, cmap, COLORMAP_JET);
    imshow("cmap", cmap);
    imshow("acc", acc);*/

    Point maxLoc;
    minMaxLoc(acc, 0, 0, 0, &maxLoc);
    double theta = (double)maxLoc.y/angleBins*CV_PI;
    double rho = maxLoc.x - max_r;
    if(abs(sin(theta)) < 0.000001) // check vertical
    {
        // when vertical, the line equation becomes x = rho
        Point2d p1 = Point2d(rho + im.cols/2, 0);
        Point2d p2 = Point2d(rho + im.cols/2, im.rows);
        line(orig, p1, p2, Scalar(0,0,255), 1);
        *skew = 90;
        cout<<"skew angle "<<" 90"<<endl;
    }
    else
    {
        // convert normal form back to slope-intercept form: y = mx + b
        double m = -cos(theta)/sin(theta);
        double b = rho/sin(theta) + im.rows/2. - m*im.cols/2.;
        Point2d p1 = Point2d(0, b);
        Point2d p2 = Point2d(im.cols, im.cols*m + b);
        line(orig, p1, p2, Scalar(0,0,255), 1);
        double skewangle = p1.x-p2.x > 0 ? atan2(p1.y-p2.y, p1.x-p2.x)*180./CV_PI : atan2(p2.y-p1.y, p2.x-p1.x)*180./CV_PI;
        *skew = skewangle;
        cout<<"skew angle "<<skewangle<<endl;
    }
    imshow("orig", orig);
}
Mat preprocess1(Mat& im)
{
    Mat ret = Mat::zeros(im.size(), CV_32SC1);
    for(int x = 1; x < im.cols-1; x++)
    {
        for(int y = 1; y < im.rows-1; y++)
        {
            int gy = (im.at<uchar>(y-1,x+1) - im.at<uchar>(y-1,x-1))
                   + 2*(im.at<uchar>(y,x+1) - im.at<uchar>(y,x-1))
                   + (im.at<uchar>(y+1,x+1) - im.at<uchar>(y+1,x-1));
            int gx = (im.at<uchar>(y+1,x-1) - im.at<uchar>(y-1,x-1))
                   + 2*(im.at<uchar>(y+1,x) - im.at<uchar>(y-1,x))
                   + (im.at<uchar>(y+1,x+1) - im.at<uchar>(y-1,x+1));
            int g2 = (gy*gy + gx*gx);
            ret.at<int>(y,x) = g2;
        }
    }
    normalize(ret, ret, 255, 0, NORM_MINMAX);
    ret.convertTo(ret, CV_8UC1);
    threshold(ret, ret, 50, 255, THRESH_BINARY);
    return ret;
}
Mat preprocess2(Mat& im)
{
    // 1) assume white text on black and do local thresholding
    // 2) only allow a pixel to vote if it is white and the row below it is black (bottom of a text line)
    Mat thresh;
    //thresh = 255-im;
    thresh = im.clone();
    adaptiveThreshold(thresh, thresh, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 15, -2);

    Mat ret = Mat::zeros(im.size(), CV_8UC1);
    for(int x = 1; x < thresh.cols-1; x++)
    {
        for(int y = 1; y < thresh.rows-1; y++)
        {
            bool toprowblack = thresh.at<uchar>(y-1,x) == 0 || thresh.at<uchar>(y-1,x-1) == 0 || thresh.at<uchar>(y-1,x+1) == 0;
            bool belowrowblack = thresh.at<uchar>(y+1,x) == 0 || thresh.at<uchar>(y+1,x-1) == 0 || thresh.at<uchar>(y+1,x+1) == 0;

            uchar pix = thresh.at<uchar>(y,x);
            if((!toprowblack && pix == 255 && belowrowblack))
            {
                ret.at<uchar>(y,x) = 255;
            }
        }
    }
    return ret;
}
Mat rot(Mat& im, double thetaRad)
{
    cv::Mat rotated;
    double nw = abs(sin(thetaRad))*im.rows + abs(cos(thetaRad))*im.cols;
    double nh = abs(cos(thetaRad))*im.rows + abs(sin(thetaRad))*im.cols;
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5, nh*.5), thetaRad*180/CV_PI, 1);
    Mat pos = Mat::zeros(Size(1,3), CV_64FC1);
    pos.at<double>(0) = (nw-im.cols)*.5;
    pos.at<double>(1) = (nh-im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);
    cv::warpAffine(im, rotated, rot_mat, Size(nw, nh), cv::INTER_LANCZOS4);
    return rotated;
}
int main(int argc, char** argv)
{
    string src = "C:/data/skew.png";
    Mat im = imread(src);
    Mat gray;
    cvtColor(im, gray, CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2", preprocessed);

    double skew;
    hough_transform(preprocessed, im, &skew);
    Mat rotated = rot(im, skew*CV_PI/180);
    imshow("corrected", rotated);

    waitKey(0);
    return 0;
}
The approach you posted has its own "ideal binarization" assumption: the threshold value directly affects the result. Use an Otsu threshold instead, or think about a DFT-based approach for a generic solution.
Otsu trial:
int main()
{
    Mat input = imread("your text");
    cvtColor(input, input, CV_BGR2GRAY);

    Mat img;
    cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);
    cv::bitwise_not(img, img);
    imshow("img", img);
    waitKey(0);

    vector<Point> points;
    findNonZero(img, points);

    cv::RotatedRect box = cv::minAreaRect(points);

    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));

    imshow("img", img);
    waitKey(0);
    return 0;
}

OpenCV C++/Obj-C: Advanced square detection

A while ago I asked a question about square detection and karlphillip came up with a decent result.
Now I want to take this a step further and find squares whose edges aren't fully visible. Take a look at this example:
Any ideas? I'm working with karlphillip's code:
void find_squares(Mat& image, vector<vector<Point> >& squares)
{
    // blur will enhance edge detection
    Mat blurred(image);
    medianBlur(image, blurred, 9);

    Mat gray0(blurred.size(), CV_8U), gray;
    vector<vector<Point> > contours;

    // find squares in every color plane of the image
    for (int c = 0; c < 3; c++)
    {
        int ch[] = {c, 0};
        mixChannels(&blurred, 1, &gray0, 1, ch, 1);

        // try several threshold levels
        const int threshold_level = 2;
        for (int l = 0; l < threshold_level; l++)
        {
            // Use Canny instead of zero threshold level!
            // Canny helps to catch squares with gradient shading
            if (l == 0)
            {
                Canny(gray0, gray, 10, 20, 3);

                // Dilate helps to remove potential holes between edge segments
                dilate(gray, gray, Mat(), Point(-1,-1));
            }
            else
            {
                gray = gray0 >= (l+1) * 255 / threshold_level;
            }

            // Find contours and store them in a list
            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);

            // Test contours
            vector<Point> approx;
            for (size_t i = 0; i < contours.size(); i++)
            {
                // approximate contour with accuracy proportional
                // to the contour perimeter
                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);

                // Note: absolute value of an area is used because
                // area may be positive or negative - in accordance with the
                // contour orientation
                if (approx.size() == 4 &&
                    fabs(contourArea(Mat(approx))) > 1000 &&
                    isContourConvex(Mat(approx)))
                {
                    double maxCosine = 0;

                    for (int j = 2; j < 5; j++)
                    {
                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                        maxCosine = MAX(maxCosine, cosine);
                    }

                    if (maxCosine < 0.3)
                        squares.push_back(approx);
                }
            }
        }
    }
}
You might try using HoughLines to detect the four sides of the square. Next, locate the four resulting line intersections to detect the corners. The Hough transform is fairly robust to noise and occlusions, so it could be useful here. Also, here is an interactive demo showing how the Hough transform works (I thought it was cool at least :). Here is one of my previous answers that detects a laser cross center showing most of the same math (except it just finds a single corner).
You will probably have multiple lines on each side, but locating the intersections should help to determine the inliers vs. outliers. Once you've located candidate corners, you can also filter these candidates by area or how "square-like" the polygon is.
EDIT : All these answers with code and images were making me think my answer was a bit lacking :) So, here is an implementation of how you could do this:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <vector>
using namespace cv;
using namespace std;
Point2f computeIntersect(Vec2f line1, Vec2f line2);
vector<Point2f> lineToPointPair(Vec2f line);
bool acceptLinePair(Vec2f line1, Vec2f line2, float minTheta);
int main(int argc, char* argv[])
{
    Mat occludedSquare = imread("Square.jpg");

    resize(occludedSquare, occludedSquare, Size(0, 0), 0.25, 0.25);

    Mat occludedSquare8u;
    cvtColor(occludedSquare, occludedSquare8u, CV_BGR2GRAY);

    Mat thresh;
    threshold(occludedSquare8u, thresh, 200.0, 255.0, THRESH_BINARY);

    GaussianBlur(thresh, thresh, Size(7, 7), 2.0, 2.0);

    Mat edges;
    Canny(thresh, edges, 66.0, 133.0, 3);

    vector<Vec2f> lines;
    HoughLines( edges, lines, 1, CV_PI/180, 50, 0, 0 );
    cout << "Detected " << lines.size() << " lines." << endl;

    // compute the intersection from the lines detected...
    vector<Point2f> intersections;
    for( size_t i = 0; i < lines.size(); i++ )
    {
        for(size_t j = 0; j < lines.size(); j++)
        {
            Vec2f line1 = lines[i];
            Vec2f line2 = lines[j];
            if(acceptLinePair(line1, line2, CV_PI / 32))
            {
                Point2f intersection = computeIntersect(line1, line2);
                intersections.push_back(intersection);
            }
        }
    }

    if(intersections.size() > 0)
    {
        vector<Point2f>::iterator i;
        for(i = intersections.begin(); i != intersections.end(); ++i)
        {
            cout << "Intersection is " << i->x << ", " << i->y << endl;
            circle(occludedSquare, *i, 1, Scalar(0, 255, 0), 3);
        }
    }

    imshow("intersect", occludedSquare);
    waitKey();

    return 0;
}
bool acceptLinePair(Vec2f line1, Vec2f line2, float minTheta)
{
    float theta1 = line1[1], theta2 = line2[1];

    if(theta1 < minTheta)
    {
        theta1 += CV_PI; // dealing with 0 and 180 ambiguities...
    }

    if(theta2 < minTheta)
    {
        theta2 += CV_PI; // dealing with 0 and 180 ambiguities...
    }

    return abs(theta1 - theta2) > minTheta;
}
// the long nasty wikipedia line-intersection equation...bleh...
Point2f computeIntersect(Vec2f line1, Vec2f line2)
{
    vector<Point2f> p1 = lineToPointPair(line1);
    vector<Point2f> p2 = lineToPointPair(line2);

    float denom = (p1[0].x - p1[1].x)*(p2[0].y - p2[1].y) - (p1[0].y - p1[1].y)*(p2[0].x - p2[1].x);
    Point2f intersect(((p1[0].x*p1[1].y - p1[0].y*p1[1].x)*(p2[0].x - p2[1].x) -
                       (p1[0].x - p1[1].x)*(p2[0].x*p2[1].y - p2[0].y*p2[1].x)) / denom,
                      ((p1[0].x*p1[1].y - p1[0].y*p1[1].x)*(p2[0].y - p2[1].y) -
                       (p1[0].y - p1[1].y)*(p2[0].x*p2[1].y - p2[0].y*p2[1].x)) / denom);

    return intersect;
}
vector<Point2f> lineToPointPair(Vec2f line)
{
    vector<Point2f> points;

    float r = line[0], t = line[1];
    double cos_t = cos(t), sin_t = sin(t);
    double x0 = r*cos_t, y0 = r*sin_t;
    double alpha = 1000;

    points.push_back(Point2f(x0 + alpha*(-sin_t), y0 + alpha*cos_t));
    points.push_back(Point2f(x0 - alpha*(-sin_t), y0 - alpha*cos_t));

    return points;
}
NOTE: The main reason I resized the image was so I could see it on my screen and to speed up processing.
Canny
This uses Canny edge detection to help greatly reduce the number of lines detected after thresholding.
Hough transform
Then the Hough transform is used to detect the sides of the square.
Intersections
Finally, we compute the intersections of all the line pairs.
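The sample keeps every raw intersection; the inlier filtering mentioned earlier could start as simply as rejecting points outside the frame. A minimal sketch, with a hypothetical helper that is not part of the code above:
// Hough lines can intersect far outside the image, while genuine corner
// candidates must lie inside it; drop everything else before clustering.
bool insideImage(const Point2f& p, const Mat& img)
{
    return p.x >= 0 && p.y >= 0 && p.x < img.cols && p.y < img.rows;
}
Near-duplicate survivors could then be merged, e.g. by averaging points that fall within a few pixels of each other.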
Hope that helps!
I tried the convex hull method, which is pretty simple.
Here you find the convex hull of each detected contour; this removes the convexity defects at the bottom of the paper.
Below is the code (in OpenCV-Python):
import cv2
import numpy as np

img = cv2.imread('sof.jpg')
img = cv2.resize(img, (500, 500))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, 0)
contours, hier = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
    if cv2.contourArea(cnt) > 5000:  # remove small areas like noise etc
        hull = cv2.convexHull(cnt)   # find the convex hull of contour
        hull = cv2.approxPolyDP(hull, 0.1*cv2.arcLength(hull, True), True)
        if len(hull) == 4:
            cv2.drawContours(img, [hull], 0, (0, 255, 0), 2)

cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
(Here, I haven't searched for squares in all the color planes. Do it yourself if you want.)
Below is the result I got:
I hope this is what you needed.
1st: Start experimenting with threshold techniques to isolate the white paper sheet from the rest of the image. This is a simple way:
Mat new_img = imread(argv[1]);
double thres = 200;
double color = 255;
threshold(new_img, new_img, thres, color, CV_THRESH_BINARY);
imwrite("thres.png", new_img);
but there are other alternatives that could provide better results. One is to investigate inRange(); another is to detect by color after converting the image to the HSV color space, as sketched below.
This thread also provides an interesting discussion on the subject.
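A minimal sketch of the inRange()/HSV idea, operating on the original BGR image; the bounds below are placeholder values for "bright, low-saturation" paper and will need tuning:
// Hypothetical HSV isolation of a white sheet: any hue, low saturation,
// high value. The numeric bounds are assumptions, not tested values.
Mat hsv, mask;
cvtColor(new_img, hsv, CV_BGR2HSV);
inRange(hsv, Scalar(0, 0, 180), Scalar(180, 60, 255), mask);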
2nd: After you execute one of these procedures, you could try to feed the result directly into find_squares().
An alternative to find_squares() is to implement the bounding box technique, which has the potential to provide a more accurate detection of the rectangular area (provided you have a clean threshold result). I've used it here and here. It's worth noting that OpenCV has its own bounding box tutorial.
Another approach besides find_squares(), as pointed out by Abid in his answer, is to use the convexHull method. Check OpenCV's C++ tutorial on this method for code.
Another idea: convert to the Lab color space, run k-means with 2 clusters, and detect squares in one (the internal) cluster; this avoids many of the problems of working in RGB space.
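A minimal sketch of that idea; k = 2 and the label-to-mask step are assumptions:
// Cluster Lab pixels into 2 groups and turn the labels into a mask.
// Which cluster is the paper must be decided afterwards (e.g. the one
// with the higher mean L channel).
Mat lab, samples, labels, centers;
cvtColor(img, lab, CV_BGR2Lab);
lab.convertTo(samples, CV_32F);
samples = samples.reshape(1, lab.rows * lab.cols); // one row per pixel, 3 columns
kmeans(samples, 2, labels,
       TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0),
       3, KMEANS_PP_CENTERS, centers);
Mat mask = labels.reshape(1, lab.rows); // 0/1 label image
mask.convertTo(mask, CV_8U, 255);       // cluster 1 becomes white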

OpenCv 2.3 C - How to isolate object inside image

I have an image like this:
I want to remove the black rows and columns around the number.
So I want the result to be:
I tried this:
void findX(IplImage* imgSrc, int* min, int* max){
    int i;
    int minFound = 0;
    CvMat data;
    CvScalar maxVal = cvRealScalar(imgSrc->width * 255);
    CvScalar val = cvRealScalar(0);
    //For each col sum, if sum < width*255 then we find the min
    //then continue to end to search the max, if sum < width*255 then is new max
    for (i = 0; i < imgSrc->width; i++){
        cvGetCol(imgSrc, &data, i);
        val = cvSum(&data);
        if(val.val[0] < maxVal.val[0]){
            *max = i;
            if(!minFound){
                *min = i;
                minFound = 1;
            }
        }
    }
}
void findY(IplImage* imgSrc, int* min, int* max){
    int i;
    int minFound = 0;
    CvMat data;
    CvScalar maxVal = cvRealScalar(imgSrc->width * 255);
    CvScalar val = cvRealScalar(0);
    //For each row sum, if sum < width*255 then we find the min
    //then continue to end to search the max, if sum < width*255 then is new max
    for (i = 0; i < imgSrc->height; i++){
        cvGetRow(imgSrc, &data, i);
        val = cvSum(&data);
        if(val.val[0] < maxVal.val[0]){
            *max = i;
            if(!minFound){
                *min = i;
                minFound = 1;
            }
        }
    }
}
CvRect findBB(IplImage* imgSrc){
    CvRect aux;
    int xmin, xmax, ymin, ymax;
    xmin = xmax = ymin = ymax = 0;

    findX(imgSrc, &xmin, &xmax);
    findY(imgSrc, &ymin, &ymax);

    aux = cvRect(xmin, ymin, xmax-xmin, ymax-ymin);
    //printf("BB: %d,%d - %d,%d\n", aux.x, aux.y, aux.width, aux.height);
    return aux;
}
So I use:
IplImage *my_image = cvLoad....
CvRect bb = findBB(my_image);
IplImage *new_image = cvCreateImage(cvSize(bb.width, bb.height), my_image->depth, 1);
cvShowImage("test", new_image);
It doesn't work well: when I check the new image, black rows and columns are still present. What can I do? Can someone help me? (Sorry for my English!)
One way to do it is to simply execute the bounding box technique to detect the digit, as illustrated by the image below:
Since your image is already processed, the bounding box technique I use is a lot simpler.
After that procedure, all you really need to do is set the ROI (Region of Interest) of the original image to the area defined by the box to achieve the crop effect and isolate the object:
Notice that in the resulting image there is one extra row/column of pixels at the border that are not white. Well, they are not black either. That's because I didn't perform any thresholding to binarize the image to black and white. The code below demonstrates the bounding box technique executed on a grayscale version of the image.
This is pretty much the roadmap to achieve what you want. For educational purposes I'm sharing the code I wrote using the C++ interface of OpenCV. I'm sure you are capable of converting it to the C interface.
#include <cv.h>
#include <highgui.h>
#include <vector>

int main(int argc, char* argv[])
{
    cv::Mat img = cv::imread(argv[1]);

    // Convert RGB Mat to GRAY
    cv::Mat gray;
    cv::cvtColor(img, gray, CV_BGR2GRAY);

    // Store the set of points in the image before assembling the bounding box
    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = gray.begin<uchar>();
    cv::Mat_<uchar>::iterator end = gray.end<uchar>();
    for (; it != end; ++it)
    {
        if (*it) points.push_back(it.pos());
    }

    // Compute minimal bounding box
    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    // Draw bounding box in the original image (debug purposes)
    //cv::Point2f vertices[4];
    //box.points(vertices);
    //for (int i = 0; i < 4; ++i)
    //{
    //    cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(0, 255, 0), 1, CV_AA);
    //}
    //cv::imshow("box", img);
    //cv::imwrite("box.png", img);

    // Set Region of Interest to the area defined by the box
    cv::Rect roi;
    roi.x = box.center.x - (box.size.width / 2);
    roi.y = box.center.y - (box.size.height / 2);
    roi.width = box.size.width;
    roi.height = box.size.height;

    // Crop the original image to the defined ROI
    cv::Mat crop = img(roi);
    cv::imshow("crop", crop);
    cv::imwrite("cropped.png", crop);

    cv::waitKey(0);
    return 0;
}