I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.
Currently this is the function I am using:
double compute_skew(Mat &img)
{
// Binarize
cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);
// Invert colors
cv::bitwise_not(img, img);
cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
cv::erode(img, img, element);
std::vector<cv::Point> points;
cv::Mat_<uchar>::iterator it = img.begin<uchar>();
cv::Mat_<uchar>::iterator end = img.end<uchar>();
for (; it != end; ++it)
if (*it)
points.push_back(it.pos());
cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));
double angle = box.angle;
if (angle < -45.)
angle += 90.;
cv::Point2f vertices[4];
box.points(vertices);
for(int i = 0; i < 4; ++i)
cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);
return angle;
}
When I look at then angle in debug I get 0.000000
However when I give it this image I get proper results of a skew of about 16 degrees:
How can I properly detect the skew in the first image?
there are a few other ways to get the skew degree, 1) by hough transform 2) by horizontal projection profile. rotate the image in different angle bins and calculate horizontal projection. the angle with the greatest horizontal histogram value is the deskewed angle.
i have provided below implementation of 1). i believe this to be superior to the boxing method you are using because it requires that you completely clean the image of any noise,which just isnt possible in most of the time.
you should know that the method doesnt work well if there's too much noise. you can reduce noise in different ways depending on what type of "line" you want to treat as the most dominant in the image. i have provided two methods for this. be sure to play with parameters and threshold etc.
results (all run using preprocess2, all run using same parameter set)
code
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
void hough_transform(Mat& im,Mat& orig,double* skew)
{
double max_r=sqrt(pow(.5*im.cols,2)+pow(.5*im.rows,2));
int angleBins = 180;
Mat acc = Mat::zeros(Size(2*max_r,angleBins),CV_32SC1);
int cenx = im.cols/2;
int ceny = im.rows/2;
for(int x=1;x<im.cols-1;x++)
{
for(int y=1;y<im.rows-1;y++)
{
if(im.at<uchar>(y,x)==255)
{
for(int t=0;t<angleBins;t++)
{
double r =(x-cenx)*cos((double)t/angleBins*CV_PI)+(y-ceny)*sin((double)t /angleBins*CV_PI);
r+=max_r;
acc.at<int>(t,int(r))++;
}
}
}
}
Mat thresh;
normalize(acc,acc,255,0,NORM_MINMAX);
convertScaleAbs(acc,acc);
/*debug
Mat cmap;
applyColorMap(acc,cmap,COLORMAP_JET);
imshow("cmap",cmap);
imshow("acc",acc);*/
Point maxLoc;
minMaxLoc(acc,0,0,0,&maxLoc);
double theta = (double)maxLoc.y/angleBins*CV_PI;
double rho = maxLoc.x-max_r;
if(abs(sin(theta))<0.000001)//check vertical
{
//when vertical, line equation becomes
//x = rho
double m = -cos(theta)/sin(theta);
Point2d p1 = Point2d(rho+im.cols/2,0);
Point2d p2 = Point2d(rho+im.cols/2,im.rows);
line(orig,p1,p2,Scalar(0,0,255),1);
*skew=90;
cout<<"skew angle "<<" 90"<<endl;
}else
{
//convert normal form back to slope intercept form
//y = mx + b
double m = -cos(theta)/sin(theta);
double b = rho/sin(theta)+im.rows/2.-m*im.cols/2.;
Point2d p1 = Point2d(0,b);
Point2d p2 = Point2d(im.cols,im.cols*m+b);
line(orig,p1,p2,Scalar(0,0,255),1);
double skewangle;
skewangle= p1.x-p2.x>0? (atan2(p1.y-p2.y,p1.x-p2.x)*180./CV_PI):(atan2(p2.y-p1.y,p2. x-p1.x)*180./CV_PI);
*skew=skewangle;
cout<<"skew angle "<<skewangle<<endl;
}
imshow("orig",orig);
}
Mat preprocess1(Mat& im)
{
Mat ret = Mat::zeros(im.size(),CV_32SC1);
for(int x=1;x<im.cols-1;x++)
{
for(int y=1;y<im.rows-1;y++)
{
int gy = (im.at<uchar>(y-1,x+1)-im.at<uchar>(y-1,x-1))
+2*(im.at<uchar>(y,x+1)-im.at<uchar>(y,x-1))
+(im.at<uchar>(y+1,x+1)-im.at<uchar>(y+1,x-1));
int gx = (im.at<uchar>(y+1,x-1) -im.at<uchar>(y-1,x-1))
+2*(im.at<uchar>(y+1,x)-im.at<uchar>(y-1,x))
+(im.at<uchar>(y+1,x+1)-im.at<uchar>(y-1,x+1));
int g2 = (gy*gy + gx*gx);
ret.at<int>(y,x)=g2;
}
}
normalize(ret,ret,255,0,NORM_MINMAX);
ret.convertTo(ret,CV_8UC1);
threshold(ret,ret,50,255,THRESH_BINARY);
return ret;
}
Mat preprocess2(Mat& im)
{
// 1) assume white on black and does local thresholding
// 2) only allow voting top is white and buttom is black(buttom text line)
Mat thresh;
//thresh=255-im;
thresh=im.clone();
adaptiveThreshold(thresh,thresh,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);
Mat ret = Mat::zeros(im.size(),CV_8UC1);
for(int x=1;x<thresh.cols-1;x++)
{
for(int y=1;y<thresh.rows-1;y++)
{
bool toprowblack = thresh.at<uchar>(y-1,x)==0 || thresh.at<uchar>(y-1,x-1)==0 || thresh.at<uchar>(y-1,x+1)==0;
bool belowrowblack = thresh.at<uchar>(y+1,x)==0 || thresh.at<uchar>(y+1, x-1)==0 || thresh.at<uchar>(y+1,x+1)==0;
uchar pix=thresh.at<uchar>(y,x);
if((!toprowblack && pix==255 && belowrowblack))
{
ret.at<uchar>(y,x) = 255;
}
}
}
return ret;
}
Mat rot(Mat& im,double thetaRad)
{
cv::Mat rotated;
double rskew = thetaRad* CV_PI/180;
double nw = abs(sin(thetaRad))*im.rows+abs(cos(thetaRad))*im.cols;
double nh = abs(cos(thetaRad))*im.rows+abs(sin(thetaRad))*im.cols;
cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);
Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
pos.at<double>(0)=(nw-im.cols)*.5;
pos.at<double>(1)=(nh-im.rows)*.5;
Mat res = rot_mat*pos;
rot_mat.at<double>(0,2) += res.at<double>(0);
rot_mat.at<double>(1,2) += res.at<double>(1);
cv::warpAffine(im, rotated, rot_mat,Size(nw,nh), cv::INTER_LANCZOS4);
return rotated;
}
int main(int argc, char** argv)
{
string src="C:/data/skew.png";
Mat im= imread(src);
Mat gray;
cvtColor(im,gray,CV_BGR2GRAY);
Mat preprocessed = preprocess2(gray);
imshow("preprocessed2",preprocessed);
double skew;
hough_transform(preprocessed,im,&skew);
Mat rotated = rot(im,skew* CV_PI/180);
imshow("corrected",rotated);
waitKey(0);
return 0;
}
the approach you posted has its own "ideal binarization" assumption. the threshold value directly affects the process. utilize otsu threshold, or think about DFT for a generic solution.
otsu trial:
int main()
{
Mat input = imread("your text");
cvtColor(input, input, CV_BGR2GRAY);
Mat img;
cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);
cv::bitwise_not(img, img);
imshow("img ", img);
waitKey(0);
vector<Point> points;
findNonZero(img, points);
cv::RotatedRect box = cv::minAreaRect(points);
double angle = box.angle;
if (angle < -45.)
angle += 90.;
cv::Point2f vertices[4];
box.points(vertices);
for(int i = 0; i < 4; ++i)
cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));
imshow("img ", img);
waitKey(0);
return 0;
}
Related
I'm trying to use cv::warpAffine to perform image transformations, but I have an issue.
Note: I already saw questions/46998895 and it works well too, but with no interpolation and I wanted to use affine matrixes.
So I want to shear the image and the result goes in a bigger image to have the entire sheared image like this:
Right now, my idea was to perform the shear on inverted image if the values are negatives rather than calculate the new image size with negative values etc as seen on paperspace blog.
When we use a negative shear, the direction of shear is right to left, while x2 is not further in the negative direction than x1. One way to solve this could be to get the other set of corners (that would satisfy the constraint, can you prove it?). Apply the shear transformation and then change to the other set of corners because of the notation we follow.
Well, we could do that, but there's a better method. Here's how how perform a negative shear with shearing factor -alpha.
Flip the image and boxes horizontally.
Apply the positive shear transformation with shearing factor alpha
Flip the image and boxes horizontally again.
The left ones are my results, and the right ones are the expected results :
So as you can see it works well when x and y are both positive values or when one is 0 and the other is negative, its ok. When one is positive and the other is negative, or when they are both negative, it provides always the same result for my shear function, see the 4 last lines in the pic above. Again, the left forms are my results, the right ones are the mickaShear() results (and obviously the correct ones).
My question is: what do I have to do in my shear_matrix to perform the correct transformation with negative values? Is the cv::flip() method described by paperSpace the right way to achieve this transformation?
Here you can find the reproductible code with the comparison between my function and the function in this answer. You will need OpenCV. I work on OpenCV 4.5.1
#include <opencv2/core/mat.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core.hpp>
#include <iostream>
cv::Mat myShear(const cv::Mat & src, float sx, float sy) {
cv::Mat tmp;
cv::Mat dst;
std::vector<cv::Point2f> extremePoints;
extremePoints.emplace_back(cv::Point2f(0, 0));
extremePoints.emplace_back(cv::Point2f((float)src.cols, 0));
extremePoints.emplace_back(cv::Point2f((float)src.cols, (float)src.rows));
extremePoints.emplace_back(cv::Point2f(0, (float)src.rows));
for(auto & pt : extremePoints){
pt = cv::Point2f(pt.x + pt.y * abs(sx), pt.y + pt.x * abs(sy));
}
cv::Rect offsets = cv::boundingRect(extremePoints);
cv::Size new_size = offsets.size();
float mat_values[] = {1.0f, abs(sx), 0.0f, abs(sy), 1.0f, 0.0f};
cv::Mat shear_matrix = cv::Mat(2, 3, CV_32F, mat_values);
dst = cv::Mat::zeros(new_size, src.type());
/*
*cv::flip(img, tmp, INT_FLIP_CODE) where INT_FLIP_CODE can be:
* 0 (Vertically)
* 1 (Horizontally)
* -1 (Both)
*/
if(sx < 0.0f and sy < 0.0f) {
cv::flip(img, tmp, -1);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, -1);
} else if(sx < 0.0f) {
cv::flip(img, tmp, 1);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, 1);
} else if(sy < 0.0f) {
cv::flip(img, tmp, 0);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, 0);
} else {
tmp = src.clone();
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
}
return dst;
}
cv::Mat mickaShear(const cv::Mat & input, float Bx, float By)
{
if (Bx*By == 1)
{
std::cerr << "error == 1" << std::endl;
}
if (input.type() != CV_8UC3) return {};
std::vector<cv::Point2f> extremePoints;
extremePoints.emplace_back(0, 0);
extremePoints.emplace_back(input.cols, 0);
extremePoints.emplace_back(input.cols, input.rows);
extremePoints.emplace_back(0, input.rows);
for (auto & pt : extremePoints)
{
pt = cv::Point2f(pt.x + pt.y*Bx, pt.y + pt.x*By);
}
cv::Rect offsets = cv::boundingRect(extremePoints);
cv::Point2f offset = -offsets.tl();
cv::Size resultSize = offsets.size();
cv::Mat shearedImage = cv::Mat::zeros(resultSize, input.type());
for (int j = 0; j < shearedImage.rows; ++j){
for (int i = 0; i < shearedImage.cols; ++i){
cv::Point2f pp((float)i, (float)j);
pp = pp - offset; // go back to original coordinate system
cv::Point2f p;
p.x = int((-pp.y*Bx + pp.x) / (1 - By*Bx));
p.y = int(pp.y - p.x*By);
if ((p.x >= 0 && p.x < (float)input.cols) && (p.y >= 0 && p.y < (float)input.rows)){
shearedImage.at<cv::Vec3b>(j, i) = input.at<cv::Vec3b>(p);
}
}
}
return shearedImage;
}
int main(int argc, char *argv[]){
float x = -0.2f; //CHANGE SIGN TO TEST
float y = 0.5f; //CHANGE SIGN TO TEST
cv::Mat im = cv::imread("MODIFY BY JPG FILE PATH");
cv::Mat result = im.clone();
cv::Mat output = cv::Mat();
for(auto & aug: augments){
output = mickaShear(im, x, y);
result = myShear(im);
}
cv::imshow("result", result);
cv::imshow("output", output);
auto k = cv::waitKey(0);
if(k == (int)'q'){
cv::destroyAllWindows();
break;
}
cv::destroyAllWindows();
return 0;
}
I have a 3D depth camera placed above three moving belt lanes and I'm trying to rotate the depth image (or the point cloud) so that the three lanes match the camera's angle. I'm not experienced at all with point clouds, but after some research I've tried to do the following:
Acquire an XYZ cartesian image from the sensor which I turn into a point cloud vector.
Define three points on the point cloud, one on each of the three lanes.
Fit a plane through them by finding the plane coefficients.
Finding the cross product between the plane and the z_normal, and then finding the angle of
rotation.
Use the Eigen library to transform the PCL cloud and turn it back into an openCV Mat.
For whatever reason, I always end up with a bad image with max-int values on one side and zeros on the other. I'm not certain anymore if there's something wrong with the code or if the method above is incorrect to start with.
My code so far:
// helper functions
pcl::PointCloud<pcl::PointXYZ>::Ptr MatToPcl(cv::Mat xyzMat);
cv::Mat PclToMat(pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr);
void colorize(cv::Mat& src, cv::Mat& dst);
void clip(cv::Mat& m, const uint16_t lowerBound, const uint16_t upperBound);
while(1)
{
// camera framegrabber object to capture an image
fg->SWTrigger();
if (!fg->WaitForFrame(im.get(), 2000))
{
throw;
}
// openCV Mat declerations
cv::Mat zDepth, zDepthColor;
cv::Mat xyz = im->XYZImage();
vector<cv::Mat> channels(3);
cv::split(xyz, channels);
zDepth = channels[0];
cv::imwrite("xyzMat.png", xyz);
cv::imwrite("depthImage.png", zDepth);
clip(zDepth, 1250, 1400);
colorise(zDepth, zDepthColor);
cv::imwrite("depthColored.png", zDepthColor);
// specify a 3D point on each lane
cv::Point3i p1, p2, p3;
p1.x = w / 4;
p1.y = 24;
p1.z = zDepth.at<uint16_t>(p1.x, p1.y);
p2.x = w / 2;
p2.y = 70;
p2.z = zDepth.at<uint16_t>(p2.x, p2.y);
p3.x = int(w * 0.75);
p3.y = 114;
p3.z = zDepth.at<uint16_t>(p3.x, p3.y);
auto cross = (p2 - p1).cross(p3 - p1);
// transform Mats to point clouds
pcl::PointCloud<pcl::PointXYZ>::Ptr floor_plane, xyzCentered;
floor_plane = MatToPcl(zDepth);
Eigen::Matrix<float, 1, 3> floor_plane_normal_vector, xy_plane_normal_vector, rotation_vector;
floor_plane_normal_vector[0] = cross.x;
floor_plane_normal_vector[1] = cross.y;
floor_plane_normal_vector[2] = cross.z;
// specify the z normal from the xy-plane
xy_plane_normal_vector[0] = 0.0;
xy_plane_normal_vector[1] = 0.0;
xy_plane_normal_vector[2] = 1.0;
// cross product and normalize vector
rotation_vector = xy_plane_normal_vector.cross(floor_plane_normal_vector);
rotation_vector.normalized();
// angle of rotation
float theta = -atan2(rotation_vector.norm(), xy_plane_normal_vector.dot(floor_plane_normal_vector));
// transform plane according to angle
Eigen::Affine3f transform_2 = Eigen::Affine3f::Identity();
transform_2.translation() << 0, 0, 30;
transform_2.rotate(Eigen::AngleAxisf(theta, rotation_vector));
pcl::transformPointCloud(*floor_plane, *xyzCentered, transform_2);
// Pointcloud to Mat again
cv::Mat xyzRot = PclToMat(xyzCentered);
// clipLow and clipHigh values obtained from trackbars
clip(xyzRot, clipLow, clipHigh);
cv::Mat xyzRotColor;
colorize(xyzRot, xyzRotColor)
cv::imshow("result", xyzRotColor);
cv::waitKey(1);
}
pcl::PointCloud<pcl::PointXYZ>::Ptr MatToPcl(cv::Mat xyzMat)
{
/*
* Function: Get from a Mat to pcl pointcloud datatype
* In: cv::Mat
* Out: pcl::PointCloud
*/
//char pr=100, pg=100, pb=100;
pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr(new pcl::PointCloud<pcl::PointXYZ>);;
vector<cv::Mat> channels(3);
cv::split(xyzMat, channels);
for (int i = 0; i < ifmXYZ.rows; i++)
{
for (int j = 0; j < ifmXYZ.cols; j++)
{
pcl::PointXYZ point;
point.x = channels[0].at<short>(i,j);
point.y = channels[1].at<short>(i, j);
point.z = channels[2].at<short>(i, j);
// when color needs to be added:
//uint32_t rgb = (static_cast<uint32_t>(pr) << 16 | static_cast<uint32_t>(pg) << 8 | static_cast<uint32_t>(pb));
//point.rgb = *reinterpret_cast<float*>(&rgb);
point_cloud_ptr->points.push_back(point);
}
}
point_cloud_ptr->width = (int)point_cloud_ptr->points.size();
/*point_cloud_ptr->height = 1;*/
return point_cloud_ptr;
}
// convert PCL to cv::Mat, taking only the depth values at z.
cv::Mat PclToMat(pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr)
{
cv::Mat depth_image;
if (!depth_image.empty())
depth_image.release();
depth_image.create(132, 176, CV_32F);
int count = 0;
for (int i = 0; i < 132; i++)
{
for (int j = 0; j < 176; j++)
{
depth_image.at<float>(i, j) = point_cloud_ptr->points.at(count++).z;
}
}
depth_image.convertTo(depth_image, CV_16UC1);
return depth_image;
}
/*
* For display purposes with cv::imshow, will convert a 16bit depth image to 8bit 3 channel colored image
* thanks to fmw42 for the function at https://stackoverflow.com/a/67678634/13184944
*/
void colorize(cv::Mat& src, cv::Mat& dst)
{
// stretch the image by rescaling intensity within the output 8-bit range
double oldMin;
double oldMax;
cv::Point minLoc;
cv::Point maxLoc;
cv::minMaxLoc(src, &oldMin, &oldMax, &minLoc, &maxLoc);
double oldRange = oldMax - oldMin;
double newMin = 0.0;
double newMax = 255.0;
double newRange = newMax - newMin;
//cout << oldMin << ' ' << oldMax << ' ' << oldRange << '\n';
// clip the values of the image to the required range
clip(src, oldMin, oldMax);
//TODO: Look at difference between OpenCV normalization and skimage
normalize(src, dst, 0, 255, NORM_MINMAX, CV_8UC1);
//img = (img - cv::Scalar(oldMin)) / (cv::Scalar(oldRange));
//img = (img * cv::Scalar(newRange)) + cv::Scalar(newMin);
cv::Mat channels[3] = { dst, dst, dst };
cv::merge(channels, 3, dst);
cv::Mat C(1, 6, CV_8UC(3));
cv::Vec3b color1 = { 0, 0, 255 };
cv::Vec3b color2 = { 0, 165, 255 };
cv::Vec3b color3 = { 0, 255, 255 };
cv::Vec3b color4 = { 255, 255, 0 };
cv::Vec3b color5 = { 255, 0, 0 };
cv::Vec3b color6 = { 128, 64, 64 };
C.at<cv::Vec3b>(0, 0) = color1;
C.at<cv::Vec3b>(0, 1) = color2;
C.at<cv::Vec3b>(0, 2) = color3;
C.at<cv::Vec3b>(0, 3) = color4;
C.at<cv::Vec3b>(0, 4) = color5;
C.at<cv::Vec3b>(0, 5) = color6;
cv::Mat lut;
cv::resize(C, lut, cv::Size(256, 1), 0.0, 0.0, cv::INTER_LINEAR);
//cout << lut.size << '\n';
cv::LUT(dst, lut, dst);
return;
}
void clip(cv::Mat& m, const uint16_t lowerBound, const uint16_t upperBound)
{
m.setTo(lowerBound, m < lowerBound);
m.setTo(upperBound, m > upperBound);
return;
}
Apologies if this is really basic or something is obviously wrong but I feel stuck here. I also tried segmentation with ransac but the it never aligns the plane in the way I wanted.
Thanks!
Edit: Updated the code to include additional steps and functions. Only the camera initialization is skipped.
The clip and colorize functions aid in displaying the 16bit depth image. My end goal here is to be able to use trackbars with clip(zImg, low, high) where the three lanes will always be vertically aligns (as in, change color at the same rate) as I change the clip values.
download link with image files: link
Colorized depth image:
I want to detect circles in an image with opencv in c++. I have tried various thresholds for the dp, parm1, and parm2 but I could not find the correct ones.
The radius of the circles is around 40 pixels.
I have added a link to sample images. Which values do I need to use in the HoughCircles() for correct results?
Link to sample images.
You can use following parameter values. These settings are able to detect the circles in your images.
cv::Mat img = cv::imread("75.bmp");
cv::Mat img_gray;
cv::cvtColor(img, img_gray, cv::COLOR_BGR2GRAY);
img_gray.convertTo(img_gray, CV_8UC1);
std::vector<cv::Vec3f> circles;
double minDist = 20;
double dp = 1;
double param1 = 200;
double param2 = 10;
int minRadius = 15;
int maxRadius = 25;
cv::HoughCircles(img_gray, circles, cv::HOUGH_GRADIENT, dp, minDist, param1, param2, minRadius, maxRadius);
if (circles.size() > 0) {
for (size_t current_circle = 0; current_circle < circles.size(); ++current_circle) {
cv::Point center(std::round(circles[current_circle][0]), std::round(circles[current_circle][1]));
int radius = std::round(circles[current_circle][2]);
cv::circle(img, center, radius, cv::Scalar(0, 255, 0), 1);
}
}
So I combined squares.cpp with cvBoundingRect.cpp code to detect squares in video. I therefore, had to convert from IplImage to Mat type so that findSquares and drawSquares methods could run (By using cvarrToMat function). But unfortunately, after successful compilation I get this error when running:
OpenCV Error: Assertion failed (j < nsrcs && src[j].depth() == depth) in mixChannels, file /Users/Desktop/opencv-3.0.0-rc1/modules/core/src/convert.cpp, line 1205
libc++abi.dylib: terminating with uncaught exception of type cv::Exception: /Users/Desktop/opencv-3.0.0-rc1/modules/core/src/convert.cpp:1205: error: (-215) j < nsrcs && src[j].depth() == depth in function mixChannels
Abort trap: 6
Here's the code:
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
int thresh = 50, N = 11;
const char* wndname = "Square Detection Demo";
// finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2
static double angle( Point pt1, Point pt2, Point pt0 )
{
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
squares.clear();
Mat pyr, timg, gray0(image.size(), CV_8U), gray;
// down-scale and upscale the image to filter out the noise
pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
pyrUp(pyr, timg, image.size());
vector<vector<Point> > contours;
// find squares in every color plane of the image
for( int c = 0; c < 3; c++ )
{
int ch[] = {c, 0};
mixChannels(&timg, 1, &gray0, 1, ch, 1);
// try several threshold levels
for( int l = 0; l < N; l++ )
{
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if( l == 0 )
{
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
Canny(gray0, gray, 0, thresh, 5);
// dilate canny output to remove potential
// holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
// apply threshold if l!=0:
// tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
gray = gray0 >= (l+1)*255/N;
}
// find contours and store them all as a list
findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
vector<Point> approx;
// test each contour
for( size_t i = 0; i < contours.size(); i++ )
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// square contours should have 4 vertices after approximation
// relatively large area (to filter out noisy contours)
// and be convex.
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if( approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)) )
{
double maxCosine = 0;
for( int j = 2; j < 5; j++ )
{
// find the maximum cosine of the angle between joint edges
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
// if cosines of all angles are small
// (all angles are ~90 degree) then write quandrange
// vertices to resultant sequence
if( maxCosine < 0.3 )
squares.push_back(approx);
}
}
}
}
}
// the function draws all the squares in the image
static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
{
for( size_t i = 0; i < squares.size(); i++ )
{
const Point* p = &squares[i][0];
int n = (int)squares[i].size();
polylines(image, &p, &n, 1, true, Scalar(255,0,0), 3, LINE_AA);
}
imshow(wndname, image);
}
CvRect rect;
CvSeq* contours = 0;
CvMemStorage* storage = NULL;
CvCapture *cam;
IplImage *currentFrame, *currentFrame_grey, *differenceImg, *oldFrame_grey;
bool first = true;
int main(int argc, char* argv[])
{
//Create a new movie capture object.
cam = cvCaptureFromCAM(0);
//create storage for contours
storage = cvCreateMemStorage(0);
//capture current frame from webcam
currentFrame = cvQueryFrame(cam);
//Size of the image.
CvSize imgSize;
imgSize.width = currentFrame->width;
imgSize.height = currentFrame->height;
//Images to use in the program.
currentFrame_grey = cvCreateImage( imgSize, IPL_DEPTH_8U, 1);
namedWindow( wndname, 1 );
vector<vector<Point> > squares;
while(1)
{
currentFrame = cvQueryFrame( cam );
if( !currentFrame ) break;
//Convert the image to grayscale.
cvCvtColor(currentFrame,currentFrame_grey,CV_RGB2GRAY);
if(first) //Capturing Background for the first time
{
differenceImg = cvCloneImage(currentFrame_grey);
oldFrame_grey = cvCloneImage(currentFrame_grey);
cvConvertScale(currentFrame_grey, oldFrame_grey, 1.0, 0.0);
first = false;
continue;
}
//Minus the current frame from the moving average.
cvAbsDiff(oldFrame_grey,currentFrame_grey,differenceImg);
//bluring the differnece image
cvSmooth(differenceImg, differenceImg, CV_BLUR);
//apply threshold to discard small unwanted movements
cvThreshold(differenceImg, differenceImg, 25, 255, CV_THRESH_BINARY);
//find contours
cv::Mat diffImg = cv::cvarrToMat(differenceImg);
cv::Mat currFrame = cv::cvarrToMat(currentFrame);
findSquares(diffImg, squares);
//draw bounding box around each contour
drawSquares(currFrame, squares);
//display colour image with bounding box
cvShowImage("Output Image", currentFrame);
//display threshold image
cvShowImage("Difference image", differenceImg);
//New Background
cvConvertScale(currentFrame_grey, oldFrame_grey, 1.0, 0.0);
//clear memory and contours
cvClearMemStorage( storage );
contours = 0;
//press Esc to exit
char c = cvWaitKey(33);
if( c == 27 ) break;
}
// Destroy the image & movies objects
cvReleaseImage(&oldFrame_grey);
cvReleaseImage(&differenceImg);
cvReleaseImage(¤tFrame);
cvReleaseImage(¤tFrame_grey);
return 0;
}
As the error message says, your problem is in cv::mixChannels(). See documentation.
Or you could simply do something like
cv::Mat channels[3];
cv::split(multiChannelImage, channels);
and then access each channel using
cv::Mat currChannel = channels[channelNumber]
A while ago I asked a question about square detection and karlphillip came up with a decent result.
Now I want to take this a step further and find squares which edge aren't fully visible. Take a look at this example:
Any ideas? I'm working with karlphillips code:
void find_squares(Mat& image, vector<vector<Point> >& squares)
{
// blur will enhance edge detection
Mat blurred(image);
medianBlur(image, blurred, 9);
Mat gray0(blurred.size(), CV_8U), gray;
vector<vector<Point> > contours;
// find squares in every color plane of the image
for (int c = 0; c < 3; c++)
{
int ch[] = {c, 0};
mixChannels(&blurred, 1, &gray0, 1, ch, 1);
// try several threshold levels
const int threshold_level = 2;
for (int l = 0; l < threshold_level; l++)
{
// Use Canny instead of zero threshold level!
// Canny helps to catch squares with gradient shading
if (l == 0)
{
Canny(gray0, gray, 10, 20, 3); //
// Dilate helps to remove potential holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
gray = gray0 >= (l+1) * 255 / threshold_level;
}
// Find contours and store them in a list
findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
// Test contours
vector<Point> approx;
for (size_t i = 0; i < contours.size(); i++)
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if (approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)))
{
double maxCosine = 0;
for (int j = 2; j < 5; j++)
{
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
if (maxCosine < 0.3)
squares.push_back(approx);
}
}
}
}
}
You might try using HoughLines to detect the four sides of the square. Next, locate the four resulting line intersections to detect the corners. The Hough transform is fairly robust to noise and occlusions, so it could be useful here. Also, here is an interactive demo showing how the Hough transform works (I thought it was cool at least :). Here is one of my previous answers that detects a laser cross center showing most of the same math (except it just finds a single corner).
You will probably have multiple lines on each side, but locating the intersections should help to determine the inliers vs. outliers. Once you've located candidate corners, you can also filter these candidates by area or how "square-like" the polygon is.
EDIT : All these answers with code and images were making me think my answer was a bit lacking :) So, here is an implementation of how you could do this:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <vector>
using namespace cv;
using namespace std;
Point2f computeIntersect(Vec2f line1, Vec2f line2);
vector<Point2f> lineToPointPair(Vec2f line);
bool acceptLinePair(Vec2f line1, Vec2f line2, float minTheta);
int main(int argc, char* argv[])
{
Mat occludedSquare = imread("Square.jpg");
resize(occludedSquare, occludedSquare, Size(0, 0), 0.25, 0.25);
Mat occludedSquare8u;
cvtColor(occludedSquare, occludedSquare8u, CV_BGR2GRAY);
Mat thresh;
threshold(occludedSquare8u, thresh, 200.0, 255.0, THRESH_BINARY);
GaussianBlur(thresh, thresh, Size(7, 7), 2.0, 2.0);
Mat edges;
Canny(thresh, edges, 66.0, 133.0, 3);
vector<Vec2f> lines;
HoughLines( edges, lines, 1, CV_PI/180, 50, 0, 0 );
cout << "Detected " << lines.size() << " lines." << endl;
// compute the intersection from the lines detected...
vector<Point2f> intersections;
for( size_t i = 0; i < lines.size(); i++ )
{
for(size_t j = 0; j < lines.size(); j++)
{
Vec2f line1 = lines[i];
Vec2f line2 = lines[j];
if(acceptLinePair(line1, line2, CV_PI / 32))
{
Point2f intersection = computeIntersect(line1, line2);
intersections.push_back(intersection);
}
}
}
if(intersections.size() > 0)
{
vector<Point2f>::iterator i;
for(i = intersections.begin(); i != intersections.end(); ++i)
{
cout << "Intersection is " << i->x << ", " << i->y << endl;
circle(occludedSquare, *i, 1, Scalar(0, 255, 0), 3);
}
}
imshow("intersect", occludedSquare);
waitKey();
return 0;
}
bool acceptLinePair(Vec2f line1, Vec2f line2, float minTheta)
{
float theta1 = line1[1], theta2 = line2[1];
if(theta1 < minTheta)
{
theta1 += CV_PI; // dealing with 0 and 180 ambiguities...
}
if(theta2 < minTheta)
{
theta2 += CV_PI; // dealing with 0 and 180 ambiguities...
}
return abs(theta1 - theta2) > minTheta;
}
// the long nasty wikipedia line-intersection equation...bleh...
Point2f computeIntersect(Vec2f line1, Vec2f line2)
{
vector<Point2f> p1 = lineToPointPair(line1);
vector<Point2f> p2 = lineToPointPair(line2);
float denom = (p1[0].x - p1[1].x)*(p2[0].y - p2[1].y) - (p1[0].y - p1[1].y)*(p2[0].x - p2[1].x);
Point2f intersect(((p1[0].x*p1[1].y - p1[0].y*p1[1].x)*(p2[0].x - p2[1].x) -
(p1[0].x - p1[1].x)*(p2[0].x*p2[1].y - p2[0].y*p2[1].x)) / denom,
((p1[0].x*p1[1].y - p1[0].y*p1[1].x)*(p2[0].y - p2[1].y) -
(p1[0].y - p1[1].y)*(p2[0].x*p2[1].y - p2[0].y*p2[1].x)) / denom);
return intersect;
}
vector<Point2f> lineToPointPair(Vec2f line)
{
vector<Point2f> points;
float r = line[0], t = line[1];
double cos_t = cos(t), sin_t = sin(t);
double x0 = r*cos_t, y0 = r*sin_t;
double alpha = 1000;
points.push_back(Point2f(x0 + alpha*(-sin_t), y0 + alpha*cos_t));
points.push_back(Point2f(x0 - alpha*(-sin_t), y0 - alpha*cos_t));
return points;
}
NOTE : The main reason I resized the image was so I could see it on my screen, and speed-up processing.
Canny
This uses Canny edge detection to help greatly reduce the number of lines detected after thresholding.
Hough transform
Then the Hough transform is used to detect the sides of the square.
Intersections
Finally, we compute the intersections of all the line pairs.
Hope that helps!
I tried to use convex hull method which is pretty simple.
Here you find convex hull of the contour detected. It removes the convexity defects at the bottom of paper.
Below is the code (in OpenCV-Python):
import cv2
import numpy as np
img = cv2.imread('sof.jpg')
img = cv2.resize(img,(500,500))
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
contours,hier = cv2.findContours(thresh,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
if cv2.contourArea(cnt)>5000: # remove small areas like noise etc
hull = cv2.convexHull(cnt) # find the convex hull of contour
hull = cv2.approxPolyDP(hull,0.1*cv2.arcLength(hull,True),True)
if len(hull)==4:
cv2.drawContours(img,[hull],0,(0,255,0),2)
cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
(Here, i haven't found square in all planes. Do it yourself if you want.)
Below is the result i got:
I hope this is what you needed.
1st: start experimenting with threshold techniques to isolate the white paper sheet from the rest of the image. This is a simple way:
Mat new_img = imread(argv[1]);
double thres = 200;
double color = 255;
threshold(new_img, new_img, thres, color, CV_THRESH_BINARY);
imwrite("thres.png", new_img);
but there are other alternatives that could provide better result. One is to investigate inRange(), and another is to detect through color by converting the image to the HSV color space.
This thread also provides an interest discussion on the subject.
2nd: after you execute one of this procedures, you could try to feed the result directly into find_squares():
An alternative to find_squares() is to implement the bounding box technique, which has the potential to provide a more accurate detection of the rectangular area (provided that you have a perfect result of threshold). I've used it here and here. It's worth noting that OpenCV has it's own bounding box tutorial.
Another approach besides find_squares(), as pointed by Abid on his answer, is to use the convexHull method. Check OpenCV's C++ tutorial on this method for code.
convert to lab space
use kmeans for 2 clusters
detect suqares one internal cluster it will solve many thing in the rgb space