I want to make a musaic image by stitching some images.
I am doing this by sift features.
when I stitch the following two images :
img1
img2
I achieve this result. as you can see the images are not aligned completely and there is a gap between them. Can you tell me what is the problem and how can I solve it?
Here is my function that I used for stitching :
Mat StitchVertical(Mat& imGrayImg1, Mat& imGrayImg2)
{
Mat GrayImg1(imGrayImg1, Rect(0, 0, imGrayImg1.cols, imGrayImg1.rows)); // init roi
Mat GrayImg2(imGrayImg2, Rect(0, 0, imGrayImg2.cols, imGrayImg2.rows));
cvtColor(GrayImg1, GrayImg1, COLOR_BGR2GRAY);
cvtColor(GrayImg2, GrayImg2, COLOR_BGR2GRAY);
vector<cv::KeyPoint> keypointsImg1, keypointsImg2;
Mat descriptorImg1, descriptorImg2;
BFMatcher matcher;
vector<cv::DMatch> matches, good_matches;
cv::Ptr<cv::SIFT> sift = cv::SIFT::create();
int i, dist = 80;
sift->detectAndCompute(GrayImg1, cv::Mat(), keypointsImg1, descriptorImg1); /* get keypoints of ROI image */
sift->detectAndCompute(GrayImg2, cv::Mat(), keypointsImg2, descriptorImg2); /* get keypoints of the image */
matcher.match(descriptorImg1, descriptorImg2, matches); // Matching between descriptors
double max_dist = 0; double min_dist = 5000;
for (int i = 0; i < descriptorImg1.rows; i++)
{
double dist = matches[i].distance;
if (dist < min_dist) min_dist = dist;
if (dist > max_dist) max_dist = dist;
}
for (i = 0; i < descriptorImg1.rows; i++)
{
if (matches[i].distance < 3 * min_dist)
{
good_matches.push_back(matches[i]);
}
}
Mat img_matches;
// Draw match
drawMatches(imGrayImg1, keypointsImg1, imGrayImg2, keypointsImg2,
good_matches, img_matches, Scalar::all(-1),
Scalar::all(-1), vector<char>(),
DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
//imshow("matches", img_matches);
vector<Point2f> keypoints1, keypoints2;
for (i = 0; i < good_matches.size(); i++)
{
keypoints1.push_back(keypointsImg2[good_matches[i].trainIdx].pt);
keypoints2.push_back(keypointsImg1[good_matches[i].queryIdx].pt);
}
Mat H, H2;
H = findHomography(keypoints1, keypoints2, RANSAC);
H2 = findHomography(keypoints2, keypoints1, RANSAC);
Mat stitchedImage;
int mRows = imGrayImg2.cols;
if (imGrayImg1.cols > imGrayImg2.cols)
{
mRows = imGrayImg1.cols;
}
int count = 0;
for (int i = 0; i < keypoints2.size(); i++)
{
if (keypoints2[i].y >= imGrayImg2.rows / 3) { count++; }
}
int minx,miny;
if (count / float(keypoints2.size()) >= 0.5) // To be spliced imGrayImg2 The image is on the right
{
cout << "imGrayImg1 should left" << endl;
vector<Point2f>corners(4);
vector<Point2f>corners2(4);
corners[0] = Point(0, 0);
corners[1] = Point(0, imGrayImg2.rows);
corners[2] = Point(imGrayImg2.cols, imGrayImg2.rows);
corners[3] = Point(imGrayImg2.cols, 0);
stitchedImage = Mat::zeros( mRows, imGrayImg2.rows + imGrayImg1.rows, CV_8UC3);
warpPerspective(imGrayImg2, stitchedImage, H, Size(mRows, imGrayImg1.cols + imGrayImg2.cols));
perspectiveTransform(corners, corners2, H);
Mat half(stitchedImage, Rect(0, 0, imGrayImg1.cols, imGrayImg1.rows));
imGrayImg1.copyTo(half);
minx = stitchedImage.size().width;
miny = stitchedImage.size().height;
if ((int(corners2[2].x) - 10) < stitchedImage.size().width) {
minx = (int(corners2[2].x) - 10);
}
if ((int(corners2[2].y) - 10) < stitchedImage.size().height) {
miny = (int(corners2[2].y) - 10);
}
Rect crop_region(0, 0, minx, miny);
Mat cropped_image = stitchedImage(crop_region);
return cropped_image;
}
else
{
cout << "imGrayImg2 should be up" << endl;
stitchedImage = Mat::zeros(mRows, imGrayImg2.rows + imGrayImg1.rows, CV_8UC3);
warpPerspective(imGrayImg1, stitchedImage, H2, Size(mRows, imGrayImg1.cols + imGrayImg2.cols));
//imshow("temp", stitchedImage);
vector<Point2f>corners(4);
vector<Point2f>corners2(4);
corners[0] = Point(0, 0);
corners[1] = Point(0, imGrayImg1.rows);
corners[2] = Point(imGrayImg1.cols, imGrayImg1.rows);
corners[3] = Point(imGrayImg1.cols, 0);
cout << "second if in up and down" << endl;
perspectiveTransform(corners, corners2, H2); // Affine transformations correspond to endpoints
Mat half(stitchedImage, Rect(0, 0, imGrayImg2.cols, imGrayImg2.rows));
imGrayImg2.copyTo(half);
minx = stitchedImage.size().width;
miny = stitchedImage.size().height;
if ((int(corners2[2].x) - 10) < stitchedImage.size().width) {
minx = (int(corners2[2].x) - 10);
}
if ((int(corners2[2].y) - 10) < stitchedImage.size().height) {
miny = (int(corners2[2].y) - 10);
}
Rect crop_region(0, 0, minx, miny);
Mat cropped_image = stitchedImage(crop_region); return cropped_image;
}
imwrite("result.bmp", stitchedImage);
return stitchedImage;
}
Related
I'm trying to detect the following book, using findcontours but it cannot be detected at all and I get exception because there is no convex hull.
I tried to blur, dilate, canny detection, with no success at all.
I hope to get a solution for finding a rectangular paper/book using openCV.
Please let me know if you have further questions or need resources.
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
double angle(cv::Point pt1, cv::Point pt2, cv::Point pt0) {
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2) / sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
void find_squares(Mat& image, vector<vector<Point> >& squares)
{
// blur will enhance edge detection
Mat blurred(image);
Mat dst;
medianBlur(image, dst, 9);
Mat gray0(dst.size(), CV_8U), gray;
vector<vector<Point> > contours;
// find squares in every color plane of the image
for (int c = 0; c < 3; c++)
{
int ch[] = { c, 0 };
mixChannels(&dst, 1, &gray0, 1, ch, 1);
// try several threshold levels
const int threshold_level = 2;
for (int l = 0; l < threshold_level; l++)
{
// Use Canny instead of zero threshold level!
// Canny helps to catch squares with gradient shading
if (l == 0)
{
Canny(gray0, gray, 10, 20, 3); //
// Dilate helps to remove potential holes between edge segments
dilate(gray, gray, Mat(), Point(-1, -1));
}
else
{
gray = gray0 >= (l + 1) * 255 / threshold_level;
}
// Find contours and store them in a list
findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
// Test contours
vector<Point> approx;
for (size_t i = 0; i < contours.size(); i++)
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if (approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)))
{
double maxCosine = 0;
for (int j = 2; j < 5; j++)
{
double cosine = fabs(angle(approx[j % 4], approx[j - 2], approx[j - 1]));
maxCosine = MAX(maxCosine, cosine);
}
if (maxCosine < 0.3)
squares.push_back(approx);
}
}
}
}
}
cv::Mat debugSquares(std::vector<std::vector<cv::Point> > squares, cv::Mat image)
{
for (int i = 0; i< squares.size(); i++) {
// draw contour
cv::drawContours(image, squares, i, cv::Scalar(255, 0, 0), 1, 8, std::vector<cv::Vec4i>(), 0, cv::Point());
// draw bounding rect
cv::Rect rect = boundingRect(cv::Mat(squares[i]));
cv::rectangle(image, rect.tl(), rect.br(), cv::Scalar(0, 255, 0), 2, 8, 0);
// draw rotated rect
cv::RotatedRect minRect = minAreaRect(cv::Mat(squares[i]));
cv::Point2f rect_points[4];
minRect.points(rect_points);
for (int j = 0; j < 4; j++) {
cv::line(image, rect_points[j], rect_points[(j + 1) % 4], cv::Scalar(0, 0, 255), 1, 8); // blue
}
}
return image;
}
static std::vector<cv::Point> extremePoints(std::vector<cv::Point>pts)
{
int xmin = 0, ymin = 0, xmax = -1, ymax = -1, i;
Point ptxmin, ptymin, ptxmax, ptymax;
Point pt = pts[0];
ptxmin = ptymin = ptxmax = ptymax = pt;
xmin = xmax = pt.x;
ymin = ymax = pt.y;
for (size_t i = 1; i < pts.size(); i++)
{
pt = pts[i];
if (xmin > pt.x)
{
xmin = pt.x;
ptxmin = pt;
}
if (xmax < pt.x)
{
xmax = pt.x;
ptxmax = pt;
}
if (ymin > pt.y)
{
ymin = pt.y;
ptymin = pt;
}
if (ymax < pt.y)
{
ymax = pt.y;
ptymax = pt;
}
}
std::vector<cv::Point> res;
res.push_back(ptxmin);
res.push_back(ptxmax);
res.push_back(ptymin);
res.push_back(ptymax);
return res;
}
void sortCorners(std::vector<cv::Point2f>& corners)
{
std::vector<cv::Point2f> top, bot;
cv::Point2f center;
// Get mass center
for (int i = 0; i < corners.size(); i++)
center += corners[i];
center *= (1. / corners.size());
for (int i = 0; i < corners.size(); i++)
{
if (corners[i].y < center.y)
top.push_back(corners[i]);
else
bot.push_back(corners[i]);
}
corners.clear();
if (top.size() == 2 && bot.size() == 2) {
cv::Point2f tl = top[0].x > top[1].x ? top[1] : top[0];
cv::Point2f tr = top[0].x > top[1].x ? top[0] : top[1];
cv::Point2f bl = bot[0].x > bot[1].x ? bot[1] : bot[0];
cv::Point2f br = bot[0].x > bot[1].x ? bot[0] : bot[1];
corners.push_back(tl);
corners.push_back(tr);
corners.push_back(br);
corners.push_back(bl);
}
}
int main(int, char**)
{
int largest_area = 0;
int largest_contour_index = 0;
cv::Rect bounding_rect;
Mat src, edges;
src = imread("20628991_10159154614610574_1244594322_o.jpg");
cvtColor(src, edges, COLOR_BGR2GRAY);
GaussianBlur(edges, edges, Size(5, 5), 1.5, 1.5);
erode(edges, edges, Mat());// these lines may need to be optimized
dilate(edges, edges, Mat());
dilate(edges, edges, Mat());
erode(edges, edges, Mat());
Canny(edges, edges, 150, 150, 3); // canny parameters may need to be optimized
imshow("edges", edges);
vector<Point> selected;
vector<vector<Point> > contours;
findContours(edges, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
for (size_t i = 0; i < contours.size(); i++)
{
Rect minRect = boundingRect(contours[i]);
if (minRect.width > 150 & minRect.height > 150) // this line also need to be optimized
{
selected.insert(selected.end(), contours[i].begin(), contours[i].end());
}
}
convexHull(selected, selected);
RotatedRect minRect = minAreaRect(selected);
std::vector<cv::Point> corner_points = extremePoints(selected);
std::vector<cv::Point2f> corners;
corners.push_back(corner_points[0]);
corners.push_back(corner_points[1]);
corners.push_back(corner_points[2]);
corners.push_back(corner_points[3]);
sortCorners(corners);
cv::Mat quad = cv::Mat::zeros(norm(corners[1] - corners[2]), norm(corners[2] - corners[3]), CV_8UC3);
std::vector<cv::Point2f> quad_pts;
quad_pts.push_back(cv::Point2f(0, 0));
quad_pts.push_back(cv::Point2f(quad.cols, 0));
quad_pts.push_back(cv::Point2f(quad.cols, quad.rows));
quad_pts.push_back(cv::Point2f(0, quad.rows));
cv::Mat transmtx = cv::getPerspectiveTransform(corners, quad_pts);
cv::warpPerspective(src, quad, transmtx, quad.size());
resize(quad, quad, Size(), 0.25, 0.25); // you can remove this line to keep the image original size
imshow("quad", quad);
polylines(src, selected, true, Scalar(0, 0, 255), 2);
resize(src, src, Size(), 0.5, 0.5); // you can remove this line to keep the image original size
imshow("result", src);
waitKey(0);
return 0;
}
Strange, I did it with exactly that (blur, dilate, canny):
The code (in Python, but there's nothing but OpenCV function calls, so should be easy to follow; as one of the references I used this answer, which is in C++, it also shows how to correct the perspective and turn it into a rectangle):
import numpy as np
import cv2
img = cv2.imread('sngo1.jpg')
#resize and create a copy for future drawing
resize_coeff = 0.5
w, h, c = img.shape
img_in = cv2.resize(img, (int(resize_coeff*h), int(resize_coeff*w)))
img_out = img_in.copy()
#median and canny
img_in = cv2.medianBlur(img_in, 5)
img_in = cv2.Canny(img_in, 100, 200)
#morphological close for our edges
kernel = np.ones((17, 17), np.uint8)
img_in = cv2.morphologyEx(img_in, cv2.MORPH_CLOSE, kernel, iterations = 1)
#find contours, get max by area
img_in, contours, hierarchy = cv2.findContours(img_in, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
max_index, max_area = max(enumerate([cv2.contourArea(x) for x in contours]), key = lambda x: x[1])
max_contour = contours[max_index]
#approximage it with a quadrangle
approx = cv2.approxPolyDP(max_contour, 0.1*cv2.arcLength(max_contour, True), True)
approx = approx[:,0,:]
cv2.drawContours(img_out, [approx], 0, (255, 0, 0), 2)
cv2.imwrite("result.png", img_out)
I found this code inside LATCH_match.cpp in xfeature2D folder:
BFMatcher matcher(NORM_HAMMING);
vector< vector<DMatch> > nn_matches;
matcher.knnMatch(desc1, desc2, nn_matches, 2);
vector<KeyPoint> matched1, matched2, inliers1, inliers2;
vector<DMatch> good_matches;
for (unsigned i = 0; i < matched1.size(); i++) {
Mat col = Mat::ones(3, 1, CV_64F);
col.at<double>(0) = matched1[i].pt.x;
col.at<double>(1) = matched1[i].pt.y;
col = homography * col;
col /= col.at<double>(2);
double dist = sqrt(pow(col.at<double>(0) - matched2[i].pt.x, 2) +
pow(col.at<double>(1) - matched2[i].pt.y, 2));
if (dist < inlier_threshold) {
int new_i = static_cast<int>(inliers1.size());
inliers1.push_back(matched1[i]);
inliers2.push_back(matched2[i]);
good_matches.push_back(DMatch(new_i, new_i, 0));
}
}
Mat res;
drawMatches(img1, inliers1, img2, inliers2, good_matches, res);
imwrite("latch_res.png", res);
I see that this a kind of ratio test, but I don't get what's the point of inliers and homography
I extracted four features of regions in an image using opencv249 and Visual Studio 2013. Here is the code:
vector<double> calculateFeatures(Mat src, Mat mask, Rect Rect){
vector<double> sonuc;
double Feature1, Feature2, Feature3, Feature4;
Mat bolum(src, Rect);
Scalar mean_number = mean(src, mask);
double num = mean_number.val[0];
double minVal;
double maxVal = 0;
Point minLoc;
Point maxLoc = 0;
minMaxLoc(bolum, &minVal, &maxVal, &minLoc, &maxLoc);
double fark = num - minVal;
Feature1 = fark;
double thresh = num*0.95;
int sayi = countNonZero(bolum < thresh);
int alan = countNonZero(mask);
double pixelSayisi = sayi / alan;
Feature2 = pixelSayisi;
Mat dst, smoothed;
GaussianBlur(bolum, smoothed, Size(25, 25), 4, 4);
Laplacian(smoothed, dst, CV_8UC1, 25, 1, 0, BORDER_DEFAULT);
cv::Scalar s = cv::sum(dst);
double toplam = s.val[0];
Feature3 = toplam;
cout << "s: " << s << endl;
Mat dst2;
cornerHarris(bolum, dst2, 2, 25, 0.04, BORDER_DEFAULT);
Scalar top = sum(dst2);
double top2 = top.val[0];
Feature4 = top2 / alan;
cout << "Feature4: " << Feature4 << endl;
sonuc.push_back(Feature1);
sonuc.push_back(Feature2);
sonuc.push_back(Feature3);
sonuc.push_back(Feature4);
return sonuc;
}
I would like to see representative feature images of that regions in order to evaluate features whether they are useful or not. How can I implement?
Here I computed the features on patches 32x32 on this image:
You can create an image for each dimension of your feature, and the value of the pixels is given by the feature value at that location.
If you have at most 4 features, like in this case, you can combine all images into a 4 channel image:
Full code for reference:
#include <opencv2/opencv.hpp>
#include <vector>
using namespace cv;
using namespace std;
vector<double> calculateFeatures(Mat src, Mat mask, Rect Rect){
vector<double> sonuc;
double Feature1, Feature2, Feature3, Feature4;
Mat bolum(src, Rect);
Scalar mean_number = mean(src, mask);
double num = mean_number.val[0];
double minVal;
double maxVal = 0;
Point minLoc;
Point maxLoc = 0;
minMaxLoc(bolum, &minVal, &maxVal, &minLoc, &maxLoc);
double fark = num - minVal;
Feature1 = fark;
double thresh = num*0.95;
int sayi = countNonZero(bolum < thresh);
int alan = countNonZero(mask);
double pixelSayisi = sayi / alan;
Feature2 = pixelSayisi;
Mat dst, smoothed;
GaussianBlur(bolum, smoothed, Size(25, 25), 4, 4);
Laplacian(smoothed, dst, CV_8UC1, 25, 1, 0, BORDER_DEFAULT);
cv::Scalar s = cv::sum(dst);
double toplam = s.val[0];
Feature3 = toplam;
cout << "s: " << s << endl;
Mat dst2;
cornerHarris(bolum, dst2, 2, 25, 0.04, BORDER_DEFAULT);
Scalar top = sum(dst2);
double top2 = top.val[0];
Feature4 = top2 / alan;
cout << "Feature4: " << Feature4 << endl;
sonuc.push_back(Feature1);
sonuc.push_back(Feature2);
sonuc.push_back(Feature3);
sonuc.push_back(Feature4);
return sonuc;
}
int main()
{
Mat1b img = imread("path_to_image", IMREAD_GRAYSCALE);
int featuresSize = 4;
int patch_size = 32;
int right = patch_size - (img.cols % patch_size);
int bottom = patch_size - (img.rows % patch_size);
copyMakeBorder(img, img, 0, bottom, 0, right, BORDER_REFLECT101);
vector<Mat1d> visual;
for (int i = 0; i < featuresSize; ++i)
{
Mat1d v(img.rows, img.cols);
v.setTo(0);
visual.push_back(v.clone());
}
for (int r = 0; r < img.rows; r += patch_size)
{
for (int c = 0; c < img.cols; c += patch_size)
{
Rect roi(c, r, patch_size, patch_size);
Mat1b mask(img.rows, img.cols, uchar(0));
mask(roi) = uchar(255);
vector<double> features = calculateFeatures(img, mask, roi);
for (int i = 0; i < featuresSize; ++i)
{
visual[i](roi) = features[i];
}
}
}
for (int i = 0; i < featuresSize; ++i)
{
normalize(visual[i], visual[i], 0.0, 1.0, NORM_MINMAX);
Mat1b b;
visual[i].convertTo(b, CV_8U, 255.0);
imshow("Feature " + to_string(i), b);
}
// Makes sense only if nFeatures <= 4
Mat all;
merge(visual, all);
Mat allb;
all.convertTo(allb, CV_8U, 255.0);
imshow("All", all);
waitKey(0);
return 0;
}
I need to binarize images with text.. It works very well but in some cases the output is empty (white image)
code
/*
* Compile
* # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
*
* Run
* # ./txtbin input.jpg output.png
*/
#include "string"
#include "fstream"
#include "/usr/include/opencv2/opencv.hpp"
#include "/usr/include/boost/tuple/tuple.hpp"
using namespace std;
using namespace cv;
using namespace boost;
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
/*
* blockSide: set greater for larger fonts in image
* contrast: set smaller for lower contrast image
*/
Mat I;
Img.convertTo(I, CV_32FC1);
Res = Mat::zeros(Img.rows / blockSide, Img.cols / blockSide, CV_32FC1);
Mat inpaintmask;
Mat patch;
Mat smallImg;
Scalar m, s;
for(int i = 0; i < Img.rows - blockSide; i += blockSide){
for(int j = 0; j < Img.cols - blockSide; j += blockSide){
patch = I(Range(i, i + blockSide + 1), Range(j, j + blockSide + 1));
meanStdDev(patch, m, s);
if(s[0] > contrast){
Res.at<float>(i / blockSide, j / blockSide) = m[0];
}
else{
Res.at<float>(i / blockSide, j / blockSide) = 0;
}
}
}
resize(I, smallImg, Res.size());
threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY);
Mat inpainted;
smallImg.convertTo(smallImg, CV_8UC1, 255);
inpaintmask.convertTo(inpaintmask, CV_8UC1);
inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);
resize(inpainted, Res, Img.size());
Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}
tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
Mat large = imread(input);
bool test_output = false;
int
top = large.rows,
bottom = 0,
left = large.cols,
right = 0;
int
rect_bottom,
rect_right;
Mat rgb;
// downsample and use it for processing
pyrDown(large, rgb);
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
// filter contours
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width * rect.height);
// assume at least 45% of the area is filled if it contains text
if (r > 0.45 &&
(rect.height > 8 && rect.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
if(draw_contours){
rectangle(res, Rect(rect.x * 2, rect.y * 2, rect.width * 2, rect.height * 2), Scalar(0, 255, 0), 2);
}
if(test_output){
rectangle(rgb, rect, Scalar(0, 255, 0), 2);
}
if(rect.y < top){
top = rect.y;
}
rect_bottom = rect.y + rect.height;
if(rect_bottom > bottom){
bottom = rect_bottom;
}
if(rect.x < left){
left = rect.x;
}
rect_right = rect.x + rect.width;
if(rect_right > right){
right = rect_right;
}
}
}
if(draw_contours){
rectangle(res, Point(left * 2, top * 2), Point(right * 2, bottom * 2), Scalar(0, 0, 255), 2);
}
if(test_output){
rectangle(rgb, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
imwrite(string("test_text_contours.jpg"), rgb);
}
return make_tuple(left * 2, top * 2, (right - left) * 2, (bottom - top) * 2);
}
int main(int argc, char* argv[]){
string input;
string output = "output.png";
int
width = 0,
height = 0;
bool
crop = false,
draw = false;
float margin = 0;
// Return error if arguments are missing
if(argc < 3){
cerr << "\nUsage: txtbin input [options] output\n\n"
"Options:\n"
"\t-w <number> -- set max width (keeps aspect ratio)\n"
"\t-h <number> -- set max height (keeps aspect ratio)\n"
"\t-c -- crop text content contour\n"
"\t-m <number> -- add margins (number in %)\n"
"\t-d -- draw text content contours (debugging)\n" << endl;
return 1;
}
// Parse arguments
for(int i = 1; i < argc; i++){
if(i == 1){
input = string(argv[i]);
// Return error if input file is invalid
ifstream stream(input.c_str());
if(!stream.good()){
cerr << "Error: Input file is invalid!" << endl;
return 1;
}
}
else if(string(argv[i]) == "-w"){
width = atoi(argv[++i]);
}
else if(string(argv[i]) == "-h"){
height = atoi(argv[++i]);
}
else if(string(argv[i]) == "-c"){
crop = true;
}
else if(string(argv[i]) == "-m"){
margin = atoi(argv[++i]);
}
else if(string(argv[i]) == "-d"){
draw = true;
}
else if(i == argc - 1){
output = string(argv[i]);
}
}
Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE);
Mat res;
Img.convertTo(Img, CV_32FC1, 1.0 / 255.0);
CalcBlockMeanVariance(Img, res);
res = 1.0 - res;
res = Img + res;
threshold(res, res, 0.85, 1, THRESH_BINARY);
int
txt_x,
txt_y,
txt_width,
txt_height;
if(crop || draw){
tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw);
}
if(crop){
//res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone();
res = res(Rect(txt_x, txt_y, txt_width, txt_height));
}
if(margin){
int border = res.cols * margin / 100;
copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255));
}
float
width_input = res.cols,
height_input = res.rows;
bool resized = false;
// Downscale image
if(width > 0 && width_input > width){
float scale = width_input / width;
width_input /= scale;
height_input /= scale;
resized = true;
}
if(height > 0 && height_input > height){
float scale = height_input / height;
width_input /= scale;
height_input /= scale;
resized = true;
}
if(resized){
resize(res, res, Size(round(width_input), round(height_input)));
}
imwrite(output, res * 255);
return 0;
}
Ok :)
Set blockSide smaller (7 for instance) it will give you result image as shown below. It depends on font size, smaller fonts need smaller block size, else text will be filtered out and you get empty image.
#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdarg.h>
#include "/usr/include/opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=9) // blockSide - the parameter (set greater for larger font on image)
{
Mat I;
Img.convertTo(I,CV_32FC1);
Res=Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1);
Mat inpaintmask;
Mat patch;
Mat smallImg;
Scalar m,s;
for(int i=0;i<Img.rows-blockSide;i+=blockSide)
{
for (int j=0;j<Img.cols-blockSide;j+=blockSide)
{
patch=I(Range(i,i+blockSide+1),Range(j,j+blockSide+1));
cv::meanStdDev(patch,m,s);
if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image)
{
Res.at<float>(i/blockSide,j/blockSide)=m[0];
}else
{
Res.at<float>(i/blockSide,j/blockSide)=0;
}
}
}
cv::resize(I,smallImg,Res.size());
cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);
Mat inpainted;
smallImg.convertTo(smallImg,CV_8UC1,255);
inpaintmask.convertTo(inpaintmask,CV_8UC1);
inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);
cv::resize(inpainted,Res,Img.size());
Res.convertTo(Res,CV_32FC1,1.0/255.0);
}
int main( int argc, char** argv )
{
namedWindow("Img");
namedWindow("Edges");
//Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
Mat Img=imread("test2.jpg",0);
Mat res;
Img.convertTo(Img,CV_32FC1,1.0/255.0);
CalcBlockMeanVariance(Img,res);
res=1.0-res;
res=Img+res;
imshow("Img",Img);
cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
imwrite("result.jpg",res*255);
imshow("Edges",res);
waitKey(0);
return 0;
}
I asked a previous question here and following the advice from the answer I built the below program which I thought would detect large rectangle but it doesn't detect the rectangle at all. It does work on this image though.
Original Image
Desired Image
I want the solution to work on not only this image but different images of this kind. Major part of the code below is from different answers on SO
My full program:
#include <cv.h>
#include <highgui.h>
using namespace cv;
using namespace std;
double angle( Point pt1, Point pt2, Point pt0 ) {
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
void find_squares( Mat& image, vector< vector< Point> >& squares)
{
// blur will enhance edge detection
Mat blurred(image);
medianBlur(image, blurred, 9);
Mat gray0(blurred.size(), CV_8U), gray;
vector< vector< Point> > contours;
// find squares in every color plane of the image
for (int c = 0; c < 3; c++)
{
int ch[] = {c, 0};
mixChannels(&blurred, 1, &gray0, 1, ch, 1);
// try several threshold levels
const int threshold_level = 2;
for (int l = 0; l < threshold_level; l++)
{
// Use Canny instead of zero threshold level!
// Canny helps to catch squares with gradient shading
if (l == 0)
{
Canny(gray0, gray, 10, 20, 3); //
// Dilate helps to remove potential holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
gray = gray0 >= (l+1) * 255 / threshold_level;
}
// Find contours and store them in a list
findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
// Test contours
vector< Point> approx;
for (size_t i = 0; i < contours.size(); i++)
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP( Mat(contours[i]), approx, arcLength( Mat(contours[i]), true)*0.02, true);
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if (approx.size() == 4 &&
fabs(contourArea( Mat(approx))) > 1000 &&
isContourConvex( Mat(approx)))
{
double maxCosine = 0;
for (int j = 2; j < 5; j++)
{
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
if (maxCosine < 0.3)
squares.push_back(approx);
}
}
}
}
}
void find_largest_square(const vector<vector <Point> >& squares, vector<Point>& biggest_square) {
if (!squares.size()) {
return;
}
int max_width = 0;
int max_height = 0;
int max_square_idx = 0;
const int n_points = 4;
for (size_t i = 0; i < squares.size(); i++) {
Rect rectangle = boundingRect(Mat(squares[i]));
if ((rectangle.width >= max_width) && (rectangle.height >= max_height)) {
max_width = rectangle.width;
max_height = rectangle.height;
max_square_idx = i;
}
}
biggest_square = squares[max_square_idx];
}
int main(int argc, char* argv[])
{
Mat img = imread(argv[1]);
if (img.empty())
{
cout << "!!! imread() failed to open target image" << endl;
return -1;
}
vector< vector< Point> > squares;
find_squares(img, squares);
vector<Point> largest_square;
find_largest_square(squares, largest_square);
for (int i = 0; i < 4; ++i) {
line(img, largest_square[i], largest_square[(i+1)%4], Scalar(0, 255, 0), 1, CV_AA);
}
imwrite("squares.png", img);
imshow("squares", img);
waitKey(0);
return 0;
}
I think you can do it easily using findContours function - http://docs.opencv.org/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.html The biggest contour (or eventually second biggest) should be contour of black rectangle. Then just find the smallest rectangle which will surround this contour (just find points with the biggest/smallest x/y coordinates).