Extracting text OpenCV - c++

I am trying to find the bounding boxes of text in an image and am currently using this approach:
// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);
// threshold the high variance regions
Mat varMatRegions = varMat > 100;
When given an image like this:
Then when I show varMatRegions I get this image:
As you can see it somewhat combines the left block of text with the header of the card, for most cards this method works great but on busier cards it can cause problems.
The reason it is bad for those contours to connect is that it makes the bounding box of the contour nearly take up the entire card.
Can anyone suggest a different way I can find the text to ensure proper detection of text?
200 points to whoever can find the text in the card above the these two.

I used a gradient based method in the program below. Added the resulting images. Please note that I'm using a scaled down version of the image for processing.
c++ version
The MIT License (MIT)
Copyright (c) 2014 Dhanushka Dangampola
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
#include "stdafx.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
using namespace cv;
using namespace std;
#define INPUT_FILE "1.jpg"
#define OUTPUT_FOLDER_PATH string("")
int _tmain(int argc, _TCHAR* argv[])
{
Mat large = imread(INPUT_FILE);
Mat rgb;
// downsample and use it for processing
pyrDown(large, rgb);
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
// filter contours
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
{
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI)/(rect.width*rect.height);
if (r > .45 /* assume at least 45% of the area is filled if it contains text */
&&
(rect.height > 8 && rect.width > 8) /* constraints on region size */
/* these two conditions alone are not very robust. better to use something
like the number of significant peaks in a horizontal projection as a third condition */
)
{
rectangle(rgb, rect, Scalar(0, 255, 0), 2);
}
}
imwrite(OUTPUT_FOLDER_PATH + string("rgb.jpg"), rgb);
return 0;
}
python version
The MIT License (MIT)
Copyright (c) 2017 Dhanushka Dangampola
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
import cv2
import numpy as np
large = cv2.imread('1.jpg')
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# using RETR_EXTERNAL instead of RETR_CCOMP
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#For opencv 3+ comment the previous line and uncomment the following line
#_, contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
mask = np.zeros(bw.shape, dtype=np.uint8)
for idx in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[idx])
mask[y:y+h, x:x+w] = 0
cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
if r > 0.45 and w > 8 and h > 8:
cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)
cv2.imshow('rects', rgb)

You can detect text by finding close edge elements (inspired from a LPD):
#include "opencv2/opencv.hpp"
std::vector<cv::Rect> detectLetters(cv::Mat img)
{
std::vector<cv::Rect> boundRect;
cv::Mat img_gray, img_sobel, img_threshold, element;
cvtColor(img, img_gray, CV_BGR2GRAY);
cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
std::vector< std::vector< cv::Point> > contours;
cv::findContours(img_threshold, contours, 0, 1);
std::vector<std::vector<cv::Point> > contours_poly( contours.size() );
for( int i = 0; i < contours.size(); i++ )
if (contours[i].size()>100)
{
cv::approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true );
cv::Rect appRect( boundingRect( cv::Mat(contours_poly[i]) ));
if (appRect.width>appRect.height)
boundRect.push_back(appRect);
}
return boundRect;
}
Usage:
int main(int argc,char** argv)
{
//Read
cv::Mat img1=cv::imread("side_1.jpg");
cv::Mat img2=cv::imread("side_2.jpg");
//Detect
std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
//Display
for(int i=0; i< letterBBoxes1.size(); i++)
cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut1.jpg", img1);
for(int i=0; i< letterBBoxes2.size(); i++)
cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut2.jpg", img2);
return 0;
}
Results:
a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) );
Results are similar for the other image mentioned.

Here is an alternative approach that I used to detect the text blocks:
Converted the image to grayscale
Applied threshold (simple binary threshold, with a handpicked value of 150 as the threshold value)
Applied dilation to thicken lines in image, leading to more compact objects and less white space fragments. Used a high value for number of iterations, so dilation is very heavy (13 iterations, also handpicked for optimal results).
Identified contours of objects in resulted image using opencv findContours function.
Drew a bounding box (rectangle) circumscribing each contoured object - each of them frames a block of text.
Optionally discarded areas that are unlikely to be the object you are searching for (e.g. text blocks) given their size, as the algorithm above can also find intersecting or nested objects (like the entire top area for the first card) some of which could be uninteresting for your purposes.
Below is the code written in python with pyopencv, it should easy to port to C++.
import cv2
image = cv2.imread("card.png")
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) # grayscale
_,thresh = cv2.threshold(gray,150,255,cv2.THRESH_BINARY_INV) # threshold
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
dilated = cv2.dilate(thresh,kernel,iterations = 13) # dilate
_, contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) # get contours
# for each contour found, draw a rectangle around it on original image
for contour in contours:
# get rectangle bounding contour
[x,y,w,h] = cv2.boundingRect(contour)
# discard areas that are too large
if h>300 and w>300:
continue
# discard areas that are too small
if h<40 or w<40:
continue
# draw rectangle around contour on original image
cv2.rectangle(image,(x,y),(x+w,y+h),(255,0,255),2)
# write original image with added contours to disk
cv2.imwrite("contoured.jpg", image)
The original image is the first image in your post.
After preprocessing (grayscale, threshold and dilate - so after step 3) the image looked like this:
Below is the resulted image ("contoured.jpg" in the last line); the final bounding boxes for the objects in the image look like this:
You can see the text block on the left is detected as a separate block, delimited from its surroundings.
Using the same script with the same parameters (except for thresholding type that was changed for the second image like described below), here are the results for the other 2 cards:
Tuning the parameters
The parameters (threshold value, dilation parameters) were optimized for this image and this task (finding text blocks) and can be adjusted, if needed, for other cards images or other types of objects to be found.
For thresholding (step 2), I used a black threshold. For images where text is lighter than the background, such as the second image in your post, a white threshold should be used, so replace thesholding type with cv2.THRESH_BINARY). For the second image I also used a slightly higher value for the threshold (180). Varying the parameters for the threshold value and the number of iterations for dilation will result in different degrees of sensitivity in delimiting objects in the image.
Finding other object types:
For example, decreasing the dilation to 5 iterations in the first image gives us a more fine delimitation of objects in the image, roughly finding all words in the image (rather than text blocks):
Knowing the rough size of a word, here I discarded areas that were too small (below 20 pixels width or height) or too large (above 100 pixels width or height) to ignore objects that are unlikely to be words, to get the results in the above image.

#dhanushka's approach showed the most promise but I wanted to play around in Python so went ahead and translated it for fun:
import cv2
import numpy as np
from cv2 import boundingRect, countNonZero, cvtColor, drawContours, findContours, getStructuringElement, imread, morphologyEx, pyrDown, rectangle, threshold
large = imread(image_path)
# downsample and use it for processing
rgb = pyrDown(large)
# apply grayscale
small = cvtColor(rgb, cv2.COLOR_BGR2GRAY)
# morphological gradient
morph_kernel = getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = morphologyEx(small, cv2.MORPH_GRADIENT, morph_kernel)
# binarize
_, bw = threshold(src=grad, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
morph_kernel = getStructuringElement(cv2.MORPH_RECT, (9, 1))
# connect horizontally oriented regions
connected = morphologyEx(bw, cv2.MORPH_CLOSE, morph_kernel)
mask = np.zeros(bw.shape, np.uint8)
# find contours
im2, contours, hierarchy = findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# filter contours
for idx in range(0, len(hierarchy[0])):
rect = x, y, rect_width, rect_height = boundingRect(contours[idx])
# fill the contour
mask = drawContours(mask, contours, idx, (255, 255, 2555), cv2.FILLED)
# ratio of non-zero pixels in the filled region
r = float(countNonZero(mask)) / (rect_width * rect_height)
if r > 0.45 and rect_height > 8 and rect_width > 8:
rgb = rectangle(rgb, (x, y+rect_height), (x+rect_width, y), (0,255,0),3)
Now to display the image:
from PIL import Image
Image.fromarray(rgb).show()
Not the most Pythonic of scripts but I tried to resemble the original C++ code as closely as possible for readers to follow.
It works almost as well as the original. I'll be happy to read suggestions how it could be improved/fixed to resemble the original results fully.

You can try this method that is developed by Chucai Yi and Yingli Tian.
They also share a software (which is based on Opencv-1.0 and it should run under Windows platform.) that you can use (though no source code available). It will generate all the text bounding boxes (shown in color shadows) in the image. By applying to your sample images, you will get the following results:
Note: to make the result more robust, you can further merge adjacent boxes together.
Update: If your ultimate goal is to recognize the texts in the image, you can further check out gttext, which is an OCR free software and Ground Truthing tool for Color Images with Text. Source code is also available.
With this, you can get recognized texts like:

Above Code JAVA version:
Thanks #William
public static List<Rect> detectLetters(Mat img){
List<Rect> boundRect=new ArrayList<>();
Mat img_gray =new Mat(), img_sobel=new Mat(), img_threshold=new Mat(), element=new Mat();
Imgproc.cvtColor(img, img_gray, Imgproc.COLOR_RGB2GRAY);
Imgproc.Sobel(img_gray, img_sobel, CvType.CV_8U, 1, 0, 3, 1, 0, Core.BORDER_DEFAULT);
//at src, Mat dst, double thresh, double maxval, int type
Imgproc.threshold(img_sobel, img_threshold, 0, 255, 8);
element=Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(15,5));
Imgproc.morphologyEx(img_threshold, img_threshold, Imgproc.MORPH_CLOSE, element);
List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
Mat hierarchy = new Mat();
Imgproc.findContours(img_threshold, contours,hierarchy, 0, 1);
List<MatOfPoint> contours_poly = new ArrayList<MatOfPoint>(contours.size());
for( int i = 0; i < contours.size(); i++ ){
MatOfPoint2f mMOP2f1=new MatOfPoint2f();
MatOfPoint2f mMOP2f2=new MatOfPoint2f();
contours.get(i).convertTo(mMOP2f1, CvType.CV_32FC2);
Imgproc.approxPolyDP(mMOP2f1, mMOP2f2, 2, true);
mMOP2f2.convertTo(contours.get(i), CvType.CV_32S);
Rect appRect = Imgproc.boundingRect(contours.get(i));
if (appRect.width>appRect.height) {
boundRect.add(appRect);
}
}
return boundRect;
}
And use this code in practice :
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
Mat img1=Imgcodecs.imread("abc.png");
List<Rect> letterBBoxes1=Utils.detectLetters(img1);
for(int i=0; i< letterBBoxes1.size(); i++)
Imgproc.rectangle(img1,letterBBoxes1.get(i).br(), letterBBoxes1.get(i).tl(),new Scalar(0,255,0),3,8,0);
Imgcodecs.imwrite("abc1.png", img1);

This is a C# version of the answer from dhanushka using OpenCVSharp
Mat large = new Mat(INPUT_FILE);
Mat rgb = new Mat(), small = new Mat(), grad = new Mat(), bw = new Mat(), connected = new Mat();
// downsample and use it for processing
Cv2.PyrDown(large, rgb);
Cv2.CvtColor(rgb, small, ColorConversionCodes.BGR2GRAY);
// morphological gradient
var morphKernel = Cv2.GetStructuringElement(MorphShapes.Ellipse, new OpenCvSharp.Size(3, 3));
Cv2.MorphologyEx(small, grad, MorphTypes.Gradient, morphKernel);
// binarize
Cv2.Threshold(grad, bw, 0, 255, ThresholdTypes.Binary | ThresholdTypes.Otsu);
// connect horizontally oriented regions
morphKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(9, 1));
Cv2.MorphologyEx(bw, connected, MorphTypes.Close, morphKernel);
// find contours
var mask = new Mat(Mat.Zeros(bw.Size(), MatType.CV_8UC1), Range.All);
Cv2.FindContours(connected, out OpenCvSharp.Point[][] contours, out HierarchyIndex[] hierarchy, RetrievalModes.CComp, ContourApproximationModes.ApproxSimple, new OpenCvSharp.Point(0, 0));
// filter contours
var idx = 0;
foreach (var hierarchyItem in hierarchy)
{
idx = hierarchyItem.Next;
if (idx < 0)
break;
OpenCvSharp.Rect rect = Cv2.BoundingRect(contours[idx]);
var maskROI = new Mat(mask, rect);
maskROI.SetTo(new Scalar(0, 0, 0));
// fill the contour
Cv2.DrawContours(mask, contours, idx, Scalar.White, -1);
// ratio of non-zero pixels in the filled region
double r = (double)Cv2.CountNonZero(maskROI) / (rect.Width * rect.Height);
if (r > .45 /* assume at least 45% of the area is filled if it contains text */
&&
(rect.Height > 8 && rect.Width > 8) /* constraints on region size */
/* these two conditions alone are not very robust. better to use something
like the number of significant peaks in a horizontal projection as a third condition */
)
{
Cv2.Rectangle(rgb, rect, new Scalar(0, 255, 0), 2);
}
}
rgb.SaveImage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "rgb.jpg"));

Python Implementation for #dhanushka's solution:
def process_rgb(rgb):
hasText = False
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
morphKernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, morphKernel)
# binarize
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# connect horizontally oriented regions
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, morphKernel)
# find contours
mask = np.zeros(bw.shape[:2], dtype="uint8")
_,contours, hierarchy = cv2.findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# filter contours
idx = 0
while idx >= 0:
x,y,w,h = cv2.boundingRect(contours[idx])
# fill the contour
cv2.drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED)
# ratio of non-zero pixels in the filled region
r = cv2.contourArea(contours[idx])/(w*h)
if(r > 0.45 and h > 5 and w > 5 and w > h):
cv2.rectangle(rgb, (x,y), (x+w,y+h), (0, 255, 0), 2)
hasText = True
idx = hierarchy[0][idx][0]
return hasText, rgb

You can utilize a python implementation SWTloc.
Full Disclosure : I am the author of this library
To do that :-
First and Second Image
Notice that the text_mode here is 'lb_df', which stands for Light Background Dark Foreground i.e the text in this image is going to be in darker color than the background
from swtloc import SWTLocalizer
from swtloc.utils import imgshowN, imgshow
swtl = SWTLocalizer()
# Stroke Width Transform
swtl.swttransform(imgpaths='img1.jpg', text_mode = 'lb_df',
save_results=True, save_rootpath = 'swtres/',
minrsw = 3, maxrsw = 20, max_angledev = np.pi/3)
imgshow(swtl.swtlabelled_pruned13C)
# Grouping
respacket=swtl.get_grouped(lookup_radii_multiplier=0.9, ht_ratio=3.0)
grouped_annot_bubble = respacket[2]
maskviz = respacket[4]
maskcomb = respacket[5]
# Saving the results
_=cv2.imwrite('img1_processed.jpg', swtl.swtlabelled_pruned13C)
imgshowN([maskcomb, grouped_annot_bubble], savepath='grouped_img1.jpg')
Third Image
Notice that the text_mode here is 'db_lf', which stands for Dark Background Light Foreground i.e the text in this image is going to be in lighter color than the background
from swtloc import SWTLocalizer
from swtloc.utils import imgshowN, imgshow
swtl = SWTLocalizer()
# Stroke Width Transform
swtl.swttransform(imgpaths=imgpaths[1], text_mode = 'db_lf',
save_results=True, save_rootpath = 'swtres/',
minrsw = 3, maxrsw = 20, max_angledev = np.pi/3)
imgshow(swtl.swtlabelled_pruned13C)
# Grouping
respacket=swtl.get_grouped(lookup_radii_multiplier=0.9, ht_ratio=3.0)
grouped_annot_bubble = respacket[2]
maskviz = respacket[4]
maskcomb = respacket[5]
# Saving the results
_=cv2.imwrite('img1_processed.jpg', swtl.swtlabelled_pruned13C)
imgshowN([maskcomb, grouped_annot_bubble], savepath='grouped_img1.jpg')
You will also notice that the grouping done is not so accurate, to get the desired results as the images might vary, try to tune the grouping parameters in swtl.get_grouped() function.

this is a VB.NET version of the answer from dhanushka using EmguCV.
A few functions and structures in EmguCV need different consideration than the C# version with OpenCVSharp
Imports Emgu.CV
Imports Emgu.CV.Structure
Imports Emgu.CV.CvEnum
Imports Emgu.CV.Util
Dim input_file As String = "C:\your_input_image.png"
Dim large As Mat = New Mat(input_file)
Dim rgb As New Mat
Dim small As New Mat
Dim grad As New Mat
Dim bw As New Mat
Dim connected As New Mat
Dim morphanchor As New Point(0, 0)
'//downsample and use it for processing
CvInvoke.PyrDown(large, rgb)
CvInvoke.CvtColor(rgb, small, ColorConversion.Bgr2Gray)
'//morphological gradient
Dim morphKernel As Mat = CvInvoke.GetStructuringElement(ElementShape.Ellipse, New Size(3, 3), morphanchor)
CvInvoke.MorphologyEx(small, grad, MorphOp.Gradient, morphKernel, New Point(0, 0), 1, BorderType.Isolated, New MCvScalar(0))
'// binarize
CvInvoke.Threshold(grad, bw, 0, 255, ThresholdType.Binary Or ThresholdType.Otsu)
'// connect horizontally oriented regions
morphKernel = CvInvoke.GetStructuringElement(ElementShape.Rectangle, New Size(9, 1), morphanchor)
CvInvoke.MorphologyEx(bw, connected, MorphOp.Close, morphKernel, morphanchor, 1, BorderType.Isolated, New MCvScalar(0))
'// find contours
Dim mask As Mat = Mat.Zeros(bw.Size.Height, bw.Size.Width, DepthType.Cv8U, 1) '' MatType.CV_8UC1
Dim contours As New VectorOfVectorOfPoint
Dim hierarchy As New Mat
CvInvoke.FindContours(connected, contours, hierarchy, RetrType.Ccomp, ChainApproxMethod.ChainApproxSimple, Nothing)
'// filter contours
Dim idx As Integer
Dim rect As Rectangle
Dim maskROI As Mat
Dim r As Double
For Each hierarchyItem In hierarchy.GetData
rect = CvInvoke.BoundingRectangle(contours(idx))
maskROI = New Mat(mask, rect)
maskROI.SetTo(New MCvScalar(0, 0, 0))
'// fill the contour
CvInvoke.DrawContours(mask, contours, idx, New MCvScalar(255), -1)
'// ratio of non-zero pixels in the filled region
r = CvInvoke.CountNonZero(maskROI) / (rect.Width * rect.Height)
'/* assume at least 45% of the area Is filled if it contains text */
'/* constraints on region size */
'/* these two conditions alone are Not very robust. better to use something
'Like the number of significant peaks in a horizontal projection as a third condition */
If r > 0.45 AndAlso rect.Height > 8 AndAlso rect.Width > 8 Then
'draw green rectangle
CvInvoke.Rectangle(rgb, rect, New MCvScalar(0, 255, 0), 2)
End If
idx += 1
Next
rgb.Save(IO.Path.Combine(Application.StartupPath, "rgb.jpg"))

Related

OpenCV and C++ - Shape and road signs detection

I have to write a program that detect 3 types of road signs (speed limit, no parking and warnings). I know how to detect a circle using HoughCircles but I have several images and the parameters for HoughCircles are different for each image. There's a general way to detect circles without changing parameters for each image?
Moreover I need to detect triangle (warning signs) so I'm searching for a general shape detector. Have you any suggestions/code that can help me in this task?
Finally for detect the number on speed limit signs I thought to use SIFT and compare the image with some templates in order to identify the number on the sign. Could it be a good approach?
Thank you for the answer!
I know this is a pretty old question but I had been through the same problem and now I show you how I solved it.
The following images show some of the most accurate results that are displayed by the opencv program.
In the following images the street signs detected are circled with three different colors that distinguish the three kinds of street signs (warning, no parking, speed limit).
Red for warning signs
Blue for no parking signs
Fuchsia for speed limit signs
The speed limit value is written in green above the speed limit signs
[![example][1]][1]
[![example][2]][2]
[![example][3]][3]
[![example][4]][4]
As you can see the program performs quite well, it is able to detect and distinguish the three kinds of sign and to recognize the speed limit value in case of speed limit signs. Everything is done without computing too many false positives when, for instance, in the image there are some signs that do not belong to one of the three categories.
In order to achieve this result the software computes the detection in three main steps.
The first step involves a color based approach where the red objects in the image are detected and their region are extract to be analyzed. This step is particularly useful in order to prevent the detection of false positives, because only a small part of the image is processed.
The second step works with a machine learning algorithm: in particular we use a Cascade Classifier to compute the detection. This operation firstly requires to train the classifiers and on a later stage to use them to detect the signs.
In the last step the speed limit values inside the speed limit signs are read, also in this case through a machine learning algorithm but using the k-nearest neighbor algorithm.
Now we are going to see in detail each step.
COLOR BASED STEP
Since the street signs are always circled by a red frame, we can afford to take out and analyze only the regions where the red objects are detected.
In order to select the red objects, we consider all the ranges of the red color: even if this may produce some false positives, they will be easily discarded in the next steps.
inRange(image, Scalar(0, 70, 50), Scalar(10, 255, 255), mask1);
inRange(image, Scalar(170, 70, 50), Scalar(180, 255, 255), mask2);
In the image below we can see an example of the red objects detected with this method.
After having found the red pixels we can gather them to find the regions using a clustering algorithm, I use the method
partition(<#_ForwardIterator __first#>, _ForwardIterator __last, <#_Predicate __pred#>)
After the execution of this method we can save all the points in the same cluster in a vector (one for each cluster) and extract the bounding boxes which represent the
regions to be analyzed in the next step.
HAAR CASCADE CLASSIFIERS FOR SIGNS DETECTION
This is the real detection step where the street signs are detected. In order to perform a cascade classifier the first step consist in building a dataset of positives and negatives images. Now I explain how I have built my own datasets of images.
The first thing to note is that we need to train three different Haar cascades in order to distinguish between the three kind of signs that we have to detect, hence we must repeat the following steps for each of the three kinds of sign.
We need two datasets: one for the positive samples (which must be a set of images that contains the road signs that we are going to detect) and another one for the negative samples which can be any kind of image without street signs.
After collecting a set of 100 images for the positive samples and a set of 200 images for the negatives in two different folders, we need to write two text files:
Signs.info which contains a list of file names like the one below,
one for each positive sample in the positive folder.
pos/image_name.png 1 0 0 50 45
Here, the numbers after the name represent respectively the number
of street signs in the image, the coordinate of the upper left
corner of the street sign, his height and his width.
Bg.txt which contains a list of file names like the one below, one
for each sign in the negative folder.
neg/street15.png
With the command line below we generate the .vect file which contains all the information that the software retrieves from the positive samples.
opencv_createsamples -info sign.info -num 100 -w 50 -h 50 -vec signs.vec
Afterwards we train the cascade classifier with the following command:
opencv_traincascade -data data -vec signs.vec -bg bg.txt -numPos 60 -numNeg 200 -numStages 15 -w 50 -h 50 -featureType LBP
where the number of stages indicates the number of classifiers that will be generated in order to build the cascade.
At the end of this process we gain a file cascade.xml which will be used from the CascadeClassifier program in order to detect the objects in the image.
Now we have trained our algorithm and we can declare a CascadeClassifier for each kind of street sign, than we detect the signs in the image through
detectMultiScale(<#InputArray image#>, <#std::vector<Rect> &objects#>)
this method creates a Rect around each object that has been detected.
It is important to note that exactly as every machine learning algorithm, in order to perform well, we need a large number of samples in the dataset. The dataset that I have built, is not extremely large, thus in some situations it is not able to detect all the signs. This mostly happens when a small part of the street sign is not visible in the image like in the warning sign below:
I have expanded my dataset up to the point where I have obtained a fairly accurate result without
too many errors.
SPEED LIMIT VALUE DETECTION
Like for the street signs detection also here I used a machine learning algorithm but with a different approach. After some work, I realized that an OCR (tesseract) solution does not perform well, so I decided to build my own ocr software.
For the machine learning algorithm I took the image below as training data which contains some speed limit values:
The amount of training data is small. But, since in speed limit signs all letters have the same font, it is not a huge problem.
To prepare the data for training, I made a small code in OpenCV. It does the following things:
It loads the image on the left;
It selects the digits (obviously by contour finding and applying constraints on area and height of letters to avoid false detections).
It draws the bounding rectangle around one letter and it waits for the key to be manually pressed. This time the user presses the digit key corresponding to the letter in box by himself.
Once the corresponding digit key is pressed, it saves 100 pixel values in an array and the correspondent manually entered digit in another array.
Eventually it saves both the arrays in separate txt files.
Following the manual digit classification all the digits in the train data( train.png) are manually labeled, and the image will look like the one below.
Now we enter into training and testing part.
For training we do as follows:
Load the txt files we already saved earlier
Create an instance of classifier that we are going to use ( KNearest)
Then we use KNearest.train function to train the data
Now the detection:
We load the image with the speed limit sign detected
Process the image as before and extract each digit using contour methods
Draw bounding box for it, then resize to 10x10, and store its pixel values in an array as done earlier.
Then we use KNearest.find_nearest() function to find the nearest item to the one we gave.
And it recognizes the correct digit.
I tested this little OCR on many images, and just with this small dataset I have obtained an accuracy of about 90%.
CODE
Below I post all my openCv c++ code in a single class, following my instruction you should be able to achive my result.
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
#include <stdio.h>
#include <cmath>
#include <stdlib.h>
#include "opencv2/core/core.hpp"
#include "opencv2/highgui.hpp"
#include <string.h>
#include <opencv2/ml/ml.hpp>
using namespace std;
using namespace cv;
std::vector<cv::Rect> getRedObjects(cv::Mat image);
vector<Mat> detectAndDisplaySpeedLimit( Mat frame );
vector<Mat> detectAndDisplayNoParking( Mat frame );
vector<Mat> detectAndDisplayWarning( Mat frame );
void trainDigitClassifier();
string getDigits(Mat image);
vector<Mat> loadAllImage();
int getSpeedLimit(string speed);
//path of the haar cascade files
String no_parking_signs_cascade = "/Users/giuliopettenuzzo/Desktop/cascade_classifiers/no_parking_cascade.xml";
String speed_signs_cascade = "/Users/giuliopettenuzzo/Desktop/cascade_classifiers/speed_limit_cascade.xml";
String warning_signs_cascade = "/Users/giuliopettenuzzo/Desktop/cascade_classifiers/warning_cascade.xml";
CascadeClassifier speed_limit_cascade;
CascadeClassifier no_parking_cascade;
CascadeClassifier warning_cascade;
int main(int argc, char** argv)
{
//train the classifier for digit recognition, this require a manually train, read the report for more details
trainDigitClassifier();
cv::Mat sceneImage;
vector<Mat> allImages = loadAllImage();
for(int i = 0;i<=allImages.size();i++){
sceneImage = allImages[i];
//load the haar cascade files
if( !speed_limit_cascade.load( speed_signs_cascade ) ){ printf("--(!)Error loading\n"); return -1; };
if( !no_parking_cascade.load( no_parking_signs_cascade ) ){ printf("--(!)Error loading\n"); return -1; };
if( !warning_cascade.load( warning_signs_cascade ) ){ printf("--(!)Error loading\n"); return -1; };
Mat scene = sceneImage.clone();
//detect the red objects
std::vector<cv::Rect> allObj = getRedObjects(scene);
//use the three cascade classifier for each object detected by the getRedObjects() method
for(int j = 0;j<allObj.size();j++){
Mat img = sceneImage(Rect(allObj[j]));
vector<Mat> warningVec = detectAndDisplayWarning(img);
if(warningVec.size()>0){
Rect box = allObj[j];
}
vector<Mat> noParkVec = detectAndDisplayNoParking(img);
if(noParkVec.size()>0){
Rect box = allObj[j];
}
vector<Mat> speedLitmitVec = detectAndDisplaySpeedLimit(img);
if(speedLitmitVec.size()>0){
Rect box = allObj[j];
for(int i = 0; i<speedLitmitVec.size();i++){
//get speed limit and skatch it in the image
int digit = getSpeedLimit(getDigits(speedLitmitVec[i]));
if(digit > 0){
Point point = box.tl();
point.y = point.y + 30;
cv::putText(sceneImage,
"SPEED LIMIT " + to_string(digit),
point,
cv::FONT_HERSHEY_COMPLEX_SMALL,
0.7,
cv::Scalar(0,255,0),
1,
cv::CV__CAP_PROP_LATEST);
}
}
}
}
imshow("currentobj",sceneImage);
waitKey(0);
}
}
/*
* detect the red object in the image given in the param,
* return a vector containing all the Rect of the red objects
*/
std::vector<cv::Rect> getRedObjects(cv::Mat image)
{
Mat3b res = image.clone();
std::vector<cv::Rect> result;
cvtColor(image, image, COLOR_BGR2HSV);
Mat1b mask1, mask2;
//ranges of red color
inRange(image, Scalar(0, 70, 50), Scalar(10, 255, 255), mask1);
inRange(image, Scalar(170, 70, 50), Scalar(180, 255, 255), mask2);
Mat1b mask = mask1 | mask2;
Mat nonZeroCoordinates;
vector<Point> pts;
findNonZero(mask, pts);
for (int i = 0; i < nonZeroCoordinates.total(); i++ ) {
cout << "Zero#" << i << ": " << nonZeroCoordinates.at<Point>(i).x << ", " << nonZeroCoordinates.at<Point>(i).y << endl;
}
int th_distance = 2; // radius tolerance
// Apply partition
// All pixels within the radius tolerance distance will belong to the same class (same label)
vector<int> labels;
// With lambda function (require C++11)
int th2 = th_distance * th_distance;
int n_labels = partition(pts, labels, [th2](const Point& lhs, const Point& rhs) {
return ((lhs.x - rhs.x)*(lhs.x - rhs.x) + (lhs.y - rhs.y)*(lhs.y - rhs.y)) < th2;
});
// You can save all points in the same class in a vector (one for each class), just like findContours
vector<vector<Point>> contours(n_labels);
for (int i = 0; i < pts.size(); ++i){
contours[labels[i]].push_back(pts[i]);
}
// Get bounding boxes
vector<Rect> boxes;
for (int i = 0; i < contours.size(); ++i)
{
Rect box = boundingRect(contours[i]);
if(contours[i].size()>500){//prima era 1000
boxes.push_back(box);
Rect enlarged_box = box + Size(100,100);
enlarged_box -= Point(30,30);
if(enlarged_box.x<0){
enlarged_box.x = 0;
}
if(enlarged_box.y<0){
enlarged_box.y = 0;
}
if(enlarged_box.height + enlarged_box.y > res.rows){
enlarged_box.height = res.rows - enlarged_box.y;
}
if(enlarged_box.width + enlarged_box.x > res.cols){
enlarged_box.width = res.cols - enlarged_box.x;
}
Mat img = res(Rect(enlarged_box));
result.push_back(enlarged_box);
}
}
Rect largest_box = *max_element(boxes.begin(), boxes.end(), [](const Rect& lhs, const Rect& rhs) {
return lhs.area() < rhs.area();
});
//draw the rects in case you want to see them
for(int j=0;j<=boxes.size();j++){
if(boxes[j].area() > largest_box.area()/3){
rectangle(res, boxes[j], Scalar(0, 0, 255));
Rect enlarged_box = boxes[j] + Size(20,20);
enlarged_box -= Point(10,10);
rectangle(res, enlarged_box, Scalar(0, 255, 0));
}
}
rectangle(res, largest_box, Scalar(0, 0, 255));
Rect enlarged_box = largest_box + Size(20,20);
enlarged_box -= Point(10,10);
rectangle(res, enlarged_box, Scalar(0, 255, 0));
return result;
}
/*
* code for detect the speed limit sign , it draws a circle around the speed limit signs
*/
vector<Mat> detectAndDisplaySpeedLimit( Mat frame )
{
std::vector<Rect> signs;
vector<Mat> result;
Mat frame_gray;
cvtColor( frame, frame_gray, CV_BGR2GRAY );
//normalizes the brightness and increases the contrast of the image
equalizeHist( frame_gray, frame_gray );
//-- Detect signs
speed_limit_cascade.detectMultiScale( frame_gray, signs, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
cout << speed_limit_cascade.getFeatureType();
for( size_t i = 0; i < signs.size(); i++ )
{
Point center( signs[i].x + signs[i].width*0.5, signs[i].y + signs[i].height*0.5 );
ellipse( frame, center, Size( signs[i].width*0.5, signs[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
Mat resultImage = frame(Rect(center.x - signs[i].width*0.5,center.y - signs[i].height*0.5,signs[i].width,signs[i].height));
result.push_back(resultImage);
}
return result;
}
/*
* code for detect the warning sign , it draws a circle around the warning signs
*/
vector<Mat> detectAndDisplayWarning( Mat frame )
{
std::vector<Rect> signs;
vector<Mat> result;
Mat frame_gray;
cvtColor( frame, frame_gray, CV_BGR2GRAY );
equalizeHist( frame_gray, frame_gray );
//-- Detect signs
warning_cascade.detectMultiScale( frame_gray, signs, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
cout << warning_cascade.getFeatureType();
Rect previus;
for( size_t i = 0; i < signs.size(); i++ )
{
Point center( signs[i].x + signs[i].width*0.5, signs[i].y + signs[i].height*0.5 );
Rect newRect = Rect(center.x - signs[i].width*0.5,center.y - signs[i].height*0.5,signs[i].width,signs[i].height);
if((previus & newRect).area()>0){
previus = newRect;
}else{
ellipse( frame, center, Size( signs[i].width*0.5, signs[i].height*0.5), 0, 0, 360, Scalar( 0, 0, 255 ), 4, 8, 0 );
Mat resultImage = frame(newRect);
result.push_back(resultImage);
previus = newRect;
}
}
return result;
}
/*
* code for detect the no parking sign , it draws a circle around the no parking signs
*/
vector<Mat> detectAndDisplayNoParking( Mat frame )
{
std::vector<Rect> signs;
vector<Mat> result;
Mat frame_gray;
cvtColor( frame, frame_gray, CV_BGR2GRAY );
equalizeHist( frame_gray, frame_gray );
//-- Detect signs
no_parking_cascade.detectMultiScale( frame_gray, signs, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
cout << no_parking_cascade.getFeatureType();
Rect previus;
for( size_t i = 0; i < signs.size(); i++ )
{
Point center( signs[i].x + signs[i].width*0.5, signs[i].y + signs[i].height*0.5 );
Rect newRect = Rect(center.x - signs[i].width*0.5,center.y - signs[i].height*0.5,signs[i].width,signs[i].height);
if((previus & newRect).area()>0){
previus = newRect;
}else{
ellipse( frame, center, Size( signs[i].width*0.5, signs[i].height*0.5), 0, 0, 360, Scalar( 255, 0, 0 ), 4, 8, 0 );
Mat resultImage = frame(newRect);
result.push_back(resultImage);
previus = newRect;
}
}
return result;
}
/*
* train the classifier for digit recognition, this could be done only one time, this method save the result in a file and
* it can be used in the next executions
* in order to train user must enter manually the corrisponding digit that the program shows, press space if the red box is just a point (false positive)
*/
void trainDigitClassifier(){
Mat thr,gray,con;
Mat src=imread("/Users/giuliopettenuzzo/Desktop/all_numbers.png",1);
cvtColor(src,gray,CV_BGR2GRAY);
threshold(gray,thr,125,255,THRESH_BINARY_INV); //Threshold to find contour
imshow("ci",thr);
waitKey(0);
thr.copyTo(con);
// Create sample and label data
vector< vector <Point> > contours; // Vector for storing contour
vector< Vec4i > hierarchy;
Mat sample;
Mat response_array;
findContours( con, contours, hierarchy,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE ); //Find contour
for( int i = 0; i< contours.size(); i=hierarchy[i][0] ) // iterate through first hierarchy level contours
{
Rect r= boundingRect(contours[i]); //Find bounding rect for each contour
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,0,255),2,8,0);
Mat ROI = thr(r); //Crop the image
Mat tmp1, tmp2;
resize(ROI,tmp1, Size(10,10), 0,0,INTER_LINEAR ); //resize to 10X10
tmp1.convertTo(tmp2,CV_32FC1); //convert to float
imshow("src",src);
int c=waitKey(0); // Read corresponding label for contour from keyoard
c-=0x30; // Convert ascii to intiger value
response_array.push_back(c); // Store label to a mat
rectangle(src,Point(r.x,r.y), Point(r.x+r.width,r.y+r.height), Scalar(0,255,0),2,8,0);
sample.push_back(tmp2.reshape(1,1)); // Store sample data
}
// Store the data to file
Mat response,tmp;
tmp=response_array.reshape(1,1); //make continuous
tmp.convertTo(response,CV_32FC1); // Convert to float
FileStorage Data("TrainingData.yml",FileStorage::WRITE); // Store the sample data in a file
Data << "data" << sample;
Data.release();
FileStorage Label("LabelData.yml",FileStorage::WRITE); // Store the label data in a file
Label << "label" << response;
Label.release();
cout<<"Training and Label data created successfully....!! "<<endl;
imshow("src",src);
waitKey(0);
}
/*
* get digit from the image given in param, using the classifier trained before
*/
string getDigits(Mat image)
{
Mat thr1,gray1,con1;
Mat src1 = image.clone();
cvtColor(src1,gray1,CV_BGR2GRAY);
threshold(gray1,thr1,125,255,THRESH_BINARY_INV); // Threshold to create input
thr1.copyTo(con1);
// Read stored sample and label for training
Mat sample1;
Mat response1,tmp1;
FileStorage Data1("TrainingData.yml",FileStorage::READ); // Read traing data to a Mat
Data1["data"] >> sample1;
Data1.release();
FileStorage Label1("LabelData.yml",FileStorage::READ); // Read label data to a Mat
Label1["label"] >> response1;
Label1.release();
Ptr<ml::KNearest> knn(ml::KNearest::create());
knn->train(sample1, ml::ROW_SAMPLE,response1); // Train with sample and responses
cout<<"Training compleated.....!!"<<endl;
vector< vector <Point> > contours1; // Vector for storing contour
vector< Vec4i > hierarchy1;
//Create input sample by contour finding and cropping
findContours( con1, contours1, hierarchy1,CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE );
Mat dst1(src1.rows,src1.cols,CV_8UC3,Scalar::all(0));
string result;
for( int i = 0; i< contours1.size(); i=hierarchy1[i][0] ) // iterate through each contour for first hierarchy level .
{
Rect r= boundingRect(contours1[i]);
Mat ROI = thr1(r);
Mat tmp1, tmp2;
resize(ROI,tmp1, Size(10,10), 0,0,INTER_LINEAR );
tmp1.convertTo(tmp2,CV_32FC1);
Mat bestLabels;
float p=knn -> findNearest(tmp2.reshape(1,1),4, bestLabels);
char name[4];
sprintf(name,"%d",(int)p);
cout << "num = " << (int)p;
result = result + to_string((int)p);
putText( dst1,name,Point(r.x,r.y+r.height) ,0,1, Scalar(0, 255, 0), 2, 8 );
}
imwrite("dest.jpg",dst1);
return result ;
}
/*
* from the digits detected, it returns a speed limit if it is detected correctly, -1 otherwise
*/
int getSpeedLimit(string numbers){
if ((numbers.find("30") != std::string::npos) || (numbers.find("03") != std::string::npos)) {
return 30;
}
if ((numbers.find("50") != std::string::npos) || (numbers.find("05") != std::string::npos)) {
return 50;
}
if ((numbers.find("80") != std::string::npos) || (numbers.find("08") != std::string::npos)) {
return 80;
}
if ((numbers.find("70") != std::string::npos) || (numbers.find("07") != std::string::npos)) {
return 70;
}
if ((numbers.find("90") != std::string::npos) || (numbers.find("09") != std::string::npos)) {
return 90;
}
if ((numbers.find("100") != std::string::npos) || (numbers.find("001") != std::string::npos)) {
return 100;
}
if ((numbers.find("130") != std::string::npos) || (numbers.find("031") != std::string::npos)) {
return 130;
}
return -1;
}
/*
* load all the image in the file with the path hard coded below
*/
vector<Mat> loadAllImage(){
vector<cv::String> fn;
glob("/Users/giuliopettenuzzo/Desktop/T1/dataset/*.jpg", fn, false);
vector<Mat> images;
size_t count = fn.size(); //number of png files in images folder
for (size_t i=0; i<count; i++)
images.push_back(imread(fn[i]));
return images;
}
maybe you should try implementing the ransac algorithm, if you are using color images, migt be a good idea (if you are in europe) to get the red channel only since the speed limits are surrounded by a red cricle (or a thin white i think also).
For that you need to filter the image to get the edges, (canny filter).
Here are some useful links:
OpenCV detect partial circle with noise
https://hal.archives-ouvertes.fr/hal-00982526/document
Finally for the numbers detection i think its ok. Other approach is to use something like Viola-Jones algorithm to detect the signals, with pretrained existing models... It's up to you!

How to detect all candies with opencv and count them

I have the following picture
And I try to count all candies, but some of them intersect. And my following code doesn't work. The Code below can only identify candies if they don't intersect. But I can't have any thoughts about what to do if they intersect
int main( int argc, char** argv )
{
Mat src = imread("C:\\data\\Assignment1A.jpg",0); // reads image from file
Mat dst = Mat::zeros(src.rows, src.cols, CV_8UC3);
cv::Scalar min(245, 0, 0);
cv::Scalar max(255, 255, 255);
cv::inRange( src, min, max, dst);
namedWindow( "Source", 1 );
imshow( "Source", dst );
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours( dst, contours, hierarchy,
CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE );
cout <<contours.size();
size_t count = contours.size();
int i = 0;
for(size_t i = 0; i < contours.size(); i++) {
cv::Point2f c;
float r;
cv::minEnclosingCircle( contours[i], c, r);
if (r > 10) {
i++; }
}
cout << i << " candies";
waitKey(0);
return 0;
}
Can anyone help me how to solve this problem?
This is probably not a very good solution (it only works under assumption that candies of the same color never overlap). (There's also this article about using distance transform and watershed, but I couldn't make it fork for this, maybe the candies overlap too much?)
Remove the white background (especially because it has a lot of noise and JPEG artifacts).
Convert to HSV and take the H channel (as suggested in the comments). Hue is useful here, because it sort of represents a color with a single value (hue) instead of three (red, green, blue), ignoring, to a degree, all those surface irregularities. You probably can't distinguish some of the ellipses below from the background, but don't worry, your computer can.
For a narrow range of H (say, [1, 20]), find contours big enough (in terms of their area) to be a single candy, but small enough not to be more than one candy. Repeat for H in range [21, 40], then [41, 60], and so on, until you reach the top hue value of 179. The contours are not too good, but good enough for counting. You can probably improve them by doing dilate/erode at every step to get rid of the junk, or something, if you need to.
The Python code to show how it works:
import cv2
import numpy as np
#get only contours of more or less the single candy size
#noise is not included because its area is too small
#overlapping candies are not included because their area is too big
#probably there are smarter ways (area/circumference ratio, ellipse fitting, etc.)
def get_contours(img):
area_threshold_max = 120*120 #nice hard-coded empirical values
area_threshold_min = 50*50
_, contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
areas = [cv2.contourArea(c) for c in contours]
contours = [c for n, c in enumerate(contours) if area_threshold_min < areas[n] < area_threshold_max]
return contours
#remove background
img_g = cv2.imread("candy.jpg", 0) #read as grayscale
_, thresh = cv2.threshold(img_g, 235, 1, cv2.THRESH_BINARY_INV) #mask to include everything that isn't too white
img = cv2.imread("candy.jpg") #read again as a color image
black = np.zeros(img.shape).astype(img.dtype) #black image to combine with
img = cv2.bitwise_or(img, black, mask = thresh)
#convert to HSV and take only the H value
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 0]
#find contours of a more or less appropriate size within a narrow H range,
#remove them from the image, increment H, repeat
contours_all = []
h_step = 20
for h_start in range(0, 180, h_step):
rng = np.zeros(img.shape).astype(np.uint8)
rng = cv2.inRange(img, h_start + 1, h_start + h_step)
contours = get_contours(rng)
contours_all += contours
#draw our contours with their numbers on top of the original image
img_to_draw = cv2.imread("candy.jpg")
cv2.drawContours(img_to_draw, contours_all, -1, (0, 0, 0), 2)
for i, cnt in enumerate(contours_all):
(x, y), radius = cv2.minEnclosingCircle(cnt)
cv2.putText(img_to_draw, "%d" % (i + 1), (int(x-20), int(y+10)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
cv2.putText(img_to_draw, "%d candies" % len(contours_all), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
cv2.imwrite("result2.png", img_to_draw)

OpenCV Edge/Border detection based on color

I'm fairly new to OpenCV, and very excited to learn more. I've been toying with the idea of outlining edges, shapes.
I've come across this code (running on an iOS device), which uses Canny. I'd like to be able to render this in color, and circle each shape. Can someone point me in the right direction?
Thanks!
IplImage *grayImage = cvCreateImage(cvGetSize(iplImage), IPL_DEPTH_8U, 1);
cvCvtColor(iplImage, grayImage, CV_BGRA2GRAY);
cvReleaseImage(&iplImage);
IplImage* img_blur = cvCreateImage( cvGetSize( grayImage ), grayImage->depth, 1);
cvSmooth(grayImage, img_blur, CV_BLUR, 3, 0, 0, 0);
cvReleaseImage(&grayImage);
IplImage* img_canny = cvCreateImage( cvGetSize( img_blur ), img_blur->depth, 1);
cvCanny( img_blur, img_canny, 10, 100, 3 );
cvReleaseImage(&img_blur);
cvNot(img_canny, img_canny);
And example might be these burger patties. OpenCV would detect the patty, and outline it.
Original Image:
Color information is often handled by conversion to HSV color space which handles "color" directly instead of dividing color into R/G/B components which makes it easier to handle same colors with different brightness etc.
if you convert your image to HSV you'll get this:
cv::Mat hsv;
cv::cvtColor(input,hsv,CV_BGR2HSV);
std::vector<cv::Mat> channels;
cv::split(hsv, channels);
cv::Mat H = channels[0];
cv::Mat S = channels[1];
cv::Mat V = channels[2];
Hue channel:
Saturation channel:
Value channel:
typically, the hue channel is the first one to look at if you are interested in segmenting "color" (e.g. all red objects). One problem is, that hue is a circular/angular value which means that the highest values are very similar to the lowest values, which results in the bright artifacts at the border of the patties. To overcome this for a particular value, you can shift the whole hue space. If shifted by 50° you'll get something like this instead:
cv::Mat shiftedH = H.clone();
int shift = 25; // in openCV hue values go from 0 to 180 (so have to be doubled to get to 0 .. 360) because of byte range from 0 to 255
for(int j=0; j<shiftedH.rows; ++j)
for(int i=0; i<shiftedH.cols; ++i)
{
shiftedH.at<unsigned char>(j,i) = (shiftedH.at<unsigned char>(j,i) + shift)%180;
}
now you can use a simple canny edge detection to find edges in the hue channel:
cv::Mat cannyH;
cv::Canny(shiftedH, cannyH, 100, 50);
You can see that the regions are a little bigger than the real patties, that might be because of the tiny reflections on the ground around the patties, but I'm not sure about that. Maybe it's just because of jpeg compression artifacts ;)
If you instead use the saturation channel to extract edges, you'll end up with something like this:
cv::Mat cannyS;
cv::Canny(S, cannyS, 200, 100);
where the contours aren't completely closed. Maybe you can combine hue and saturation within preprocessing to extract edges in the hue channel but only where saturation is high enough.
At this stage you have edges. Regard that edges aren't contours yet. If you directly extract contours from edges they might not be closed/separated etc:
// extract contours of the canny image:
std::vector<std::vector<cv::Point> > contoursH;
std::vector<cv::Vec4i> hierarchyH;
cv::findContours(cannyH,contoursH, hierarchyH, CV_RETR_TREE , CV_CHAIN_APPROX_SIMPLE);
// draw the contours to a copy of the input image:
cv::Mat outputH = input.clone();
for( int i = 0; i< contoursH.size(); i++ )
{
cv::drawContours( outputH, contoursH, i, cv::Scalar(0,0,255), 2, 8, hierarchyH, 0);
}
you can remove those small contours by checking cv::contourArea(contoursH[i]) > someThreshold before drawing. But you see the two patties on the left to be connected? Here comes the hardest part... use some heuristics to "improve" your result.
cv::dilate(cannyH, cannyH, cv::Mat());
cv::dilate(cannyH, cannyH, cv::Mat());
cv::dilate(cannyH, cannyH, cv::Mat());
Dilation before contour extraction will "close" the gaps between different objects but increase the object size too.
if you extract contours from that it will look like this:
If you instead choose only the "inner" contours it is exactly what you like:
cv::Mat outputH = input.clone();
for( int i = 0; i< contoursH.size(); i++ )
{
if(cv::contourArea(contoursH[i]) < 20) continue; // ignore contours that are too small to be a patty
if(hierarchyH[i][3] < 0) continue; // ignore "outer" contours
cv::drawContours( outputH, contoursH, i, cv::Scalar(0,0,255), 2, 8, hierarchyH, 0);
}
mind that the dilation and inner contour stuff is a little fuzzy, so it might not work for different images and if the initial edges are placed better around the object border it might 1. not be necessary to do the dilate and inner contour thing and 2. if it is still necessary, the dilate will make the object smaller in this scenario (which luckily is great for the given sample image.).
EDIT: Some important information about HSV: The hue channel will give every pixel a color of the spectrum, even if the saturation is very low ( = gray/white) or if the color is very low (value) so often it is desired to threshold the saturation and value channels to find some specific color! This might be much easier and much more stavle to handle than the dilation I've used in my code.

Deconvolution with OpenCV?

Is there a way of doing deconvolution with OpenCV?
I'm just impressed by the improvement shown here
and would like to add this feature also to my software.
EDIT (Additional information for bounty.)
I still have not figured out how to implement the deconvolution.
This code helps me to sharpen the image, but I think the deconvolution could do it better.
void ImageProcessing::sharpen(QImage & img)
{
IplImage* cvimg = createGreyFromQImage( img );
if ( !cvimg ) return;
IplImage* gsimg = cvCloneImage(cvimg );
IplImage* dimg = cvCreateImage( cvGetSize(cvimg), IPL_DEPTH_8U, 1 );
IplImage* outgreen = cvCreateImage( cvGetSize(cvimg), IPL_DEPTH_8U, 3 );
IplImage* zeroChan = cvCreateImage( cvGetSize(cvimg), IPL_DEPTH_8U, 1 );
cvZero(zeroChan);
cv::Mat smat( gsimg, false );
cv::Mat dmat( dimg, false );
cv::GaussianBlur(smat, dmat, cv::Size(0, 0), 3);
cv::addWeighted(smat, 1.5, dmat, -0.5 ,0, dmat);
cvMerge( zeroChan, dimg, zeroChan, NULL, outgreen);
img = IplImage2QImage( outgreen );
cvReleaseImage( &gsimg );
cvReleaseImage( &cvimg );
cvReleaseImage( &dimg );
cvReleaseImage( &outgreen );
cvReleaseImage( &zeroChan );
}
Hoping for helpful hints!
Sure, you can write a deconvolution Code using OpenCV. But there are no ready to use Functions (yet).
To get started you can look at this Example that shows the implementation of Wiener Deconvolution in Python using OpenCV.
Here is another Example using C, but this is from 2012, so maybe it is outdated.
Nearest neighbor deconvolution is a technique which is used typically on a stack of images in the Z plane in optical microscopy. This review paper: Jean-Baptiste Sibarita. Deconvolution Microscopy. Adv Biochem Engin/Biotechnol (2005) 95: 201–243 covers quite a lot of the techniques used, including the one you are interested in. This is also a nice intro: http://blogs.fe.up.pt/BioinformaticsTools/microscopy/
This numpy+scipy python example shows how it works:
from pylab import *
import numpy
import scipy.ndimage
width = 100
height = 100
depth = 10
imgs = zeros((height, width, depth))
# prepare test input, a stack of images which is zero except for a point which has been blurred by a 3D gaussian
#sigma = 3
#imgs[height/2,width/2,depth/2] = 1
#imgs = scipy.ndimage.filters.gaussian_filter(imgs, sigma)
# read real input from stack of images img_0000.png, img_0001.png, ... (total number = depth)
# these must have the same dimensions equal to width x height above
# if imread reads them as having more than one channel, they need to be converted to one channel
for k in range(depth):
imgs[:,:,k] = scipy.ndimage.imread( "img_%04d.png" % (k) )
# prepare output array, top and bottom image in stack don't get filtered
out_imgs = zeros_like(imgs)
out_imgs[:,:,0] = imgs[:,:,0]
out_imgs[:,:,-1] = imgs[:,:,-1]
# apply nearest neighbor deconvolution
alpha = 0.4 # adjustabe parameter, strength of filter
sigma_estimate = 3 # estimate, just happens to be same as the actual
for k in range(1, depth-1):
# subtract blurred neighboring planes in the stack from current plane
# doesn't have to be gaussian, any other kind of blur may be used: this should approximate PSF
out_imgs[:,:,k] = (1+alpha) * imgs[:,:,k] \
- (alpha/2) * scipy.ndimage.filters.gaussian_filter(imgs[:,:,k-1], sigma_estimate) \
- (alpha/2) * scipy.ndimage.filters.gaussian_filter(imgs[:,:,k+1], sigma_estimate)
# show result, original on left, filtered on right
compare_img = copy(out_imgs[:,:,depth/2])
compare_img[:,:width/2] = imgs[:,:width/2,depth/2]
imshow(compare_img)
show()
The sample image you provided actually is a very good example of Lucy-Richardson deconvolution. There is not a built-in function in OpenCV libraries for this deconvolution method. In Matlab, you may use the deconvolution with "deconvlucy.m" function. Actually, you can see the source code for some of the functions in Matlab by typing "open " or "edit ".
Below, I tried to simplify the Matlab code in OpenCV.
// Lucy-Richardson Deconvolution Function
// input-1 img: NxM matrix image
// input-2 num_iterations: number of iterations
// input-3 sigma: sigma of point spread function (PSF)
// output result: deconvolution result
// Window size of PSF
int winSize = 10 * sigmaG + 1 ;
// Initializations
Mat Y = img.clone();
Mat J1 = img.clone();
Mat J2 = img.clone();
Mat wI = img.clone();
Mat imR = img.clone();
Mat reBlurred = img.clone();
Mat T1, T2, tmpMat1, tmpMat2;
T1 = Mat(img.rows,img.cols, CV_64F, 0.0);
T2 = Mat(img.rows,img.cols, CV_64F, 0.0);
// Lucy-Rich. Deconvolution CORE
double lambda = 0;
for(int j = 0; j < num_iterations; j++)
{
if (j>1) {
// calculation of lambda
multiply(T1, T2, tmpMat1);
multiply(T2, T2, tmpMat2);
lambda=sum(tmpMat1)[0] / (sum( tmpMat2)[0]+EPSILON);
// calculation of lambda
}
Y = J1 + lambda * (J1-J2);
Y.setTo(0, Y < 0);
// 1)
GaussianBlur( Y, reBlurred, Size(winSize,winSize), sigmaG, sigmaG );//applying Gaussian filter
reBlurred.setTo(EPSILON , reBlurred <= 0);
// 2)
divide(wI, reBlurred, imR);
imR = imR + EPSILON;
// 3)
GaussianBlur( imR, imR, Size(winSize,winSize), sigmaG, sigmaG );//applying Gaussian filter
// 4)
J2 = J1.clone();
multiply(Y, imR, J1);
T2 = T1.clone();
T1 = J1 - Y;
}
// output
result = J1.clone();
Here are some examples and results.
Example results with Lucy-Richardson deconvolution
Visit my blog Here where you may access the whole code.
I'm not sure you understand what deconvolution is. The idea behind deconvolution is to remove the detector response from the image. This is commonly done in astronomy.
For instance, if you have a CCD mounted to a telescope, then any image you take is a convolution of what you are looking at in the sky and the response of the optical system. The telescope (or camera lens or whatever) will have some point spread function (PSF). That is, if you look at a point source that is very far away, like a star, when you take an image of it, the star will be blurred over several pixels. This blurring -- the point spread -- is what you would like to remove. If you know the point spread function of your optical system very well, then you can deconvolve the PSF from your image and obtain a sharper image.
Unless you happen to know the PSF of your optics (nontrivial to measure!), you should seek out some other option for sharpening your image. I doubt OpenCV has anything like a Richardson-Lucy algorithm built-in.

OpenCV C++/Obj-C: Detecting a sheet of paper / Square Detection

I successfully implemented the OpenCV square-detection example in my test application, but now need to filter the output, because it's quite messy - or is my code wrong?
I'm interested in the four corner points of the paper for skew reduction (like that) and further processing …
Input & Output:
Original image:
click
Code:
double angle( cv::Point pt1, cv::Point pt2, cv::Point pt0 ) {
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
- (std::vector<std::vector<cv::Point> >)findSquaresInImage:(cv::Mat)_image
{
std::vector<std::vector<cv::Point> > squares;
cv::Mat pyr, timg, gray0(_image.size(), CV_8U), gray;
int thresh = 50, N = 11;
cv::pyrDown(_image, pyr, cv::Size(_image.cols/2, _image.rows/2));
cv::pyrUp(pyr, timg, _image.size());
std::vector<std::vector<cv::Point> > contours;
for( int c = 0; c < 3; c++ ) {
int ch[] = {c, 0};
mixChannels(&timg, 1, &gray0, 1, ch, 1);
for( int l = 0; l < N; l++ ) {
if( l == 0 ) {
cv::Canny(gray0, gray, 0, thresh, 5);
cv::dilate(gray, gray, cv::Mat(), cv::Point(-1,-1));
}
else {
gray = gray0 >= (l+1)*255/N;
}
cv::findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
std::vector<cv::Point> approx;
for( size_t i = 0; i < contours.size(); i++ )
{
cv::approxPolyDP(cv::Mat(contours[i]), approx, arcLength(cv::Mat(contours[i]), true)*0.02, true);
if( approx.size() == 4 && fabs(contourArea(cv::Mat(approx))) > 1000 && cv::isContourConvex(cv::Mat(approx))) {
double maxCosine = 0;
for( int j = 2; j < 5; j++ )
{
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
if( maxCosine < 0.3 ) {
squares.push_back(approx);
}
}
}
}
}
return squares;
}
EDIT 17/08/2012:
To draw the detected squares on the image use this code:
cv::Mat debugSquares( std::vector<std::vector<cv::Point> > squares, cv::Mat image )
{
for ( int i = 0; i< squares.size(); i++ ) {
// draw contour
cv::drawContours(image, squares, i, cv::Scalar(255,0,0), 1, 8, std::vector<cv::Vec4i>(), 0, cv::Point());
// draw bounding rect
cv::Rect rect = boundingRect(cv::Mat(squares[i]));
cv::rectangle(image, rect.tl(), rect.br(), cv::Scalar(0,255,0), 2, 8, 0);
// draw rotated rect
cv::RotatedRect minRect = minAreaRect(cv::Mat(squares[i]));
cv::Point2f rect_points[4];
minRect.points( rect_points );
for ( int j = 0; j < 4; j++ ) {
cv::line( image, rect_points[j], rect_points[(j+1)%4], cv::Scalar(0,0,255), 1, 8 ); // blue
}
}
return image;
}
This is a recurring subject in Stackoverflow and since I was unable to find a relevant implementation I decided to accept the challenge.
I made some modifications to the squares demo present in OpenCV and the resulting C++ code below is able to detect a sheet of paper in the image:
void find_squares(Mat& image, vector<vector<Point> >& squares)
{
// blur will enhance edge detection
Mat blurred(image);
medianBlur(image, blurred, 9);
Mat gray0(blurred.size(), CV_8U), gray;
vector<vector<Point> > contours;
// find squares in every color plane of the image
for (int c = 0; c < 3; c++)
{
int ch[] = {c, 0};
mixChannels(&blurred, 1, &gray0, 1, ch, 1);
// try several threshold levels
const int threshold_level = 2;
for (int l = 0; l < threshold_level; l++)
{
// Use Canny instead of zero threshold level!
// Canny helps to catch squares with gradient shading
if (l == 0)
{
Canny(gray0, gray, 10, 20, 3); //
// Dilate helps to remove potential holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
gray = gray0 >= (l+1) * 255 / threshold_level;
}
// Find contours and store them in a list
findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
// Test contours
vector<Point> approx;
for (size_t i = 0; i < contours.size(); i++)
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if (approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)))
{
double maxCosine = 0;
for (int j = 2; j < 5; j++)
{
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
if (maxCosine < 0.3)
squares.push_back(approx);
}
}
}
}
}
After this procedure is executed, the sheet of paper will be the largest square in vector<vector<Point> >:
I'm letting you write the function to find the largest square. ;)
Unless there is some other requirement not specified, I would simply convert your color image to grayscale and work with that only (no need to work on the 3 channels, the contrast present is too high already). Also, unless there is some specific problem regarding resizing, I would work with a downscaled version of your images, since they are relatively large and the size adds nothing to the problem being solved. Then, finally, your problem is solved with a median filter, some basic morphological tools, and statistics (mostly for the Otsu thresholding, which is already done for you).
Here is what I obtain with your sample image and some other image with a sheet of paper I found around:
The median filter is used to remove minor details from the, now grayscale, image. It will possibly remove thin lines inside the whitish paper, which is good because then you will end with tiny connected components which are easy to discard. After the median, apply a morphological gradient (simply dilation - erosion) and binarize the result by Otsu. The morphological gradient is a good method to keep strong edges, it should be used more. Then, since this gradient will increase the contour width, apply a morphological thinning. Now you can discard small components.
At this point, here is what we have with the right image above (before drawing the blue polygon), the left one is not shown because the only remaining component is the one describing the paper:
Given the examples, now the only issue left is distinguishing between components that look like rectangles and others that do not. This is a matter of determining a ratio between the area of the convex hull containing the shape and the area of its bounding box; the ratio 0.7 works fine for these examples. It might be the case that you also need to discard components that are inside the paper, but not in these examples by using this method (nevertheless, doing this step should be very easy especially because it can be done through OpenCV directly).
For reference, here is a sample code in Mathematica:
f = Import["http://thwartedglamour.files.wordpress.com/2010/06/my-coffee-table-1-sa.jpg"]
f = ImageResize[f, ImageDimensions[f][[1]]/4]
g = MedianFilter[ColorConvert[f, "Grayscale"], 2]
h = DeleteSmallComponents[Thinning[
Binarize[ImageSubtract[Dilation[g, 1], Erosion[g, 1]]]]]
convexvert = ComponentMeasurements[SelectComponents[
h, {"ConvexArea", "BoundingBoxArea"}, #1 / #2 > 0.7 &],
"ConvexVertices"][[All, 2]]
(* To visualize the blue polygons above: *)
Show[f, Graphics[{EdgeForm[{Blue, Thick}], RGBColor[0, 0, 1, 0.5],
Polygon ## convexvert}]]
If there are more varied situations where the paper's rectangle is not so well defined, or the approach confuses it with other shapes -- these situations could happen due to various reasons, but a common cause is bad image acquisition -- then try combining the pre-processing steps with the work described in the paper "Rectangle Detection based on a Windowed Hough Transform".
Well, I'm late.
In your image, the paper is white, while the background is colored. So, it's better to detect the paper is Saturation(饱和度) channel in HSV color space. Take refer to wiki HSL_and_HSV first. Then I'll copy most idea from my answer in this Detect Colored Segment in an image.
Main steps:
Read into BGR
Convert the image from bgr to hsv space
Threshold the S channel
Then find the max external contour(or do Canny, or HoughLines as you like, I choose findContours), approx to get the corners.
This is my result:
The Python code(Python 3.5 + OpenCV 3.3):
#!/usr/bin/python3
# 2017.12.20 10:47:28 CST
# 2017.12.20 11:29:30 CST
import cv2
import numpy as np
##(1) read into bgr-space
img = cv2.imread("test2.jpg")
##(2) convert to hsv-space, then split the channels
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h,s,v = cv2.split(hsv)
##(3) threshold the S channel using adaptive method(`THRESH_OTSU`) or fixed thresh
th, threshed = cv2.threshold(s, 50, 255, cv2.THRESH_BINARY_INV)
##(4) find all the external contours on the threshed S
#_, cnts, _ = cv2.findContours(threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cv2.findContours(threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
canvas = img.copy()
#cv2.drawContours(canvas, cnts, -1, (0,255,0), 1)
## sort and choose the largest contour
cnts = sorted(cnts, key = cv2.contourArea)
cnt = cnts[-1]
## approx the contour, so the get the corner points
arclen = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.02* arclen, True)
cv2.drawContours(canvas, [cnt], -1, (255,0,0), 1, cv2.LINE_AA)
cv2.drawContours(canvas, [approx], -1, (0, 0, 255), 1, cv2.LINE_AA)
## Ok, you can see the result as tag(6)
cv2.imwrite("detected.png", canvas)
Related answers:
How to detect colored patches in an image using OpenCV?
Edge detection on colored background using OpenCV
OpenCV C++/Obj-C: Detecting a sheet of paper / Square Detection
How to use `cv2.findContours` in different OpenCV versions?
What you need is a quadrangle instead of a rotated rectangle.
RotatedRect will give you incorrect results. Also you will need a perspective projection.
Basicly what must been done is:
Loop through all polygon segments and connect those which are almost equel.
Sort them so you have the 4 most largest line segments.
Intersect those lines and you have the 4 most likely corner points.
Transform the matrix over the perspective gathered from the corner points and the aspect ratio of the known object.
I implemented a class Quadrangle which takes care of contour to quadrangle conversion and will also transform it over the right perspective.
See a working implementation here:
Java OpenCV deskewing a contour
Once you have detected the bounding box of the document, you can perform a four-point perspective transform to obtain a top-down birds eye view of the image. This will fix the skew and isolate only the desired object.
Input image:
Detected text object
Top-down view of text document
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.imshow("image", image)
cv2.waitKey()
Detecting sheet of paper is kinda old school. If you want to tackle skew detection then it is better if you straightaway aim for text line detection. With this you will get the extremas left, right, top and bottom. Discard any graphics in the image if you dont want and then do some statistics on the text line segments to find the most occurring angle range or rather angle. This is how you will narrow down to a good skew angle. Now after this you put these parameters the skew angle and the extremas to deskew and chop the image to what is required.
As for the current image requirement, it is better if you try CV_RETR_EXTERNAL instead of CV_RETR_LIST.
Another method of detecting edges is to train a random forests classifier on the paper edges and then use the classifier to get the edge Map. This is by far a robust method but requires training and time.
Random forests will work with low contrast difference scenarios for example white paper on roughly white background.