I need to get contour from hand image, usually I process image with 4 steps:
get raw RGB gray image from 3 channels to 1 channel:
cvtColor(sourceGrayImage, sourceGrayImage, COLOR_BGR2GRAY);
use Gaussian blur to filter gray image:
GaussianBlur(sourceGrayImage, sourceGrayImage, Size(3,3), 0);
binary gray image, I split image by height, normally I split image to 6 images by its height, then each one I do threshold process:
// we split source picture to binaryImageSectionCount(here it's 8) pieces by its height,
// then we for every piece, we do threshold,
// and at last we combine them agin to binaryImage
const binaryImageSectionCount = 8;
void GetBinaryImage(Mat &grayImage, Mat &binaryImage)
{
// get every partial gray image's height
int partImageHeight = grayImage.rows / binaryImageSectionCount;
for (int i = 0; i < binaryImageSectionCount; i++)
{
Mat partialGrayImage;
Mat partialBinaryImage;
Rect partialRect;
if (i != binaryImageSectionCount - 1)
{
// if it's not last piece, Rect's height should be partImageHeight
partialRect = Rect(0, i * partImageHeight, grayImage.cols, partImageHeight);
}
else
{
// if it's last piece, Rect's height should be (grayImage.rows - i * partImageHeight)
partialRect = Rect(0, i * partImageHeight, grayImage.cols, grayImage.rows - i * partImageHeight);
}
Mat partialResource = grayImage(partialRect);
partialResource.copyTo(partialGrayImage);
threshold( partialGrayImage, partialBinaryImage, 0, 255, THRESH_OTSU);
// combin partial binary image to one piece
partialBinaryImage.copyTo(binaryImage(partialRect));
///*stringstream resultStrm;
//resultStrm << "partial_" << (i + 1);
//string string = resultStrm.str();
//imshow(string, partialBinaryImage);
//waitKey(0);*/
}
imshow("result binary image.", binaryImage);
waitKey(0);
return;
}
use findcontour to get biggest area contour:
vector<vector<Point> > contours;
findContours(binaryImage, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
normally it works well,
But for some low quality gray image, it doesn't work,like below:
the complete code is here:
#include <opencv2/imgproc/imgproc.hpp>
#include<opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
// we split source picture to binaryImageSectionCount(here it's 8) pieces by its height,
// then we for every piece, we do threshold,
// and at last we combine them agin to binaryImage
const binaryImageSectionCount = 8;
void GetBinaryImage(Mat &grayImage, Mat &binaryImage)
{
// get every partial gray image's height
int partImageHeight = grayImage.rows / binaryImageSectionCount;
for (int i = 0; i < binaryImageSectionCount; i++)
{
Mat partialGrayImage;
Mat partialBinaryImage;
Rect partialRect;
if (i != binaryImageSectionCount - 1)
{
// if it's not last piece, Rect's height should be partImageHeight
partialRect = Rect(0, i * partImageHeight, grayImage.cols, partImageHeight);
}
else
{
// if it's last piece, Rect's height should be (grayImage.rows - i * partImageHeight)
partialRect = Rect(0, i * partImageHeight, grayImage.cols, grayImage.rows - i * partImageHeight);
}
Mat partialResource = grayImage(partialRect);
partialResource.copyTo(partialGrayImage);
threshold( partialGrayImage, partialBinaryImage, 0, 255, THRESH_OTSU);
// combin partial binary image to one piece
partialBinaryImage.copyTo(binaryImage(partialRect));
///*stringstream resultStrm;
//resultStrm << "partial_" << (i + 1);
//string string = resultStrm.str();
//imshow(string, partialBinaryImage);
//waitKey(0);*/
}
imshow("result binary image.", binaryImage);
waitKey(0);
return;
}
int main(int argc, _TCHAR* argv[])
{
// get image path
string imgPath("C:\\Users\\Alfred\\Desktop\\gray.bmp");
// read image
Mat src = imread(imgPath);
imshow("Source", src);
//medianBlur(src, src, 7);
cvtColor(src, src, COLOR_BGR2GRAY);
imshow("gray", src);
// do filter
GaussianBlur(src, src, Size(3,3), 0);
// binary image
Mat threshold_output(src.rows, src.cols, CV_8UC1, Scalar(0, 0, 0));
GetBinaryImage(src, threshold_output);
imshow("binaryImage", threshold_output);
// get biggest contour
vector<vector<Point> > contours;
findContours(threshold_output,contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
int biggestContourIndex = 0;
int maxContourArea = -1000;
for (int i = 0; i < contours.size(); i++)
{
if (contourArea(contours[i]) > maxContourArea)
{
maxContourArea = contourArea(contours[i]);
biggestContourIndex = i;
}
}
// show biggest contour
Mat biggestContour(threshold_output.rows, threshold_output.cols, CV_8UC1, Scalar(0, 0, 0));
drawContours(biggestContour, contours, biggestContourIndex, cv::Scalar(255,255,255), 2, 8, vector<Vec4i>(), 0, Point());
imshow("maxContour", biggestContour);
waitKey(0);
}
could anybody please help me to get a better hand contour result?
thanks!!!
I have the code snippet in python, you can follow the same approach in C:
img = cv2.imread(x, 1)
cv2.imshow("img",img)
imgray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
cv2.imshow("gray",imgray)
#Code for histogram equalization
equ = cv2.equalizeHist(imgray)
cv2.imshow('equ', equ)
#Code for contrast limited adaptive histogram equalization
#clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
#cl2 = clahe.apply(imgray)
#cv2.imshow('clahe2', cl2)
This is the result I obtained:
If you're image is horribly bad you could try the code that I commented involving contrast limited adaptive histogram equalization.
Related
How to remove all vertical and horizontal lines that form boxes/tables
I have searched and tried.. But can't make it work
Have tried to search for it the last couple of days.. have found a few examples which doesn't work.. Have tried to get the pieces together..
cv:Mat img = cv::imread(input, CV_LOAD_IMAGE_GRAYSCALE);
cv::Mat grad;
cv::Mat morphKernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3));
cv::morphologyEx(img, grad, cv::MORPH_GRADIENT, morphKernel);
cv::Mat res;
cv::threshold(grad, res, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
// find contours
cv::Mat mask = cv::Mat::zeros(res.size(), CV_8UC1);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(res, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
for(int i = 0; i < contours.size(); i++){
cv::Mat approx;
double peri = cv::arcLength(contours[i], true);
cv::approxPolyDP(contours[i], approx, 0.04 * peri, true);
int num_vertices = approx.rows;
if(num_vertices == 4){
cv::Rect rect = cv::boundingRect(contours[i]);
// this is a rectangle
}
}
You could try something like that :
threshold your image
compute connected components
remove particules for which at least 3 of 4 bounding box tops are in touch with particule
This should give you something like that :
Here is the associated source code :
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <limits>
using namespace cv;
struct BBox {
BBox() :
_xMin(std::numeric_limits<int>::max()),
_xMax(std::numeric_limits<int>::min()),
_yMin(std::numeric_limits<int>::max()),
_yMax(std::numeric_limits<int>::min())
{}
int _xMin;
int _xMax;
int _yMin;
int _yMax;
};
int main()
{
// read input image
Mat inputImg = imread("test3_1.tif", IMREAD_GRAYSCALE);
// create binary image
Mat binImg;
threshold(inputImg, binImg, 254, 1, THRESH_BINARY_INV);
// compute connected components
Mat labelImg;
const int nbComponents = connectedComponents(binImg, labelImg, 8, CV_32S);
// compute associated bboxes
std::vector<BBox> bboxColl(nbComponents);
for (int y = 0; y < labelImg.rows; ++y) {
for (int x = 0; x < labelImg.cols; ++x) {
const int curLabel = labelImg.at<int>(y, x);
BBox& curBBox = bboxColl[curLabel];
if (curBBox._xMin > x)
curBBox._xMin = x;
if (curBBox._xMax < x)
curBBox._xMax = x;
if (curBBox._yMin > y)
curBBox._yMin = y;
if (curBBox._yMax < y)
curBBox._yMax = y;
}
}
// parse all labels
std::vector<bool> lutTable(nbComponents);
for (int i=0; i<nbComponents; ++i) {
// check current label width
const BBox& curBBox = bboxColl[i];
if (curBBox._xMax - curBBox._xMin > labelImg.cols * 0.3)
lutTable[i] = false;
else
lutTable[i] = true;
}
// create output image
Mat resImg(binImg);
MatConstIterator_<int> iterLab = labelImg.begin<int>();
MatIterator_<unsigned char> iterRes = resImg.begin<unsigned char>();
while (iterLab != labelImg.end<int>()) {
if (lutTable[*iterLab] == true)
*iterRes = 1;
else
*iterRes = 0;
++iterLab;
++iterRes;
}
// write result
imwrite("resImg3_1.tif", resImg);
}
I simply remove all labels for which with is greater than 30% of image total width. Your image is quite noisy so I can't use bounding box tops touches as said before, sorry...
Don't know if this will match with all your images but you could add some geometrical filters to improve this first version.
Regards,
You can use LineSegmentDetector for this purpose:
import numpy as np
import cv2
image = cv2.imread("image.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# This is the detector, you might have to play with the parameters
lsd = cv2.createLineSegmentDetector(0, _scale=0.6)
lines, widths, _, _ = lsd.detect(gray)
if lines is not None:
for i in range(0, len(lines)):
l = lines[i][0]
# Much slower version of Euclidean distance
if np.sqrt((l[0]-l[2])**2 + (l[1]-l[3])**2) > 50:
# You might have to tweak the threshold as well for other images
cv2.line(image, (l[0], l[1]), (l[2], l[3]), (255, 255, 255), 3,
cv2.LINE_AA)
cv2.imwrite("result.png", image)
Output:
As you can see, the lines aren't completely removed in the top image so I am leaving the tweaking part to you. Hope it helps!
I'd like to use this answer box to make a few comments.
First off, its way easier to see progress, if you can easily see what the output looks like visually. With that in mind, here is an update to your code with an emphasis on viewing interim results. I'm using VS Studio Community 2017, and OpenCV version 4.0.1 (64bit) in Win10 for anyone who wants to repeat this exercise. There were a few routines used that required updates for OpenCV 4...
#include "pch.h"
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
int main()
{
cv::Mat img = cv::imread("0zx9Q.png", cv::IMREAD_GRAYSCALE ); // --> Contour size = 0x000000e7 hex (231 each)
// cv::Mat img = cv::imread("0zx9Q.png", cv::IMREAD_REDUCED_GRAYSCALE_2); // --> Contour size = 0x00000068 hex (104 each)
// cv::Mat img = cv::imread("0zx9Q.png", cv::IMREAD_REDUCED_GRAYSCALE_4); // --> Contour size = 0x0000001f hex (31 each)
// cv::Mat img = cv::imread("0zx9Q.png", cv::IMREAD_REDUCED_GRAYSCALE_8); // --> Contour size = 0x00000034 hex (52 each)
if (!img.data) // Check for invalid input
{
std::cout << "Could not open or find the image" << std::endl;
return -1;
}
// cv::namedWindow("Display Window - GrayScale Image", cv::WINDOW_NORMAL); // Create a window for display.
// cv::imshow("Display Window - GrayScale Image", img); // Show our image inside it.
// cv::waitKey(0); // Wait for a keystroke in the window
cv::Mat imgOriginal = cv::imread("0zx9Q.png", cv::IMREAD_UNCHANGED);
cv::namedWindow("Display Window of Original Document", cv::WINDOW_NORMAL); // Create a window for display.
cv::Mat grad;
cv::Mat morphKernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(25, 25));
// MORPH_ELLIPSE, contourSize: 0x00000005 when 60,60... but way slow...
// MORPH_ELLIPSE, contourSize: 0x00000007 when 30,30...
// MORPH_ELLIPSE, contourSize: 0x00000007 when 20,20...
// MORPH_ELLIPSE, contourSize: 0x0000000a when 15,15...
// MORPH_ELLIPSE, contourSize: 0x0000007a when 5,5...
// MORPH_ELLIPSE, contourSize: 0x000000e7 when 3,3 and IMREAD_GRAYSCALE
// MORPH_CROSS, contourSize: 0x0000008e when 5,5
// MORPH_CROSS, contourSize: 0x00000008 when 25,25
// MORPH_RECT, contourSize: 0x00000007 when 25,25
cv::morphologyEx(img, grad, cv::MORPH_GRADIENT, morphKernel);
cv::Mat res;
cv::threshold(grad, res, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
// find contours
cv::Mat mask = cv::Mat::zeros(res.size(), CV_8UC1);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(res, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
int contourSize = contours.size();
std::cout << " There are a total of " << contourSize << " contours. \n";
for (int i = 0; i < contourSize; i++) {
cv::Mat approx;
double peri = cv::arcLength(contours[i], true);
cv::approxPolyDP(contours[i], approx, 0.04 * peri, true);
int num_vertices = approx.rows;
std::cout << " Contour # " << i << " has " << num_vertices << " vertices.\n";
if (num_vertices == 4) {
cv::Rect rect = cv::boundingRect(contours[i]);
cv::rectangle(imgOriginal, rect, cv::Scalar(255, 0, 0), 4);
}
}
cv::imshow("Display Window of Original Document", imgOriginal); // Show our image inside it.
cv::waitKey(0); // Wait for a keystroke in the window
}
With that said, the parameters for getStructuringElement() matter huge. I spent a lot of time trying different choices, with very mixed results. And it turns out there are a whole lot of findContours() responses that don't have four vertices. I suspect the whole findContours() approach is probably flawed. I often would get false rectangles identified around text characters in words and phrases. Additionally the lighter lines surrounding some boxed areas would be ignored.
Instead, I think I'd be looking hard at straight line detection, via techniques discussed here, if such a response exists for a C++ and not python. Perhaps here, or here? I hoping line detection techniques would ultimately get better results. And hey, if the documents / images selected always include a white background, it would be pretty easy to solid rectangle them OUT of the image, via LineTypes: cv::FILLED
Info here provided, not as an answer to the posted question but as a methodology to visually determine success in the future.
I have a project that I need to make for classes and I chose task that is a bit out of my skills.
Target is to count result of dice rolls.
For now, I'am trying to make it work on a sample pic:
sample pic of dices
and my current code is added below:
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "iostream"
using namespace cv;
using namespace std;
Mat KostkaFunkcja(Mat image, Mat in, Scalar low, Scalar high);
int getMaxAreaContourId(vector <vector<cv::Point>> contours);
vector<Point> contoursConvexHull(vector<vector<Point> > contours, int index);
Mat ZnakiFunkcja(Mat image, Mat in, Scalar low, Scalar high);
int main(int argc, char** argv)
{
Mat image;
image = imread("kostki.jpg", CV_LOAD_IMAGE_COLOR);
if (!image.data)
{
cout << "Could not open or find the image" << std::endl;
return -1;
}
Mat imgHSV;
Mat workimage = image;
cvtColor(workimage, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
//red dice
workimage = KostkaFunkcja(workimage, imgHSV, Scalar(146, 0, 31), Scalar(179, 255, 255));
//green dice
workimage = KostkaFunkcja(workimage, imgHSV, Scalar(25, 147, 0), Scalar(98, 255, 154));
//yellow dice
workimage = KostkaFunkcja(workimage, imgHSV, Scalar(22, 45, 161), Scalar(91, 255, 255));
//black dice
workimage = KostkaFunkcja(workimage, imgHSV, Scalar(98, 0, 0), Scalar(179, 232, 107));
//white symbols
workimage = ZnakiFunkcja(workimage, imgHSV, Scalar(58, 0, 183), Scalar(179, 145, 255));
namedWindow("Kostki_kontur", CV_WINDOW_AUTOSIZE);
imshow("Kostki_kontur", workimage);
waitKey(0);
return 0;
}
Mat KostkaFunkcja(Mat image, Mat in, Scalar low, Scalar high)
{
Mat temp;
inRange(in, low, high, temp);
erode(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
dilate(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
dilate(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
erode(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
Mat srcBlur, srcCanny;
blur(temp, srcBlur, Size(3, 3));
Canny(srcBlur, srcCanny, 0, 100, 3, true);
vector<vector<Point> > contours;
findContours(srcCanny, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
int largest_contour_index = getMaxAreaContourId(contours);
Mat drawing = Mat::zeros(srcCanny.size(), CV_8UC3);
for (int i = 0; i< contours.size(); i++)
{
Scalar color = Scalar(255, 255, 255);
drawContours(drawing, contours, i, color, 2);
}
vector<Point> ConvexHullPoints = contoursConvexHull(contours, largest_contour_index);
polylines(image, ConvexHullPoints, true, Scalar(0, 0, 255), 2);
return image;
}
vector<Point> contoursConvexHull(vector<vector<Point> > contours, int index)
{
vector<Point> result;
vector<Point> pts;
for (size_t j = 0; j< contours[index].size(); j++)
pts.push_back(contours[index][j]);
convexHull(pts, result);
return result;
}
int getMaxAreaContourId(vector <vector<cv::Point>> contours)
{
double maxArea = 0;
int maxAreaContourId = -1;
for (int j = 0; j < contours.size(); j++) {
double newArea = cv::contourArea(contours.at(j));
if (newArea > maxArea) {
maxArea = newArea;
maxAreaContourId = j;
}
return maxAreaContourId;
}
}
Mat ZnakiFunkcja(Mat image, Mat in, Scalar low, Scalar high)
{
Mat temp;
inRange(in, low, high, temp);
erode(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
dilate(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
dilate(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
erode(temp, temp, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)));
Mat srcBlur, srcCanny;
blur(temp, srcBlur, Size(3, 3));
Canny(srcBlur, srcCanny, 0, 100, 3, true);
vector<vector<Point> > contours;
findContours(srcCanny, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
Mat drawing = Mat::zeros(srcCanny.size(), CV_8UC3);
for (int i = 0; i< contours.size(); i++)
{
Scalar color = Scalar(255, 255, 255);
drawContours(drawing, contours, i, color, 2);
polylines(image, contours, true, Scalar(0, 0, 255), 2);
return image;
}
}
Yet I have no idea how to count different shapes (hearts, lightnings, shields, numbers).
I will be greatfull if anybody would give me a tip or solution of how to do the job.
1) sorry for bad english
2) we had no openCV in classes [only basic c++]
3) tryed to found anything usefull on internet, but even if I found anything, I could't understand what was happening in the code
Your project can be splited in three steps:
find the dices.
extract the shapes from the visible face of
the dices.
count the faces.
For the first step among all the possible approaches I think saliency map approaches can help.
Saliency map are a family of segmentation algorithm that aim to detect the parts in the image which are more likely to attract visual attention.
OpenCV have a saliency API that already implement several saliency algorithm and for each of them you can get an segmentation map.
It is highlikely considering the example image you gave the saliency will be focus on the dices.
From this you can so extract the dices as rois from the original image.
For the step 2) saliency algorithms may also fit... or not that depend a lot of the statistical criterions that are used by the algorithm.
However the previously extracted rois should only contain the face of the dice that does contain the shapes you want to count in step 3) so approaches based on contours detection may give quite good result.
Once you get the shapes among the way to count each shape you can use templateMatching (that is also already implement in OpenCV),a clustering approach based on the a shape sensitive metric (Hausdorff, Dice, ...), or many other.
Here is a code that can help you to deal with the two first step.
#ifndef _DEBUG
#define _DEBUG
#endif
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/saliency.hpp>
#include <opencv2/highgui.hpp>
#include <list>
CV_EXPORTS_W void get_regions_of_interest(cv::InputArray _src, cv::OutputArrayOfArrays mv, cv::OutputArrayOfArrays mv2 = cv::noArray());
int main()
{
cv::Mat tmp = cv::imread("C:\Desktop\dices.jpg");
if(!tmp.empty())
{
cv::imshow("source",tmp);
cv::waitKey(-1);
}
std::vector<cv::Mat> rois;
get_regions_of_interest(tmp,rois);
std::cout << "Hello World!" << std::endl;
return 0;
}
void get_regions_of_interest(cv::InputArray _src, cv::OutputArrayOfArrays _rois, cv::OutputArrayOfArrays _contours)
{
// Check that the first argument is an image and the second a vector of images.
CV_Assert(_src.isMat() && !_src.depth() && (_src.channels() == 1 || _src.channels() == 3) && _rois.isMatVector() && (!_contours.needed() || (_contours.needed() && _contours.isMatVector()) ) );
static cv::Ptr<cv::saliency::StaticSaliencySpectralResidual> saliency;
if(!saliency)
saliency = cv::saliency::StaticSaliencySpectralResidual::create();
cv::Mat src = _src.getMat();
cv::Mat gray;
if(src.depth() == src.type())
gray = src;
else
cv::cvtColor(src,gray,cv::COLOR_BGR2GRAY);
bool is_ctr_needed = _contours.needed();
std::list<cv::Mat> final_ctrs;
// Step 1) Process the saliency in order to segment the dices.
cv::Mat saliency_map;
cv::Mat binary_map;
saliency->computeSaliency(src,saliency_map);
saliency->computeBinaryMap(saliency_map,binary_map);
saliency_map.release();
// Step 2) From the binary map get the regions of interest.
cv::Mat1i stats;
std::vector<cv::Mat> rois;
cv::Mat labels;
cv::Mat centroids;
cv::connectedComponentsWithStats(binary_map, labels, stats, centroids);
labels.release();
centroids.release();
// prepare the memory
rois.reserve(stats.rows-1);
// Sort the stats in order to remove the background.
stats = stats.colRange(0,stats.cols-1);
// Extract the rois.
for(int i=0;i<stats.rows;i++)
{
cv::Rect roi = *reinterpret_cast<cv::Rect*>(stats.ptr<int>(i));
if(static_cast<std::size_t>(roi.area()) == gray.total())
continue;
rois.push_back(gray(roi));
#ifdef _DEBUG
cv::imshow("roi_"+std::to_string(i),gray(roi));
#endif
}
// Step 3) Refine.
// Because the final number of shape cannot be determine in advance it is better to use a linked list than a vector.
// In practice except if there is a huge number of elements to work with the performance will be almost the same.
std::list<cv::Mat> shapes;
int cnt=0;
for(const cv::Mat& roi : rois)
{
cv::Mat tmp = roi.clone();
// Slightly sharpen the regions contours
cv::morphologyEx(tmp,tmp, cv::MORPH_CLOSE, cv::noArray());
// Reduce the influence of local unhomogeneous illumination.
cv::GaussianBlur(tmp,tmp,cv::Size(31,31), 5);
cv::Mat thresh;
// Binarize the image.
cv::threshold(roi,thresh,0.,255.,cv::THRESH_BINARY | cv::THRESH_OTSU);
#ifdef _DEBUG
cv::imshow("thresh"+std::to_string(cnt++),thresh);
#endif
// Find the contours of each sub region on interest
std::vector<cv::Mat> contours;
cv::findContours(thresh, contours, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
cv::Mat dc;
cv::merge(std::vector<cv::Mat>(3,thresh),dc);
// cv::drawContours(dc, contours,-1,cv::Scalar(0.,0.,255),2);
// cv::imshow("ctrs"+std::to_string(cnt),dc);
// Extract the sub-regions
if(is_ctr_needed)
{
for(const cv::Mat& ctrs: contours)
{
cv::Rect croi = cv::boundingRect(ctrs);
// If the sub region is to big or to small it is depreate
if(static_cast<std::size_t>(croi.area()) == roi.total() || croi.area()<50)
continue;
final_ctrs.push_back(ctrs);
shapes.push_back(roi(croi));
#ifdef _DEBUG
cv::rectangle(dc,croi,cv::Scalar(0.,0.,255.));
cv::imshow("sub_roi_"+std::to_string(cnt++),roi(croi));
#endif
}
}
else
{
for(const cv::Mat& ctrs: contours)
{
cv::Rect croi = cv::boundingRect(ctrs);
// If the sub region is to big or to small it is depreate
if(static_cast<std::size_t>(croi.area()) == roi.total() || croi.area()<50)
continue;
shapes.push_back(roi(croi));
#ifdef _DEBUG
cv::rectangle(dc,croi,cv::Scalar(0.,0.,255.));
cv::imshow("sub_roi_"+std::to_string(cnt++),roi(croi));
#endif
}
}
}
#ifdef _DEBUG
cv::waitKey(-1);
#endif
// Final Step: set the output
_rois.create(shapes.size(),1,CV_8U);
_rois.assign(std::vector<cv::Mat>(shapes.begin(),shapes.end()));
if(is_ctr_needed)
{
_contours.create(final_ctrs.size(),1,CV_32SC2);
_contours.assign(std::vector<cv::Mat>(final_ctrs.begin(), final_ctrs.end()));
}
}
I am building a scanner feature for my app and binarize the photo of the document with OpenCV:
// convert to greyscale
cv::Mat converted, blurred, blackAndWhite;
converted = cv::Mat(inputMatrix.rows, inputMatrix.cols, CV_8UC1);
cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY );
// remove noise
cv::GaussianBlur(converted, blurred, cvSize(3,3), 0);
// adaptive threshold
cv::adaptiveThreshold(blackAndWhite, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15 , 9);
The result is okay, but scans from different scanner apps are much better. Especially very small, tiny sized text is much better:
Processed with opencv
Scanned With DropBox
What can I do, to improve my result?
May be the apps are using anti-aliasing to make their binarized output look nicer. To obtain a similar effect, I first tried binarizing the image, but the result didn't look very nice with all the jagged edges. Then I applied pyramid upsampling and then downsampling to the result, and the output was better.
I didn't use adaptive thresholding however. I segmented the text-like regions and processed those regions only, then pasted them to form the final images. It is a kind of local thresholding using the Otsu method or the k-means (using combinations of thr_roi_otsu, thr_roi_kmeans and proc_parts in the code). Below are some results.
Apply Otsu threshold to all text regions, then upsample followed by downsample:
Some text:
Full image:
Upsample input image, apply Otsu threshold to individual text regions, downsample the result:
Some text:
Full image:
/*
apply Otsu threshold to the region in mask
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
Mat bw = Mat::ones(im.size(), CV_8U) * 255;
vector<unsigned char> pixels(countNonZero(mask));
int index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
pixels[index++] = im.at<unsigned char>(r, c);
}
}
}
// threshold pixels
threshold(pixels, pixels, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
// paste pixels
index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
bw.at<unsigned char>(r, c) = pixels[index++];
}
}
}
return bw;
}
/*
apply k-means to the region in mask
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
Mat bw = Mat::ones(im.size(), CV_8U) * 255;
vector<float> pixels(countNonZero(mask));
int index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
pixels[index++] = (float)im.at<unsigned char>(r, c);
}
}
}
// cluster pixels by gray level
int k = 2;
Mat data(pixels.size(), 1, CV_32FC1, &pixels[0]);
vector<float> centers;
vector<int> labels(countNonZero(mask));
kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);
// examine cluster centers to see which pixels are dark
int label0 = centers[0] > centers[1] ? 1 : 0;
// paste pixels
index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
}
}
}
return bw;
}
/*
apply procfn to each connected component in the mask,
then paste the results to form the final image
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
Mat tmp = mask.clone();
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
Mat byparts = Mat::ones(im.size(), CV_8U) * 255;
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
{
Rect rect = boundingRect(contours[idx]);
Mat msk = mask(rect);
Mat img = im(rect);
// process the rect
Mat roi = procfn(msk, img);
// paste it to the final image
roi.copyTo(byparts(rect));
}
return byparts;
}
int _tmain(int argc, _TCHAR* argv[])
{
Mat im = imread("1.jpg", 0);
// detect text regions
Mat morph;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);
// prepare a mask for text regions
Mat bw;
threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);
Mat bw2x, im2x;
pyrUp(bw, bw2x);
pyrUp(im, im2x);
// apply Otsu threshold to all text regions, then upsample followed by downsample
Mat otsu1x = thr_roi_otsu(bw, im);
pyrUp(otsu1x, otsu1x);
pyrDown(otsu1x, otsu1x);
// apply k-means to all text regions, then upsample followed by downsample
Mat kmeans1x = thr_roi_kmeans(bw, im);
pyrUp(kmeans1x, kmeans1x);
pyrDown(kmeans1x, kmeans1x);
// upsample input image, apply Otsu threshold to all text regions, downsample the result
Mat otsu2x = thr_roi_otsu(bw2x, im2x);
pyrDown(otsu2x, otsu2x);
// upsample input image, apply k-means to all text regions, downsample the result
Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
pyrDown(kmeans2x, kmeans2x);
// apply Otsu threshold to individual text regions, then upsample followed by downsample
Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
pyrUp(otsuparts1x, otsuparts1x);
pyrDown(otsuparts1x, otsuparts1x);
// apply k-means to individual text regions, then upsample followed by downsample
Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
pyrUp(kmeansparts1x, kmeansparts1x);
pyrDown(kmeansparts1x, kmeansparts1x);
// upsample input image, apply Otsu threshold to individual text regions, downsample the result
Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
pyrDown(otsuparts2x, otsuparts2x);
// upsample input image, apply k-means to individual text regions, downsample the result
Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
pyrDown(kmeansparts2x, kmeansparts2x);
return 0;
}
I'm trying to count object from image. I use logs photo, and I use some steps to get a binary image.
This is my code:
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <features2d.hpp>
using namespace cv;
using namespace std;
int main(int argc, char *argv[])
{
//load image
Mat img = imread("kayu.jpg", CV_LOAD_IMAGE_COLOR);
if(img.empty())
return -1;
//namedWindow( "kayu", CV_WINDOW_AUTOSIZE );
imshow("kayu", img);
//convert to b/w
Mat bw;
cvtColor(img, bw, CV_BGR2GRAY);
imshow("bw1", bw);
threshold(bw, bw, 40, 255, CV_THRESH_BINARY);
imshow("bw", bw);
//distance transform & normalisasi
Mat dist;
distanceTransform(bw, dist, CV_DIST_L2, 3);
normalize(dist, dist, 0, 2., NORM_MINMAX);
imshow("dist", dist);
//threshold to draw line
threshold(dist, dist, .5, 1., CV_THRESH_BINARY);
imshow("dist2", dist);
//dist = bw;
//dilasi
Mat dilation, erotion, element;
int dilation_type = MORPH_ELLIPSE;
int dilation_size = 17;
element = getStructuringElement(dilation_type, Size(2*dilation_size + 1, 2*dilation_size+1), Point(dilation_size, dilation_size ));
erode(dist, erotion, element);
int erotionCount = 0;
for(int i=0; i<erotionCount; i++){
erode(erotion, erotion, element);
}
imshow("erotion", erotion);
dilate(erotion, dilation, element);
imshow("dilation", dilation);
waitKey(0);
return 0;
}
As you can see, I use Erosion and Dilation to get better circular object of log. My problem is, I'm stuck at counting the object. I tried SimpleBlobDetector but I got nothing, because when I try to convert the result of "dilation" step to CV_8U, the white object disappear. I got error too when I use findContours(). It say something about channel of image. I can't show the error here, because that's too many step and I already delete it from my code.
Btw, at the end, i got 1 channel of image.
Can i just use it to counting, or am i have to convert it and what is the best method to do it?
Two simple steps:
Find contours for the binarized image.
Get the count of the contours.
Code:
int count_trees(const cv::Mat& bin_image){
cv::Mat img;
if(bin_image.channels()>1){
cv::cvtColor(bin_image,img,cv::COLOR_BGR2GRAY);
}
else{
img=bin_image.clone();;
}
if(img.type()!=CV_8UC1){
img*=255.f; //This could be stupid, but I do not have an environment to try it
img.convertTo(img,CV_8UC1);
}
std::vector<std::vector<cv::Point>> contours
std::vector<Vec4i> hierarchy;
cv::findContours( img, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
return contours.size();
}
I have the same problem, here's an idea I'm about to implement.
1) Represent your image as an array of integers; 0 = black, 1 = white.
2) set N = 2;
3) Scan your image, pixel-by-pixel. Whenever you find a white pixel, activate a flood-fill algorithm, starting at the pixel just found; paint the region with the value of N++;
4) Iterate 3 until you reach the last pixel. (N-2) is the number of regions found.
This method depends on the shape of the objects; mine are more chaotic than yours (wish me luck..). I'll make use of a recursive flood-fill recipe found somewhere (maybe Rosetta Code).
This solution also makes it easy to compute the size of each region.
try to apply that on the your deleted img
// count
for (int i = 0; i< contours.size(); i = hierarchy[i][0]) // iteration sur chaque contour .
{
Rect r = boundingRect(contours[i]);
if (hierarchy[i][2]<0) {
rectangle(canny_output, Point(r.x, r.y), Point(r.x + r.width, r.y + r.height), Scalar(20, 50, 255), 3, 8, 0);
count++;
}
}
cout << "Numeber of contour = " << count << endl;
imshow("src", src);
imshow("contour", dst);
waitKey(0);
friends, could you please help with my questions?
I am using opencv in c++.
I am randomly cropping a small picture from a camera view. I want to find the word located at the bottom of this cropped picture, and this word should also be penetrated by the vertical center line (imaginary) of this cropped picture. please see the following code :
char* my_word = do_ocr(my_cropped_image);
and the do_ocr function is like this:
char* do_ocr(cv::Mat im)
{
cv::Mat gray;
cv::cvtColor(im, gray, CV_BGR2GRAY);
// ...other image pre-processing here...
// Pass it to Tesseract API
tesseract::TessBaseAPI tess;
tess.Init(NULL, "eng", tesseract::OEM_DEFAULT);
tess.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
tess.SetImage((uchar*)gray.data, gray.cols, gray.rows, 1, gray.cols);
// Get the text
char* out = tess.GetUTF8Text();
std::cout << out << std::endl;
return out;
}
The following is the schematic diagram and some samples of my_cropped_image :
my_cropped_image sample # 1, the letter "preceding" should be detected:
my_cropped_image sample # 2, the letter "advantageous" should be detected:
my_cropped_image sample # 3, the letter "Correlation" should be detected:
my_cropped_image sample # 4, the letter "density" should be detected:
my_cropped_image sample # 5, the letter "time" should be detected:
I'll appreciate the helps from you to update my do_ocr function.
Thank you and have a great day!
Are these the results you were looking for?
Methodology:
1) Binaryze the image, white is foreground. Here is simply done with img = img < 150;. You can use more sophisticated methods, like adaptiveThreshold.
You get something like:
2) Apply a open morphological operation, so that all the letters in a single word for a single blob:
3) Find the rectangle of each connected component:
4) Take the bottom one, in the center.
Here the full code:
#include <opencv2\opencv.hpp>
#include <vector>
using namespace std;
using namespace cv;
Mat3b dbg;
int main()
{
Mat1b img = imread("path_to_image", IMREAD_GRAYSCALE);
cvtColor(img, dbg, COLOR_GRAY2BGR);
Mat3b result;
cvtColor(img, result, COLOR_GRAY2BGR);
Mat1b img2;
img2 = img < 150;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(5,3));
morphologyEx(img2, img2, MORPH_DILATE, kernel);
// Apply a small border
copyMakeBorder(img2, img2, 5, 5, 5, 5, BORDER_CONSTANT, Scalar(0));
// Take the bounding boxes of all connected components
vector<vector<Point>> contours;
findContours(img2.clone(), contours, CV_RETR_LIST, CHAIN_APPROX_NONE);
int minArea = 60;
vector<Rect> rects;
for (int i = 0; i < contours.size(); ++i)
{
Rect r = boundingRect(contours[i]);
if (r.area() >= minArea)
{
// Account for border
r -= Point(5,5);
rects.push_back(r);
}
}
int middle = img.cols / 2;
// Keep bottom rect, containig middle point
if (rects.empty()) return -1;
Rect word;
for (int i = 1; i < rects.size(); ++i)
{
Point pt(middle, rects[i].y + rects[i].height/2);
if (rects[i].contains(pt))
{
if (rects[i].y > word.y)
{
word = rects[i];
}
}
}
// Show results
Mat3b res;
cvtColor(img, res, COLOR_GRAY2BGR);
for (int i = 0; i < rects.size(); ++i)
{
rectangle(res, rects[i], Scalar(0, 255, 0));
}
rectangle(result, word, Scalar(0, 0, 255), 2);
imshow("Rects", res);
imshow("Result", result);
waitKey();
return 0;
}