Why does Mat.forEach doesn't change itself? - c++

Basically, I'm trying to use less pixels to represent an image itself.
The steps are below:
Say I will input an image with size [1000*600], then I got 600_000 pixels(rgb), which could be [600_000, 3] vectors. K-Means is used to get its cluster centers.
The each pixel in the image will be placed with its nearest neighbor among the clusters found via K-Means.
The source is:
template <typename T>
void NN(Point3_<T>& pixel, const Mat& points)
{
vector<T> vt {pixel.x, pixel.x, pixel.z};
double min_dist = LDBL_MAX;
int min_index = -1;
for (int i = 0; i < points.rows; ++ i)
{
double dist = norm(vt, points.row(i), NORM_L2);
if (dist < min_dist)
{
min_dist = dist;
min_index = i;
}
}
// assert(min_index != -1);
pixel.x = points.at<T>(min_index, 0);
pixel.y = points.at<T>(min_index, 1);
pixel.z = points.at<T>(min_index, 2);
}
template <typename T>
void NN(Mat& img, const Mat& points)
{
timer::start("assign");
img.forEach<Point3_<T>>([&points](Point3_<T> &pixel, const int position[])
{
NN(pixel, points);
});
timer::stop<ms>();
}
Mat kmeans(const Mat& original_img, const int K)
{
Mat img;
original_img.reshape(3, original_img.rows * original_img.cols)
.convertTo(img, CV_32FC3);
timer::start("K-means cluster");
// Require img.type() == CV_32F
Mat clusters = BOWKMeansTrainer(K).cluster(img);
timer::stop<ms>();
// Type 5 -> Type 0: 32FC1 -> 8UC1
// K rows, 3 cols, 8UC1
clusters.convertTo(clusters, CV_8UC1);
Mat output_img = original_img;
NN<uchar>(output_img, clusters);
// assert won't fire, why?
assert(equal(original_img.begin<uchar>(), original_img.end<uchar>(),
output_img.begin<uchar>()));
return output_img;
}
int main(int argc, char* argv[])
{
vector<int> ks {2, 16};
string filename = "1";
string pathname = string("./img/") + filename + ".jpg";
Mat img = imread(pathname);
for (const int& K: ks)
{
imshow(int_to_string(K), kmeans(img, K));
// write_img(filename, "kmeans", K, kmeans(img, K));
}
std::cout << "Press enter to continue...";
cin.get();
}
The questions are:
The assert() in kmeans() won't fire. That is, the mat object original_img is identical to output_img. How could this happen?
The two imwrite() in main() will show two identical 2-value images. That is, the K-Means with K=2 works, while the following with K=16 does not. Note that if we output one image per execution, everything is fine.
The buggy output is below:
The original image and K-Means with K=16 could be seen below:

Thank god! I've found the cause.
In kmeans(), the below code will call Mat's copy constructor, which costs O(1) to assign original_img's header to output_img's.
Mat output_img = original_img;
This is the reason why the assert won't fire.

Related

Number and character recognition using ANN OpenCV 3.1

I have implemented Neural network using OpenCV ANN Library. I am newbie in this field and I learn everything about it online (Mostly StackOverflow).
I am using this ANN for detection of number plate. I did segmentation part using OpenCV image processing library and it is working good. It performs character segmentation and gives it to the NN part of the project. NN is going to recognize the number plate.
I have sample images of 20x30, therefore I have 600 neurons in input layer. As there are 36 possibilities (0-9,A-Z) I have 36 output neurons. I kept 100 neurons in hidden layer. The predict function of OpenCV is giving me the same output for every segmented image. That output is also showing some large negative(< -1). I have used cv::ml::ANN_MLP::SIGMOID_SYM as an activation function.
Please don't mind as there is lot of code wrongly commented (I am doing trial and error).
I need to find out what is the output of predict function. Thank you for your help.
#include <opencv2/opencv.hpp>
int inputLayerSize = 1;
int outputLayerSize = 1;
int numSamples = 2;
Mat layers = Mat(3, 1, CV_32S);
layers.row(0) =Scalar(600) ;
layers.row(1) = Scalar(20);
layers.row(2) = Scalar(36);
vector<int> layerSizes = { 600,100,36 };
Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
vector <int> n;
//nnPtr->setLayerSizes(3);
nnPtr->setLayerSizes(layers);
nnPtr->setTrainMethod(ml::ANN_MLP::BACKPROP);
nnPtr->setTermCriteria(TermCriteria(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 1000, 0.00001f));
nnPtr->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM, 1, 1);
nnPtr->setBackpropWeightScale(0.5f);
nnPtr->setBackpropMomentumScale(0.5f);
/*CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(
// terminate the training after either 1000
// iterations or a very small change in the
// network wieghts below the specified value
cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.000001),
// use backpropogation for training
CvANN_MLP_TrainParams::BACKPROP,
// co-efficents for backpropogation training
// (refer to manual)
0.1,
0.1);*/
/* Mat samples(Size(inputLayerSize, numSamples), CV_32F);
samples.at<float>(Point(0, 0)) = 0.1f;
samples.at<float>(Point(0, 1)) = 0.2f;
Mat responses(Size(outputLayerSize, numSamples), CV_32F);
responses.at<float>(Point(0, 0)) = 0.2f;
responses.at<float>(Point(0, 1)) = 0.4f;
*/
//reading chaos image
// we will read the classification numbers into this variable as though it is a vector
// close the traning images file
/*vector<int> layerInfo;
layerInfo=nnPtr->get;
for (int i = 0; i < layerInfo.size(); i++) {
cout << "size of 0" <<layerInfo[i] << endl;
}*/
cv::imshow("chaos", matTrainingImagesAsFlattenedFloats);
// cout <<abc << endl;
matTrainingImagesAsFlattenedFloats.convertTo(matTrainingImagesAsFlattenedFloats, CV_32F);
//matClassificationInts.reshape(1, 496);
matClassificationInts.convertTo(matClassificationInts, CV_32F);
matSamples.convertTo(matSamples, CV_32F);
std::cout << matClassificationInts.rows << " " << matClassificationInts.cols << " ";
std::cout << matTrainingImagesAsFlattenedFloats.rows << " " << matTrainingImagesAsFlattenedFloats.cols << " ";
std::cout << matSamples.rows << " " << matSamples.cols;
imshow("Samples", matSamples);
imshow("chaos", matTrainingImagesAsFlattenedFloats);
Ptr<ml::TrainData> trainData = ml::TrainData::create(matTrainingImagesAsFlattenedFloats, ml::SampleTypes::ROW_SAMPLE, matSamples);
nnPtr->train(trainData);
bool m = nnPtr->isTrained();
if (m)
std::cout << "training complete\n\n";
// cv::Mat matCurrentChar = Mat(cv::Size(matTrainingImagesAsFlattenedFloats.cols, matTrainingImagesAsFlattenedFloats.rows), CV_32F);
// cout << "samples:\n" << samples << endl;
//cout << "\nresponses:\n" << responses << endl;
/* if (!nnPtr->train(trainData))
return 1;*/
/* cout << "\nweights[0]:\n" << nnPtr->getWeights(0) << endl;
cout << "\nweights[1]:\n" << nnPtr->getWeights(1) << endl;
cout << "\nweights[2]:\n" << nnPtr->getWeights(2) << endl;
cout << "\nweights[3]:\n" << nnPtr->getWeights(3) << endl;*/
//predicting
std::vector <cv::String> filename;
cv::String folder = "./plate/";
cv::glob(folder, filename);
if (filename.empty()) { // if unable to open image
std::cout << "error: image not read from file\n\n"; // show error message on command line
return(0); // and exit program
}
String strFinalString;
for (int i = 0; i < filename.size(); i++) {
cv::Mat matTestingNumbers = cv::imread(filename[i]);
cv::Mat matGrayscale; //
cv::Mat matBlurred; // declare more image variables
cv::Mat matThresh; //
cv::Mat matThreshCopy;
cv::Mat matCanny;
//
cv::cvtColor(matTestingNumbers, matGrayscale, CV_BGR2GRAY); // convert to grayscale
matThresh = cv::Mat(cv::Size(matGrayscale.cols, matGrayscale.rows), CV_8UC1);
for (int i = 0; i < matGrayscale.cols; i++) {
for (int j = 0; j < matGrayscale.rows; j++) {
if (matGrayscale.at<uchar>(j, i) <= 130) {
matThresh.at<uchar>(j, i) = 255;
}
else {
matThresh.at<uchar>(j, i) = 0;
}
}
}
// blur
cv::GaussianBlur(matThresh, // input image
matBlurred, // output image
cv::Size(5, 5), // smoothing window width and height in pixels
0); // sigma value, determines how much the image will be blurred, zero makes function choose the sigma value
// filter image from grayscale to black and white
/* cv::adaptiveThreshold(matBlurred, // input image
matThresh, // output image
255, // make pixels that pass the threshold full white
cv::ADAPTIVE_THRESH_GAUSSIAN_C, // use gaussian rather than mean, seems to give better results
cv::THRESH_BINARY_INV, // invert so foreground will be white, background will be black
11, // size of a pixel neighborhood used to calculate threshold value
2); */ // constant subtracted from the mean or weighted mean
// cv::imshow("thresh" + std::to_string(i), matThresh);
matThreshCopy = matThresh.clone();
std::vector<std::vector<cv::Point> > ptContours; // declare a vector for the contours
std::vector<cv::Vec4i> v4iHierarchy;// make a copy of the thresh image, this in necessary b/c findContours modifies the image
cv::Canny(matBlurred, matCanny, 20, 40, 3);
/*std::vector<std::vector<cv::Point> > ptContours; // declare a vector for the contours
std::vector<cv::Vec4i> v4iHierarchy; // declare a vector for the hierarchy (we won't use this in this program but this may be helpful for reference)
cv::findContours(matThreshCopy, // input image, make sure to use a copy since the function will modify this image in the course of finding contours
ptContours, // output contours
v4iHierarchy, // output hierarchy
cv::RETR_EXTERNAL, // retrieve the outermost contours only
cv::CHAIN_APPROX_SIMPLE); // compress horizontal, vertical, and diagonal segments and leave only their end points
/*std::vector<std::vector<cv::Point> > contours_poly(ptContours.size());
std::vector<cv::Rect> boundRect(ptContours.size());
for (int i = 0; i < ptContours.size(); i++)
{
approxPolyDP(cv::Mat(ptContours[i]), contours_poly[i], 3, true);
boundRect[i] = cv::boundingRect(cv::Mat(contours_poly[i]));
}*/
/*for (int i = 0; i < ptContours.size(); i++) { // for each contour
ContourWithData contourWithData; // instantiate a contour with data object
contourWithData.ptContour = ptContours[i]; // assign contour to contour with data
contourWithData.boundingRect = cv::boundingRect(contourWithData.ptContour); // get the bounding rect
contourWithData.fltArea = cv::contourArea(contourWithData.ptContour); // calculate the contour area
allContoursWithData.push_back(contourWithData); // add contour with data object to list of all contours with data
}
for (int i = 0; i < allContoursWithData.size(); i++) { // for all contours
if (allContoursWithData[i].checkIfContourIsValid()) { // check if valid
validContoursWithData.push_back(allContoursWithData[i]); // if so, append to valid contour list
}
}
//sort contours from left to right
std::sort(validContoursWithData.begin(), validContoursWithData.end(), ContourWithData::sortByBoundingRectXPosition);
// std::string strFinalString; // declare final string, this will have the final number sequence by the end of the program
*/
/*for (int i = 0; i < validContoursWithData.size(); i++) { // for each contour
// draw a green rect around the current char
cv::rectangle(matTestingNumbers, // draw rectangle on original image
validContoursWithData[i].boundingRect, // rect to draw
cv::Scalar(0, 255, 0), // green
2); // thickness
cv::Mat matROI = matThresh(validContoursWithData[i].boundingRect); // get ROI image of bounding rect
cv::Mat matROIResized;
cv::resize(matROI, matROIResized, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)); // resize image, this will be more consistent for recognition and storage
*/
cv::Mat matROIFloat;
cv::resize(matThresh, matThresh, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT));
matThresh.convertTo(matROIFloat, CV_32FC1, 1.0 / 255.0); // convert Mat to float, necessary for call to find_nearest
cv::Mat matROIFlattenedFloat = matROIFloat.reshape(1, 1);
cv::Point maxLoc = { 0,0 };
cv::Point minLoc;
cv::Mat output = cv::Mat(cv::Size(36, 1), CV_32F);
vector<float>output2;
// cv::Mat output2 = cv::Mat(cv::Size(36, 1), CV_32F);
nnPtr->predict(matROIFlattenedFloat, output2);
// float max = output.at<float>(0, 0);
int fo = 0;
float m = output2[0];
imshow("predicted input", matROIFlattenedFloat);
// float b = output.at<float>(0, 0);
// cout <<"\n output0,0:"<<b<<endl;
// minMaxLoc(output, 0, 0, &minLoc, &maxLoc, Mat());
// cout << "\noutput:\n" << maxLoc.x << endl;
for (int j = 1; j < 36; j++) {
float value =output2[j];
if (value > m) {
m = value;
fo = j;
}
}
float * p = 0;
p = &m;
cout << "j value in output " << fo << " Max value " << p << endl;
//imshow("output image" + to_string(i), output);
// cout << "\noutput:\n" << minLoc.x << endl;
//float fltCurrentChar = (float)maxLoc.x;
output.release();
m = 0;
fo = 0;
}
// strFinalString = strFinalString + char(int(fltCurrentChar)); // append current char to full string
// cv::imshow("Predict output", output);
/*cv::Point maxLoc = {0,0};
Mat output=Mat (cv::Size(matSamples.cols,matSamples.rows),CV_32F);
nnPtr->predict(matTrainingImagesAsFlattenedFloats, output);
minMaxLoc(output, 0, 0, 0, &maxLoc, 0);
cout << "\noutput:\n" << maxLoc.x << endl;*/
// getchar();
/*for (int i = 0; i < 10;i++) {
for (int j = 0; j < 36; j++) {
if (matCurrentChar.at<float>(i, j) >= 0.6) {
cout << " "<<j<<" ";
}
}
}*/
waitKey(0);
return(0);
}
void gen() {
std::string dir, filepath;
int num, imgArea, minArea;
int pos = 0;
bool f = true;
struct stat filestat;
cv::Mat imgTrainingNumbers;
cv::Mat imgGrayscale;
cv::Mat imgBlurred;
cv::Mat imgThresh;
cv::Mat imgThreshCopy;
cv::Mat matROIResized=cv::Mat (cv::Size(RESIZED_IMAGE_WIDTH,RESIZED_IMAGE_HEIGHT),CV_8UC1);
cv::Mat matROI;
std::vector <cv::String> filename;
std::vector<std::vector<cv::Point> > ptContours;
std::vector<cv::Vec4i> v4iHierarchy;
int count = 0, contoursCount = 0;
matSamples = cv::Mat(cv::Size(36, 496), CV_32FC1);
matTrainingImagesAsFlattenedFloats = cv::Mat(cv::Size(600, 496), CV_32FC1);
for (int j = 0; j <= 35; j++) {
int tmp = j;
cv::String folder = "./Training Data/" + std::to_string(tmp);
cv::glob(folder, filename);
for (int k = 0; k < filename.size(); k++) {
count++;
// If the file is a directory (or is in some way invalid) we'll skip it
// if (stat(filepath.c_str(), &filestat)) continue;
//if (S_ISDIR(filestat.st_mode)) continue;
imgTrainingNumbers = cv::imread(filename[k]);
imgArea = imgTrainingNumbers.cols*imgTrainingNumbers.rows;
// read in training numbers image
minArea = imgArea * 50 / 100;
if (imgTrainingNumbers.empty()) {
std::cout << "error: image not read from file\n\n";
//return(0);
}
cv::cvtColor(imgTrainingNumbers, imgGrayscale, CV_BGR2GRAY);
//cv::equalizeHist(imgGrayscale, imgGrayscale);
imgThresh = cv::Mat(cv::Size(imgGrayscale.cols, imgGrayscale.rows), CV_8UC1);
/*cv::adaptiveThreshold(imgGrayscale,
imgThresh,
255,
cv::ADAPTIVE_THRESH_GAUSSIAN_C,
cv::THRESH_BINARY_INV,
3,
0);
*/
for (int i = 0; i < imgGrayscale.cols; i++) {
for (int j = 0; j < imgGrayscale.rows; j++) {
if (imgGrayscale.at<uchar>(j, i) <= 130) {
imgThresh.at<uchar>(j, i) = 255;
}
else {
imgThresh.at<uchar>(j, i) = 0;
}
}
}
// cv::imshow("imgThresh"+std::to_string(count), imgThresh);
imgThreshCopy = imgThresh.clone();
cv::GaussianBlur(imgThreshCopy,
imgBlurred,
cv::Size(5, 5),
0);
cv::Mat imgCanny;
// cv::Canny(imgBlurred,imgCanny,20,40,3);
cv::findContours(imgBlurred,
ptContours,
v4iHierarchy,
cv::RETR_EXTERNAL,
cv::CHAIN_APPROX_SIMPLE);
for (int i = 0; i < ptContours.size(); i++) {
if (cv::contourArea(ptContours[i]) > MIN_CONTOUR_AREA) {
contoursCount++;
cv::Rect boundingRect = cv::boundingRect(ptContours[i]);
cv::rectangle(imgTrainingNumbers, boundingRect, cv::Scalar(0, 0, 255), 2); // draw red rectangle around each contour as we ask user for input
matROI = imgThreshCopy(boundingRect); // get ROI image of bounding rect
std::string path = "./" + std::to_string(contoursCount) + ".JPG";
cv::imwrite(path, matROI);
// cv::imshow("matROI" + std::to_string(count), matROI);
cv::resize(matROI, matROIResized, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)); // resize image, this will be more consistent for recognition and storage
std::cout << filename[k] << " " << contoursCount << "\n";
//cv::imshow("matROI", matROI);
//cv::imshow("matROIResized"+std::to_string(count), matROIResized);
// cv::imshow("imgTrainingNumbers" + std::to_string(contoursCount), imgTrainingNumbers);
int intChar;
if (j<10)
intChar = j + 48;
else {
intChar = j + 55;
}
/*if (intChar == 27) { // if esc key was pressed
return(0); // exit program
}*/
// if (std::find(intValidChars.begin(), intValidChars.end(), intChar) != intValidChars.end()) { // else if the char is in the list of chars we are looking for . . .
// append classification char to integer list of chars
cv::Mat matImageFloat;
matROIResized.convertTo(matImageFloat,CV_32FC1);// now add the training image (some conversion is necessary first) . . .
//matROIResized.convertTo(matImageFloat, CV_32FC1); // convert Mat to float
cv::Mat matImageFlattenedFloat = matImageFloat.reshape(1, 1);
//matTrainingImagesAsFlattenedFloats.push_back(matImageFlattenedFloat);// flatten
try {
//matTrainingImagesAsFlattenedFloats.push_back(matImageFlattenedFloat);
std::cout << matTrainingImagesAsFlattenedFloats.rows << " " << matTrainingImagesAsFlattenedFloats.cols;
//unsigned char* re;
int ii = 0; // Current column in training_mat
for (int i = 0; i<matImageFloat.rows; i++) {
for (int j = 0; j < matImageFloat.cols; j++) {
matTrainingImagesAsFlattenedFloats.at<float>(contoursCount-1, ii++) = matImageFloat.at<float>(i,j);
}
}
}
catch (std::exception &exc) {
f = false;
exc.what();
}
if (f) {
matClassificationInts.push_back((float)intChar);
matSamples.at<float>(contoursCount-1, j) = 1.0;
}
f = true;
// add to Mat as though it was a vector, this is necessary due to the
// data types that KNearest.train accepts
} // end if
//} // end if
} // end for
}//end i
}//end j
}
Output of predict function
Unfortunately, I don't have the necessary time to really review the code, but I can say off the top that to train a model that performs well for prediction with 36 classes, you will need several things:
A large number of good quality images. Ideally, you'd want thousands of images for each class. Of course, you can see somewhat decent results with less than that, but if you only have a few images per class, it's never going to be able to generalize adequately.
You need a model that is large and sophisticated enough to provide the necessary expressiveness to solve the problem. For a problem like this, a plain old multi-layer perceptron with one hidden layer with 100 units may not be enough. This is actually a problem that would benefit from using a Convolutional Neural Net (CNN) with a couple layers just to extract useful features first. But assuming you don't want to go down that path, you may at least want to tweak the size of your hidden layer.
To even get to a point where the training process converges, you will probably need to experiment and crucially, you need an effective way to test the accuracy of the ANN after each experiment. Ideally, you want to observe the loss as the training is proceeding, but I'm not sure whether that's possible using OpenCV's ML functionality. At a minimum, you should fully expect to have to play around with the various so-called "hyper-parameters" and run many experiments before you have a reasonable model.
Anyway, the most important thing is to make sure you have a solid mechanism for validating the accuracy of the model after training. If you aren't already doing so, set aside some images as a separate test set, and after each experiment, use the trained ANN to predict each test image to see the accuracy.
One final general note: what you're trying to do is complex. You will save yourself a huge number of headaches if you take the time early and often to refactor your code. No matter how many experiments you run, if there's some defect causing (for example) your training data to be fundamentally different in some way than your test data, you will never see good results.
Good luck!
EDIT: I should also point out that seeing the same result for every input image is a classic sign that training failed. Unfortunately, there are many reasons why that might happen and it will be very difficult for anyone to isolate that for you without some cleaner code and access to your image data.
I have solved the issue of not getting the output of predict. The issue was created because of the input Mat image to train (ie. matTrainingImagesAsFlattenedFloats) was having values 255.0 for a white pixel. This happened because I haven't use convertTo() properly. You need to use convertTo(OutputImage name, CV_32FC1, 1.0 / 255.0); like this which will convert all the pixel values with 255.0 to 1.0 and after that I am getting the correct output.
Thank you for all the help.
This is too broad to be in one question. Sorry for the bad news. I tried this over and over and couldn't find a solution. I recommend that you implement a simple AND, OR or XOR first just to make sure that the learning part is working and that you are getting better results the more passes you do. Also I suggest to try the Tangent Hyperbolic as a Transfer Function instead of Sigmoid. And Good luck!
Here is some of my own posts that might help you:
Exact results as yours: HERE
Some codes: HERE
I don't want to say that, but several professors I met said Backpropagation just doesn't work and they had (and me have) to implement my own method of teaching the network.

Local Binary Patterns Matched with Known Image

I am currently looking for a way to implement Local Binary Patterns using OpenCV and C++.
Currently I have found this: https://github.com/bytefish/opencv/tree/master/lbp
However, I need to compare 2 images or LBP Histograms with each other and give some similarity index.
Here is my modified code:
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include "lbp.hpp"
#include "histogram.hpp"
using namespace cv;
int main(int argc, const char *argv[]) {
int deviceId = 0;
if(argc > 1)
deviceId = atoi(argv[1]);
VideoCapture cap(deviceId);
if(!cap.isOpened()) {
cerr << "Capture Device ID " << deviceId << "cannot be opened." << endl;
return -1;
}
// initial values
int radius = 1;
int neighbors = 8;
// windows
namedWindow("original",CV_WINDOW_AUTOSIZE);
namedWindow("lbp",CV_WINDOW_AUTOSIZE);
// matrices used
Mat test;
Mat test1;
Mat frame; // always references the last frame
Mat dst; // image after preprocessing
Mat dst1;
Mat lbp; // lbp image
Mat lbp1;
// just to switch between possible lbp operators
vector<string> lbp_names;
lbp_names.push_back("Extended LBP"); // 0
lbp_names.push_back("Fixed Sampling LBP"); // 1
lbp_names.push_back("Variance-based LBP"); // 2
int lbp_operator=1;
bool running=true;
while(running) {
//cap >> frame;
dst = imread("Coin1.jpg", CV_LOAD_IMAGE_GRAYSCALE); //Known Image
dst1 = imread("Coin2.jpg", CV_LOAD_IMAGE_GRAYSCALE); //Compared to
switch(lbp_operator) {
case 0:
lbp::ELBP(test, lbp, radius, neighbors); // use the extended operator
break;
case 1:
lbp::OLBP(dst, lbp); // use the original operator
lbp::OLBP(dst1, lbp1); // use the original operator
break;
case 2:
lbp::VARLBP(dst, lbp, radius, neighbors);
break;
}
// now to show the patterns a normalization is necessary
// a simple min-max norm will do the job...
normalize(lbp, lbp, 0, 255, NORM_MINMAX, CV_8UC1);
Mat lbp_hist, lbp1_hist;
int histSize[] = {256};
float s_ranges[] = { 0, 256 };
const float* ranges[] = { s_ranges };
// Use the o-th and 1-st channels
int channels[] = { 0 };
calcHist( &lbp, 1, channels, Mat(), lbp_hist, 1, histSize, ranges, true, false );
normalize( lbp1_hist, lbp1_hist, 0, 1, NORM_MINMAX, -1, Mat() );
calcHist( &lbp1, 1, channels, Mat(), lbp1_hist, 1, histSize, ranges, true, false );
normalize( lbp_hist, lbp_hist, 0, 1, NORM_MINMAX, -1, Mat() );
double base_base = compareHist( lbp_hist, lbp1_hist, 0 );
printf("%f\n",base_base); //get a similarity
//imshow("original", lbp);
//imshow("lbp", lbp1);
imshow("1", lbp_hist);
imshow("2", lbp1_hist);
char key = (char) waitKey(0);;
}
return 0; // success
}
However I do not think it is working correctly. I am not getting an accurate histogram. So I can't compare.
Please help.
I remember having a similar problem when starting with OpenCV LBPH
Try this function for histogram
void lbp::histogram(const Mat& src, Mat& hist, int numPatterns) {
switch(src.type()) {
case CV_8SC1: histogram_<char>(src, hist, numPatterns); break;
case CV_8UC1: histogram_<unsigned char>(src, hist, numPatterns); break;
case CV_16SC1: histogram_<short int>(src, hist, numPatterns); break;
case CV_16UC1: histogram_<unsigned short>(src, hist, numPatterns); break;
case CV_32SC1: histogram_<int>(src, hist, numPatterns); break;
}
}
template <typename _Tp>
void lbp::histogram_(const Mat& src, Mat& hist, int numPatterns) {
hist = Mat::zeros(1, numPatterns, CV_32SC1);
for(int i = 0; i < src.rows; i++) {
for(int j = 0; j < src.cols; j++) {
int bin = src.at<_Tp>(i,j);
hist.at<int>(0,bin) += 1;
}
}
}
//Manual normalization
cv::Mat hist_norm=cv::Mat::zeros(1,hist.cols,CV_32F);
int sum=0;
for(int j=0;j<hist.cols;j++){sum+=hist.at<int>(0,j);}
for(int j=0;j<hist.cols;j++){hist_norm.at<float>(0,j)+= (float)hist.at<int>(0,j)/(float)sum;}
This works on my computer for basic LBPH. I used an implementation of LBP from another librairy, maybe it's the same as you.
Tell me if it's fine for you.

Preparing for OCR OpenCV

I am making an application that uses OCR and I am using OpenCV to threshold the image to improve the OCR results, I have gotten pretty good results but I want to know if anyone has any suggestions for improvement.
Here is what I've done so far:
// Convert to grayscale.
cv::cvtColor(cvMat, cvMat, CV_RGB2GRAY);
// Apply adaptive threshold.
cv::adaptiveThreshold(cvMat, cvMat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 3, 5);
// Attempt to sharpen the image.
cv::GaussianBlur(cvMat, cvMat, cv::Size(0, 0), 3);
cv::addWeighted(cvMat, 1.5, cvMat, -0.5, 0, cvMat);
Let me know if you have any suggestions to improve results, thanks.
Sample Images:
After:
One of the best algorithms for thresholding problem in the OCR field is sauvola method.You can use the below code.
#ifndef _THRESHOLDER
#define _THRESHOLDER
#include <cv.h>
#include "type.h"
using namespace cv;
enum class BhThresholdMethod{OTSU,NIBLACK,SAUVOLA,WOLFJOLION};
class BhThresholder
{
public :
void doThreshold(InputArray src ,OutputArray dst,const BhThresholdMethod &method);
private:
};
#endif //_THRESHOLDER
thresholder.cpp
#include "stdafx.h"
#define uget(x,y) at<unsigned char>(y,x)
#define uset(x,y,v) at<unsigned char>(y,x)=v;
#define fget(x,y) at<float>(y,x)
#define fset(x,y,v) at<float>(y,x)=v;
// *************************************************************
// glide a window across the image and
// create two maps: mean and standard deviation.
// *************************************************************
//#define BINARIZEWOLF_VERSION "2.3 (February 26th, 2013)"
double calcLocalStats (Mat &im, Mat &map_m, Mat &map_s, int win_x, int win_y) {
double m,s,max_s, sum, sum_sq, foo;
int wxh = win_x / 2;
int wyh = win_y / 2;
int x_firstth = wxh;
int y_lastth = im.rows-wyh-1;
int y_firstth= wyh;
double winarea = win_x*win_y;
max_s = 0;
for (int j = y_firstth ; j<=y_lastth; j++)
{
// Calculate the initial window at the beginning of the line
sum = sum_sq = 0;
for (int wy=0 ; wy<win_y; wy++)
for (int wx=0 ; wx<win_x; wx++) {
foo = im.uget(wx,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(x_firstth, j, m);
map_s.fset(x_firstth, j, s);
// Shift the window, add and remove new/old values to the histogram
for (int i=1 ; i <= im.cols -win_x; i++) {
// Remove the left old column and add the right new column
for (int wy=0; wy<win_y; ++wy) {
foo = im.uget(i-1,j-wyh+wy);
sum -= foo;
sum_sq -= foo*foo;
foo = im.uget(i+win_x-1,j-wyh+wy);
sum += foo;
sum_sq += foo*foo;
}
m = sum / winarea;
s = sqrt ((sum_sq - (sum*sum)/winarea)/winarea);
if (s > max_s)
max_s = s;
map_m.fset(i+wxh, j, m);
map_s.fset(i+wxh, j, s);
}
}
return max_s;
}
void NiblackSauvolaWolfJolion (InputArray _src, OutputArray _dst,const BhThresholdMethod &version,int winx, int winy, double k, double dR) {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
double m, s, max_s;
double th=0;
double min_I, max_I;
int wxh = winx/2;
int wyh = winy/2;
int x_firstth= wxh;
int x_lastth = src.cols-wxh-1;
int y_lastth = src.rows-wyh-1;
int y_firstth= wyh;
int mx, my;
// Create local statistics and store them in a double matrices
Mat map_m = Mat::zeros (src.size(), CV_32FC1);
Mat map_s = Mat::zeros (src.size(), CV_32FC1);
max_s = calcLocalStats (src, map_m, map_s, winx, winy);
minMaxLoc(src, &min_I, &max_I);
Mat thsurf (src.size(), CV_32FC1);
// Create the threshold surface, including border processing
// ----------------------------------------------------
for (int j = y_firstth ; j<=y_lastth; j++) {
// NORMAL, NON-BORDER AREA IN THE MIDDLE OF THE WINDOW:
for (int i=0 ; i <= src.cols-winx; i++) {
m = map_m.fget(i+wxh, j);
s = map_s.fget(i+wxh, j);
// Calculate the threshold
switch (version) {
case BhThresholdMethod::NIBLACK:
th = m + k*s;
break;
case BhThresholdMethod::SAUVOLA:
th = m * (1 + k*(s/dR-1));
break;
case BhThresholdMethod::WOLFJOLION:
th = m + k * (s/max_s-1) * (m-min_I);
break;
default:
cerr << "Unknown threshold type in ImageThresholder::surfaceNiblackImproved()\n";
exit (1);
}
thsurf.fset(i+wxh,j,th);
if (i==0) {
// LEFT BORDER
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,j,th);
// LEFT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
// LEFT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=0; i<=x_firstth; ++i)
thsurf.fset(i,u,th);
}
// UPPER BORDER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
thsurf.fset(i+wxh,u,th);
// LOWER BORDER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
thsurf.fset(i+wxh,u,th);
}
// RIGHT BORDER
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,j,th);
// RIGHT-UPPER CORNER
if (j==y_firstth)
for (int u=0; u<y_firstth; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
// RIGHT-LOWER CORNER
if (j==y_lastth)
for (int u=y_lastth+1; u<src.rows; ++u)
for (int i=x_lastth; i<src.cols; ++i)
thsurf.fset(i,u,th);
}
cerr << "surface created" << endl;
for (int y=0; y<src.rows; ++y)
for (int x=0; x<src.cols; ++x)
{
if (src.uget(x,y) >= thsurf.fget(x,y))
{
dst.uset(x,y,255);
}
else
{
dst.uset(x,y,0);
}
}
}
void BhThresholder::doThreshold(InputArray _src ,OutputArray _dst,const BhThresholdMethod &method)
{
Mat src = _src.getMat();
int winx = 0;
int winy = 0;
float optK=0.5;
if (winx==0 || winy==0) {
winy = (int) (2.0 * src.rows - 1)/3;
winx = (int) src.cols-1 < winy ? src.cols-1 : winy;
// if the window is too big, than we asume that the image
// is not a single text box, but a document page: set
// the window size to a fixed constant.
if (winx > 100)
winx = winy = 40;
}
// Threshold
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
//medianBlur(src,dst,5);
GaussianBlur(src,dst,Size(5,5),0);
//#define _BH_SHOW_IMAGE
#ifdef _BH_DEBUG
#define _BH_SHOW_IMAGE
#endif
//medianBlur(src,dst,7);
switch (method)
{
case BhThresholdMethod::OTSU :
threshold(dst,dst,128,255,CV_THRESH_OTSU);
break;
case BhThresholdMethod::SAUVOLA :
case BhThresholdMethod::WOLFJOLION :
NiblackSauvolaWolfJolion (src, dst, method, winx, winy, optK, 128);
}
bitwise_not(dst,dst);
#ifdef _BH_SHOW_IMAGE
#undef _BH_SHOW_IMAGE
#endif
}
Here is comparsion table for thresholding methods: http://clweb.csa.iisc.ernet.in/rahulsharma/binarize/set1.php?id=set1%2Fimage00b
A few thoughts:
Since you're starting with a rectangular object that may be viewed at a non-normal angle, use an affine transform to warp the image so that it appears rectangular with right angle corners.
Before the affine transform, you should probably remove barrel distortion (the curviness of the card edges).
Consider using an adaptive threshold rather than a simple global binarization threshold.
If you can find a proper OCR algorithm that doesn't require binary images, use that. Although binarization will work well for black text on a white background, in general binarization presents a lot of problems if you want to achieve high accuracy (i.e., character recognition approaching 98%+ for arbitrary strings of characters)
Try to sample with better resolution.

opencv, accessing element for downsampling but white window appear

i'm learning c++ api of opencv, and for a simple approach i've started with try to downsample image (ok i know that there is pyrDown with gaussian resampling but it's for learning how to access element in Mat class)
this is my code:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#define original_window "original"
#define manual_window "manual"
using namespace cv;
using namespace std;
Mat img, manual;
void downsample(Mat src, Mat &dst, const Size& s) {
float factor = src.rows/(float)s.width;
Mat_<Vec3f> _dst = Mat(s, src.type());
Mat_<Vec3f> _src = src;
for(int i=0; i<src.cols; i+=factor) {
int _i = i/factor;
for(int j=0; j<src.rows; j+=factor) {
int _j = j/factor;
_dst (_j, _i) = _src(j,i);
}
}
cout << "downsample image size: " << _dst.rows << " " << _dst.cols << endl;
dst = Mat(_dst);
}
int main(int /*argc*/, char** /*argv*/) {
img = imread("lena.jpg");
cout << "original image size: " << img.rows << " " << img.cols << endl;
downsample(img, manual, Size(img.cols/2, img.rows/2));
namedWindow(original_window, CV_WINDOW_AUTOSIZE);
namedWindow(manual_window, CV_WINDOW_AUTOSIZE);
while( true )
{
char c = (char)waitKey(10);
if( c == 27 )
{ break; }
imshow( original_window, img );
imshow( manual_window, manual );
}
return 0;
}
now, i'm doing a downsampling in a fool way: i'm just deleting elements. and i'm try to use c++ api with Mat_.
in manual window i get a white window, and i don't understand why. event if i try to cout manual i'seeing different values.. what's wrong with this piece of code?
EDIT 1
i've found a solution:
dst.convertTo(dst, src.type()); // in this particular case: src.type() == CV_8UC3
at the end of downsample()
now my question is: why that? i declare Mat(s, src.type()); why it is modified?
EDIT 2
if i use #go4sri answer with this line
_dst (_j, _i) = src.at<Vec3f>(j, i);
i get this output:
i really does not understand why..
The way to access an element in OpenCV's Mat is as follows:
for a single channel matrix(tmp)
Matrix_Name.at<dataType>(row, col)
For a three channel matrix( as is the case for a color image), you will need to use the Vec3b/Vec3f type depending upon if yours is a unsigned char/float matrix.
As yours is a unsigned char 3Dimensional matrix:
you will have to access it as src.at<Vec3b>(i, j)
Your downsample should have been:
void downsample(const Mat& src, Mat &dst, const Size& s) {
float factor = src.rows/(float)s.height;
Mat _dst = Mat(s, src.type());
for(int i=0; i < src.cols; i += factor) {
int _i = i/factor;
for(int j=0; j<src.rows; j+=factor) {
int _j = j/factor;
_dst.at<Vec3b> (_j, _i) = src.at<Vec3b>(j, i);
}
}
cout << "downsample image size: " << _dst.rows << " " << _dst.cols << endl;
dst = Mat(_dst);
}

Square detection doesn't find squares

I'm using the program squares.c available in the samples of OpenCV libraries. It works well with every image, but I really can't figure it out why it doesn't recognize the square drawn in that image
http://desmond.imageshack.us/Himg12/scaled.php?server=12&filename=26725680.jpg&res=medium
After CANNY:
After DILATE:
The RESULT image (in red)
http://img267.imageshack.us/img267/8016/resultuq.jpg
As you can see, the square is NOT detected.
After the detection I need to extract the area contained in the square...How is it possible without a ROI?
The source code below presents a small variation of the Square Detector program. It's not perfect, but it illustrates one way to approach your problem.
You can diff this code to the original and check all the changes that were made, but the main ones are:
Decrease the number of threshold levels to 2.
In the beginning of findSquares(), dilate the image to detect the thin white square, and then blur the entire image so the algorithm doesn't detect the sea and the sky as individual squares.
Once compiled, run the application with the following syntax: ./app <image>
// The "Square Detector" program.
// It loads several images sequentially and tries to find squares in
// each image
#include "highgui.h"
#include "cv.h"
#include <iostream>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
void help()
{
cout <<
"\nA program using pyramid scaling, Canny, contours, contour simpification and\n"
"memory storage (it's got it all folks) to find\n"
"squares in a list of images pic1-6.png\n"
"Returns sequence of squares detected on the image.\n"
"the sequence is stored in the specified memory storage\n"
"Call:\n"
"./squares\n"
"Using OpenCV version %s\n" << CV_VERSION << "\n" << endl;
}
int thresh = 50, N = 2; // karlphillip: decreased N to 2, was 11.
const char* wndname = "Square Detection Demo";
// helper function:
// finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2
double angle( Point pt1, Point pt2, Point pt0 )
{
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
squares.clear();
Mat pyr, timg, gray0(image.size(), CV_8U), gray;
// karlphillip: dilate the image so this technique can detect the white square,
Mat out(image);
dilate(out, out, Mat(), Point(-1,-1));
// then blur it so that the ocean/sea become one big segment to avoid detecting them as 2 big squares.
medianBlur(out, out, 7);
// down-scale and upscale the image to filter out the noise
pyrDown(out, pyr, Size(out.cols/2, out.rows/2));
pyrUp(pyr, timg, out.size());
vector<vector<Point> > contours;
// find squares in every color plane of the image
for( int c = 0; c < 3; c++ )
{
int ch[] = {c, 0};
mixChannels(&timg, 1, &gray0, 1, ch, 1);
// try several threshold levels
for( int l = 0; l < N; l++ )
{
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if( l == 0 )
{
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
Canny(gray0, gray, 0, thresh, 5);
// dilate canny output to remove potential
// holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
// apply threshold if l!=0:
// tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
gray = gray0 >= (l+1)*255/N;
}
// find contours and store them all as a list
findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
vector<Point> approx;
// test each contour
for( size_t i = 0; i < contours.size(); i++ )
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// square contours should have 4 vertices after approximation
// relatively large area (to filter out noisy contours)
// and be convex.
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if( approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)) )
{
double maxCosine = 0;
for( int j = 2; j < 5; j++ )
{
// find the maximum cosine of the angle between joint edges
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
// if cosines of all angles are small
// (all angles are ~90 degree) then write quandrange
// vertices to resultant sequence
if( maxCosine < 0.3 )
squares.push_back(approx);
}
}
}
}
}
// the function draws all the squares in the image
void drawSquares( Mat& image, const vector<vector<Point> >& squares )
{
for( size_t i = 0; i < squares.size(); i++ )
{
const Point* p = &squares[i][0];
int n = (int)squares[i].size();
polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA);
}
imshow(wndname, image);
}
int main(int argc, char** argv)
{
if (argc < 2)
{
cout << "Usage: ./program <file>" << endl;
return -1;
}
// static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
// "pic4.png", "pic5.png", "pic6.png", 0 };
static const char* names[] = { argv[1], 0 };
help();
namedWindow( wndname, 1 );
vector<vector<Point> > squares;
for( int i = 0; names[i] != 0; i++ )
{
Mat image = imread(names[i], 1);
if( image.empty() )
{
cout << "Couldn't load " << names[i] << endl;
continue;
}
findSquares(image, squares);
drawSquares(image, squares);
imwrite("out.jpg", image);
int c = waitKey();
if( (char)c == 27 )
break;
}
return 0;
}
Outputs:
I would suggest that your square in this image is too thin. The first step in squares.c is to scale the image down and back up to reduce noise before passing to the Canny edge detector.
The scaling convolves with a 5x5 kernel, so in your case this could result in losing any gradient in such a thin edge.
Try making your square's edges at least 5 pixels if you are going to overlay them on a continuous background.