OpenCV Multi-threading giving errors - c++

I am running OpenCV code on multiple threads in the following fashion:
std::thread t1(runOnSingleCamera, alphaFile, featureToUse, classifier,0);
std::thread t2(runOnSingleCamera, betaFile, featureToUse, classifier,1);
std::thread t3(runOnSingleCamera, gammaFile, featureToUse, classifier,2);
std::thread t4(runOnSingleCamera, deltaFile, featureToUse, classifier,3);
t1.join();
t2.join();
t3.join();
t4.join();
This compiles fine, but when I run it I get a variety of errors; occasionally it even works...
Here is a sample of some of the errors I get:
tom#thinkpad:~/Documents/Project/reidThermal/src$ ./main -d=1 -c=0 -f=1
Segmentation fault (core dumped)
tom#thinkpad:~/Documents/Project/reidThermal/src$ ./main -d=1 -c=0 -f=1
(betaInput.webm:8571): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8571): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8571): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8571): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8571): Gtk-CRITICAL **: IA__gtk_widget_new: assertion 'g_type_is_a (type, GTK_TYPE_WIDGET)' failed
(betaInput.webm:8571): Gtk-CRITICAL **: IA__gtk_widget_new: assertion 'g_type_is_a (type, GTK_TYPE_WIDGET)' failed
Segmentation fault (core dumped)
tom#thinkpad:~/Documents/Project/reidThermal/src$ ./main -d=1 -c=0 -f=1
(alphaInput.webm:8593): GLib-GObject-WARNING **: invalid cast from 'CvImageWidget' to 'CvImageWidget'
** (alphaInput.webm:8593): CRITICAL **: void cvImageWidget_size_allocate(GtkWidget*, GtkAllocation*): assertion 'CV_IS_IMAGE_WIDGET (widget)' failed
** (alphaInput.webm:8593): CRITICAL **: void cvImageWidget_realize(GtkWidget*): assertion 'CV_IS_IMAGE_WIDGET (widget)' failed
**
Gtk:ERROR:/build/gtk+2.0-KsZKkB/gtk+2.0-2.24.30/gtk/gtkwidget.c:8861:gtk_widget_real_map: assertion failed: (gtk_widget_get_realized (widget))
Aborted (core dumped)
tom#thinkpad:~/Documents/Project/reidThermal/src$ ./main -d=1 -c=0 -f=1
/usr/share/themes/Ambiance/gtk-2.0/gtkrc:720: Unable to find include file: "apps/ff.rc"
(betaInput.webm:8615): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8615): GLib-GObject-WARNING **: cannot register existing type 'CvImageWidget'
(betaInput.webm:8615): Gtk-CRITICAL **: IA__gtk_widget_new: assertion 'g_type_is_a (type, GTK_TYPE_WIDGET)' failed
Segmentation fault (core dumped)
Has anyone seen this before, or does anyone know what is going wrong and how to fix it?
Running using gdb gives the following:
Thread 4 "main" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffdb7fe700 (LWP 29317)]
0x0000000000000000 in ?? ()
I am on the most up to date version of Ubuntu using the most up to date version of OpenCV at the time of asking.
Full code below as requested. It is very long and made up of multiple elements; I imagine the issue is early on, in the declarations, or possibly an incompatibility with the imshow function:
int runOnSingleCamera(String file, int featureToUse, int classifier, int cameraID)
{
//enable velocity
int timeSteps = 0;
string windowName = file; // window name
Mat img, outputImage, foreground; // image objects
VideoCapture cap;
bool keepProcessing = true; // loop control flag
unsigned char key; // user input
int EVENT_LOOP_DELAY = 40; // delay for GUI window, 40 ms equates to 1000ms/25fps = 40ms per frame
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
int width = 40;
int height = 100;
int learning = 1000;
int padding = 40;
// if command line arguments are provided try to read image/video_name
// otherwise default to capture from attached H/W camera
if((cap.open(file) == true))
{
// create window object (use flag=0 to allow resize, 1 to auto fix size)
namedWindow(windowName, 1);
// create background / foreground Mixture of Gaussian (MoG) model
Ptr<BackgroundSubtractorMOG2> MoG = createBackgroundSubtractorMOG2(500,25,false);
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
CascadeClassifier cascade = CascadeClassifier(CASCADE_TO_USE);
Ptr<SuperpixelSEEDS> seeds;
// start main loop
while(keepProcessing)
{
int64 timeStart = getTickCount();
if (cap.isOpened())
{
cap >> img;
if(img.empty())
{
std::cerr << "End of video file reached" << std::endl;
exit(0);
}
outputImage = img.clone();
cvtColor(img, img, CV_BGR2GRAY);
}
else
{
// if not a capture object set event delay to zero so it waits
// indefinitely (as single image file, no need to loop)
EVENT_LOOP_DELAY = 0;
}
// update background model and get background/foreground
MoG->apply(img, foreground, (double)(1.0/learning));
//imshow("old foreground", foreground);
/////////////////////////////////////////////////////////////////////////////////SUPERPIXELS
int useSuperpixels = 0;
if(useSuperpixels == 1)
{
Mat seedMask, labels, result;
result = img.clone();
int width = img.size().width;
int height = img.size().height;
seeds = createSuperpixelSEEDS(width, height, 1, 2000, 10, 2, 5, true);
seeds->iterate(img, 10);
seeds->getLabels(labels);
vector<int> counter(seeds->getNumberOfSuperpixels(),0);
vector<int> numberOfPixelsPerSuperpixel(seeds->getNumberOfSuperpixels(),0);
vector<bool> useSuperpixel(seeds->getNumberOfSuperpixels(),false);
for(int i = 0; i<foreground.rows; i++)
{
for(int j = 0; j<foreground.cols; j++)
{
numberOfPixelsPerSuperpixel[labels.at<int>(i,j)] += 1;
if(foreground.at<unsigned char>(i,j)==255)
{
counter[labels.at<int>(i,j)] += 1;
}
}
}
for(int i = 0; i<counter.size(); i++)
{
if(counter[i]/numberOfPixelsPerSuperpixel[i] > 0.0001)
{
useSuperpixel[i] = true;
}
}
for(int i = 0; i<foreground.rows; i++)
{
for(int j = 0; j<foreground.cols; j++)
{
if(useSuperpixel[labels.at<int>(i,j)] == true)
{
foreground.at<unsigned char>(i,j) = 255;
}
else
{
foreground.at<unsigned char>(i,j) = 0;
}
}
}
}
/////////////////////////////////////////////////////////////////////////////////
else
{
// perform erosion - removes boundaries of foreground object
erode(foreground, foreground, Mat(),Point(),1);
// perform morphological closing
dilate(foreground, foreground, Mat(),Point(),5);
erode(foreground, foreground, Mat(),Point(),1);
}
//imshow("foreground", foreground);
// get connected components from the foreground
findContours(foreground, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);
// iterate through all the top-level contours,
// and get bounding rectangles for them (if larger than given value)
for(int idx = 0; idx >=0; idx = hierarchy[idx][0])
{
Rect r = boundingRect(contours[idx]);
// adjust bounding rectangle to be padding% larger
// around the object
r.x = max(0, r.x - (int) (padding/100.0 * (double) r.width));
r.y = max(0, r.y - (int) (padding/100.0 * (double) r.height));
r.width = min(img.cols - 1, (r.width + 2 * (int) (padding/100.0 * (double) r.width)));
r.height = min(img.rows - 1, (r.height + 2 * (int) (padding/100.0 * (double) r.height)));
// draw rectangle if greater than width/height constraints and if
// also still inside image
if ((r.width >= width) && (r.height >= height) && (r.x + r.width < img.cols) && (r.y + r.height < img.rows))
{
vector<Rect> found, found_filtered;
Mat roi = outputImage(r);
if (classifier == 1)
{
//changing last parameter helps deal with multiple rectangles per person
if (cameraID == 3)
{
hog.detectMultiScale(roi, found, 0, Size(8,8), Size(32,32), 1.05, 5);
}
else
{
hog.detectMultiScale(roi, found, 0, Size(8,8), Size(64,64), 1.05, 5);
}
}
else
{
if (cameraID == 3)
{
cascade.detectMultiScale(roi, found, 1.1, 4, CV_HAAR_DO_CANNY_PRUNING, cvSize(32,32));
}
else
{
cascade.detectMultiScale(roi, found, 1.1, 4, CV_HAAR_DO_CANNY_PRUNING, cvSize(64,64));
}
}
for(size_t i = 0; i < found.size(); i++ )
{
Rect rec = found[i];
rec.x += r.x;
rec.y += r.y;
size_t j;
// Do not add small detections inside a bigger detection.
for ( j = 0; j < found.size(); j++ )
{
if ( j != i && (rec & found[j]) == rec )
{
break;
}
}
if (j == found.size())
{
found_filtered.push_back(rec);
}
}
for (size_t i = 0; i < found_filtered.size(); i++)
{
Rect rec = found_filtered[i];
// The HOG/Cascade detector returns slightly larger rectangles than the real objects,
// so we slightly shrink the rectangles to get a nicer output.
rec.x += rec.width*0.1;
rec.width = rec.width*0.8;
rec.y += rec.height*0.1;
rec.height = rec.height*0.8;
// rectangle(img, rec.tl(), rec.br(), cv::Scalar(0,255,0), 3);
Point2f center = Point2f(float(rec.x + rec.width/2.0), float(rec.y + rec.height/2.0));
Mat regionOfInterest;
Mat regionOfInterestOriginal = img(rec);
//Mat regionOfInterestOriginal = img(r);
Mat regionOfInterestForeground = foreground(rec);
//Mat regionOfInterestForeground = foreground(r);
bitwise_and(regionOfInterestOriginal, regionOfInterestForeground, regionOfInterest);
Mat clone = regionOfInterest.clone();
resize(clone, regionOfInterest, Size(64,128), CV_INTER_CUBIC);
imshow("roi", regionOfInterest);
double huMoments[7];
vector<double> hu(7);
Mat hist;
vector<float> descriptorsValues;
Mat feature;
if(featureToUse == 1) //HuMoments
{
vector<vector<Point> > contoursHu;
vector<Vec4i> hierarchyHu;
findContours(regionOfInterest, contoursHu, hierarchyHu, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);
double largestSize,size;
int largestContour;
for(int i = 0; i < contoursHu.size(); i++)
{
size = contoursHu[i].size();
if(size > largestSize)
{
largestSize = size;
largestContour = i;
}
}
Moments contourMoments;
contourMoments = moments(contoursHu[largestContour]);
HuMoments(contourMoments, huMoments);
hu.assign(huMoments,huMoments+7);
feature = Mat(hu);
feature = feature.t();
}
else if(featureToUse == 2) //HistogramOfIntensities
{
int histSize = 16; // bin size - need to determine which pixel threshold to use
float range[] = {0,255};
const float *ranges[] = {range};
int channels[] = {0, 1};
calcHist(&regionOfInterest, 1, channels, Mat(), hist, 1, &histSize, ranges, true, false);
feature = hist.clone();
feature = feature.t();
}
else if(featureToUse == 3) //HOG
{
//play with these parameters to change HOG size
cv::HOGDescriptor descriptor(Size(64, 128), Size(16, 16), Size(16, 16), Size(16, 16), 4, -1, 0.2, true, 64);
descriptor.compute(regionOfInterest, descriptorsValues);
feature = Mat(descriptorsValues);
feature = feature.t();
}
else if(featureToUse == 4) //Correlogram
{
Mat correlogram(8,8,CV_64F);
Mat occurances(8,8,CV_8U);
int xIntensity, yIntensity;
for(int i = 0; i<regionOfInterest.rows; i++)
{
for(int j = 0; j<regionOfInterest.cols; j++)
{
xIntensity = floor(regionOfInterest.at<unsigned char>(i,j)/32);
for(int k = i; k<regionOfInterest.rows; k++)
{
for(int l = 0; l<regionOfInterest.cols; l++)
{
if((k == i && l > j) || k > i)
{
yIntensity = floor(regionOfInterest.at<unsigned char>(k,l)/32);
correlogram.at<double>(xIntensity,yIntensity) += (norm(Point(i,j)-Point(k,l)));
correlogram.at<double>(yIntensity,xIntensity) += (norm(Point(i,j)-Point(k,l)));
occurances.at<unsigned char>(xIntensity,yIntensity) += 1;
occurances.at<unsigned char>(yIntensity,xIntensity) += 1;
}
}
}
}
}
//average it out
for(int i = 0; i<correlogram.rows; i++)
{
for(int j = 0; j<correlogram.cols; j++)
{
correlogram.at<double>(i,j) = occurances.at<unsigned char>(i,j);
}
}
feature = correlogram.reshape(1,1);
}
else if(featureToUse == 5) //Flow
{
}
feature.convertTo(feature, CV_64F);
normalize(feature, feature, 1, 0, NORM_L1, -1, Mat());
cout << "New Feature" << endl << feature << endl;
//classify first target
if(targets.size() == 0) //if first target found
{
Person person(0, center.x, center.y, timeSteps, rec.width, rec.height);
person.kalmanCorrect(center.x, center.y, timeSteps, rec.width, rec.height);
Rect p = person.kalmanPredict();
person.updateFeatures(feature);
person.setCurrentCamera(cameraID);
rectangle(outputImage, p.tl(), p.br(), cv::Scalar(255,0,0), 3);
char str[200];
sprintf(str,"Person %d",person.getIdentifier());
putText(outputImage, str, center, FONT_HERSHEY_SIMPLEX,1,(0,0,0));
targets.push_back(person);
}
else
{
vector<double> mDistances;
bool singleEntry = false;
for(int i = 0; i<targets.size(); i++)
{
if(targets[i].getFeatures().rows == 1)
{
singleEntry = true;
}
}
for(int i = 0; i<targets.size(); i++)
{
Mat covar, mean;
Mat data = targets[i].getFeatures();
calcCovarMatrix(data,covar,mean,CV_COVAR_NORMAL|CV_COVAR_ROWS);
// cout << i << " data" << endl << data << endl;
// cout << i << " Covar" << endl << covar << endl;
// cout << i << " mean" << endl << mean << endl;
double mDistance;
if(singleEntry == false)
{
Mat invCovar;
invert(covar,invCovar,DECOMP_SVD);
mDistance = Mahalanobis(feature,mean,invCovar);
cout << i << " Mahalanobis Distance" << endl << mDistance << endl;
}
else
{
mDistance = norm(feature,mean,NORM_L1);
cout << i << " Norm Distance" << endl << mDistance << endl;
}
mDistances.push_back(mDistance);
}
Mat test = Mat(mDistances);
cout << "Distances" << endl << test << endl;
double sum = 0.0;
for(int i = 0; i<mDistances.size(); i++)
{
sum += mDistances[i];
}
for(int i = 0; i<mDistances.size(); i++)
{
mDistances[i] = sum/mDistances[i];
}
normalize(mDistances,mDistances,1,0,NORM_L1,-1,Mat());
Mat probabilities = Mat(mDistances);
cout << "Probabilities" << endl << probabilities << endl;
//special case to classify second target
if(targets.size() == 1)
{
if(fabs(center.x-targets[0].getLastPosition().x)<100 and fabs(center.y-targets[0].getLastPosition().y)<100)
{
targets[0].kalmanCorrect(center.x, center.y, timeSteps, rec.width, rec.height);
Rect p = targets[0].kalmanPredict();
targets[0].updateFeatures(feature);
targets[0].setCurrentCamera(cameraID);
rectangle(outputImage, p.tl(), p.br(), cv::Scalar(255,0,0), 3);
char str[200];
sprintf(str,"Person %d",targets[0].getIdentifier());
putText(outputImage, str, center, FONT_HERSHEY_SIMPLEX,1,(0,0,0));
}
else
{
Person person(1, center.x, center.y, timeSteps, rec.width, rec.height);
person.kalmanCorrect(center.x, center.y, timeSteps, rec.width, rec.height);
Rect p = person.kalmanPredict();
person.updateFeatures(feature);
person.setCurrentCamera(cameraID);
rectangle(outputImage, p.tl(), p.br(), cv::Scalar(255,0,0), 3);
char str[200];
sprintf(str,"Person %d",person.getIdentifier());
putText(outputImage, str, center, FONT_HERSHEY_SIMPLEX,1,(0,0,0));
targets.push_back(person);
}
}
else
{
double greatestProbability = 0.0;
int identifier = 0;
double min, max;
Point min_loc, max_loc;
minMaxLoc(probabilities, &min, &max, &min_loc, &max_loc);
greatestProbability = max;
identifier = max_loc.y;
cout << greatestProbability << " at " << identifier << endl;
if(greatestProbability >= 0.5)
{
targets[identifier].kalmanCorrect(center.x, center.y, timeSteps, rec.width, rec.height);
Rect p = targets[identifier].kalmanPredict();
targets[identifier].updateFeatures(feature);
targets[identifier].setCurrentCamera(cameraID);
rectangle(outputImage, p.tl(), p.br(), cv::Scalar(255,0,0), 3);
char str[200];
sprintf(str,"Person %d",targets[identifier].getIdentifier());
putText(outputImage, str, center, FONT_HERSHEY_SIMPLEX,1,(0,0,0));
}
else
{
int identifier = targets.size();
Person person(identifier, center.x, center.y, timeSteps, rec.width, rec.height);
person.kalmanCorrect(center.x, center.y, timeSteps, rec.width, rec.height);
Rect p = person.kalmanPredict();
person.updateFeatures(feature);
person.setCurrentCamera(cameraID);
rectangle(outputImage, p.tl(), p.br(), cv::Scalar(255,0,0), 3);
char str[200];
sprintf(str,"Person %d",person.getIdentifier());
putText(outputImage, str, center, FONT_HERSHEY_SIMPLEX,1,(0,0,0));
targets.push_back(person);
}
}
}
}
rectangle(outputImage, r, Scalar(0,0,255), 2, 8, 0);
}
}
// display image in window
imshow(windowName, outputImage);
key = waitKey((int) std::max(2.0, EVENT_LOOP_DELAY - (((getTickCount() - timeStart) / getTickFrequency())*1000)));
if (key == 'x')
{
// if user presses "x" then exit
std::cout << "Keyboard exit requested : exiting now - bye!" << std::endl;
keepProcessing = false;
}
timeSteps += 1;
}
// the camera will be deinitialized automatically in VideoCapture destructor
// all OK : main returns 0
return 0;
}
// not OK : main returns -1
return -1;
}

What's happening is that you're using some feature of OpenCV that doesn't support running in a multi-threaded environment, or else you're not making proper use of synchronization mechanisms such as mutexes and monitors to restrict access to critical sections of your code to one thread at a time. We won't be able to tell you exactly what you're doing wrong unless you share more of your code, though. From your log it seems as though some sort of initialization is being run more than once.
From your code and your log, two things come to mind:
Are you accidentally trying to access video capture hardware on multiple threads?
Maybe creating a new window causes initialization of something in GTK. Try creating your windows on the main thread and see if it helps. Note that, OpenCV or not, having more than one thread for your UI is a bad idea.
If none of this helps, try adding some log output to your code so we can be sure which line is causing the errors.
So it was #2 after all. To fix it, you must move all the namedWindow calls to the main thread. Afterwards, if it still fails on the imshow calls, you'll have to move those to the main thread as well. You'll need a condition variable for each thread, and global variables that the threads write to and the main thread uses to update the windows. I'd provide the code, but I don't know much about C++ concurrency. You can read more about this task here: waiting thread until a condition has been occurred
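For anyone landing here later, a minimal sketch of that pattern, under assumptions not taken from the asker's actual code: the worker threads only capture and process frames and publish the latest one under a mutex, while the main thread alone creates the windows and calls imshow/waitKey. For brevity it uses a simple mutex-guarded "latest frame" slot instead of the per-thread condition variables mentioned above; the file names and the fixed count of four cameras are illustrative.
#include <opencv2/opencv.hpp>
#include <atomic>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
// Shared state (illustrative): one "latest frame" slot per camera, guarded by a mutex.
std::mutex frameMutex;
std::vector<cv::Mat> latestFrames(4);
std::atomic<bool> running(true);
// Worker thread: capture and processing only, no HighGUI calls at all.
void runOnSingleCamera(const std::string &file, int cameraID)
{
    cv::VideoCapture cap(file);
    cv::Mat img;
    while (running && cap.read(img))
    {
        // ... per-frame processing for this camera goes here ...
        std::lock_guard<std::mutex> lock(frameMutex);
        latestFrames[cameraID] = img.clone();
    }
}
int main()
{
    std::vector<std::string> files = {"alphaInput.webm", "betaInput.webm", "gammaInput.webm", "deltaInput.webm"};
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i)
    {
        cv::namedWindow(files[i], 1); // windows are created on the main thread only
        workers.emplace_back(runOnSingleCamera, files[i], i);
    }
    while (running)
    {
        {
            std::lock_guard<std::mutex> lock(frameMutex);
            for (int i = 0; i < 4; ++i)
                if (!latestFrames[i].empty())
                    cv::imshow(files[i], latestFrames[i]); // imshow on the main thread only
        }
        if (cv::waitKey(40) == 'x')
            running = false;
    }
    for (std::thread &t : workers)
        t.join();
    return 0;
}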

Related

Color to grayscale conversion

I'm using a C++ OpenCV program for first-principles algorithm development for HDL (Verilog) image object detection. I've finally managed to get the HDL version up to the point of Canny detection. In order to validate the two, both need to have identical output. I have found there are subtle differences that I think are caused by the OpenCV imread colour-to-grayscale conversion biasing green. The smoothed image is overall brighter in the OpenCV C++ method. From looking at the rgb2gray method, it appears OpenCV uses a weighted conversion, i.e. (R*X + G*Y + B*Z)/3, while in HDL I have been using (R + G + B)/3, as I require it to complete the Gaussian, Sobel and Canny filters. Human visualisation is secondary, and multiplication by a non-integer is undesirable.
Is there a standard linear grayscale conversion, or a means to override the existing method?
...
int main()
{
int thold = 15;
clock_t start;
double duration;
const int sobelX[3][3] = { {-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1} }; // Were originally floats in Python
const int sobelY[3][3] = { {-1, -2, -1}, {0, 0, 0}, {1, 2, 1} }; // Were originally floats in Python
const int kernel[5][5] = { {1,6,12,6,1},
{6,42,79,42,6},
{12,79,148,79,12},
{6,42,79,42,6},
{1,6,12,6,1} };// 1/732
// Above: normalised kernel for smoothing, see original Python script for method
start = std::clock();
int height, width, intPixel, tSx, tSy, tS, dirE, dirEE, maxDir, curPoint, contDirection, cannyImgPix, nd, tl, tm, tr, mr, br, bm, bl, ml = 0;
int contNum = 128;
int contPixCount = 0;
int curContNum = 0;
int contPlace = 0;
int oldContPlace = 0;
int g = 0;
bool maxPoint;
struct pixel {
int number;
int h;
int w;
};
std::vector<pixel> contourList;
//double floatPixel = 0.0;
int kernalCumulator = 0;
const int mp = 3;
// Scalar color(0, 0, 255);
// duration = ((clock()) - start) / (double)CLOCKS_PER_SEC;
// start = clock();
// cout << "Start image in" << duration << '\n';
// Mat dst;
Mat rawImg = imread("C:\\Users\\&&&\\Documents\\pycode\\paddedGS.png",0);
printf("%d",rawImg.type());
// Mat rawImg = imread("C:\\Users\\&&&\\Documents\\openCV_Master\\openCVexample\\openCVexample\\brace200.jpg ", 0);
height = rawImg.rows;
width = rawImg.cols;
cout << "Height of image " << height << '\n';
cout << "Width of image " << width << '\n';
Mat filteredImg = Mat::zeros(height, width, CV_8U);
printf("%d", filteredImg.type());
Mat sobelImg = Mat::zeros(height, width, CV_8U);
Mat directionImg = Mat::zeros(height, width, CV_8U);
Mat cannyImg = Mat::zeros(height, width, CV_8U);
Mat contourImg = Mat::zeros(height, width, CV_16U);
// rawImg.convertTo(rawImg, CV_8UC1);
duration = ((clock()) - start) / (double)CLOCKS_PER_SEC;
start = clock();
cout << "Start image in" << duration << '\n';
// Loop to threshold already grayscaled image
/*
for (int h = 0; h < (height); h++)
{
for (int w = 0; w < (width); w++)
{
g = (int)rawImg.at<uchar>(h, w,0);
cout << g << "g";
g+= (int)rawImg.at<uchar>(h, w, 1);
cout << g << "g";
g+= (int)rawImg.at<uchar>(h, w, 2);
cout << g << "g";
g = g/3;
rawGImg.at<uchar>(h,w) = g;
}
}
*/
// imshow("thresholded Image", rawImg);
// waitKey();
// Loop to smooth using Gaussian 5 x 5 kernel
// imshow("raw Image", rawImg);
for (int h = 3; h < (height - 3); h++)
{
for (int w = 3; w < (width - 3); w++)
{
if (rawImg.at<uchar>(h, w) >=6 )//Thresholding included
{
for (int xk = 0; xk < 5; xk++)
{
for (int yk = 0; yk < 5; yk++)
{
intPixel = rawImg.at<uchar>((h + (xk - mp)), (w + (yk - mp)));
kernalCumulator += intPixel*(kernel[xk][yk]); // Multiplier required as rounding is making number go above 255, better solution?
}
}
}
else
kernalCumulator = 0;
kernalCumulator = kernalCumulator / 732;
if (kernalCumulator < 0 || kernalCumulator > 255)
{
// cout << "kernal Value: " << kernalCumulator;
// cout << " intPixel:" << intPixel << '\n';
}
filteredImg.at<uchar>(h, w) = (uchar)kernalCumulator;
kernalCumulator = 0;
}
}
Our vision does not perceive brightness linearly, so for typical applications it makes sense to use some sort of transformation that tries to mimic human perception.
For your application, you have two options: either use a similar transformation in HDL (which might not be easy or desirable), or write a custom RGB-to-grayscale conversion for OpenCV that uses the same transformation you use in HDL.
A short snippet (more like pseudocode, you'll have to figure out the details) for this would be something like:
cv::Mat linearRgbToGray(const cv::Mat &color) {
cv::Mat gray(color.size(), CV_8UC1);
for (int i = 0; i < color.rows; i++)
for (int j = 0; j < color.cols; j++)
gray.at(i, j) = (color.at(i, j)[0] + color.at(i, j)[1] + color.at(i, j)[2]) / 3;
}
As per Paul92's advice above
cv::Mat linearRgbToGray(const cv::Mat &color) {
cv::Mat gray(color.size(), CV_8UC1);
for (int i = 0; i < color.rows; i++)
for (int j = 0; j < color.cols; j++)
gray.at<uchar>(i, j) = ((color.at<cv::Vec3b>(i, j)[0] + color.at<cv::Vec3b>(i, j)[1] + color.at<cv::Vec3b>(i, j)[2]) / 3);
return gray;
}
The above code worked and overcame the out-of-bounds errors I experienced earlier. Thank you, Rob.
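In case it helps anyone else, here is a minimal usage sketch of that function; it assumes a 3-channel 8-bit input and the linearRgbToGray definition above, and the file names are just placeholders.
#include <opencv2/opencv.hpp>
// linearRgbToGray(...) as defined above is assumed to be visible here.
int main()
{
    // Read the image in colour so all three channels are available.
    cv::Mat color = cv::imread("input.png", cv::IMREAD_COLOR);
    if (color.empty())
        return -1;
    // Unweighted (R + G + B) / 3 conversion, matching the HDL pipeline.
    cv::Mat gray = linearRgbToGray(color);
    cv::imwrite("linear_gray.png", gray);
    return 0;
}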

Compute coordinates from source images after stitching

I use a panorama stitching algorithm from OpenCV to stitch 2 or 3 images into one new result image.
I have coordinates of points in each source image. I need to calculate the new coordinates of these points in the result image.
I describe the algorithm below. My code is similar to the "stitching_detailed" sample from OpenCV (branch 3.4). A result_mask of type Mat is produced; maybe it is the solution? But I don't know how to use it. I found a related question here, but not about stitching.
Any idea?
Here is the algorithm (for detailed code: stitching_detailed.cpp):
Find features for each image:
Ptr<FeaturesFinder> finder = makePtr<SurfFeaturesFinder>()
vector<ImageFeatures> features(num_images);
for (int i = 0; i < num_images; ++i)
{
(*finder)(images[i], features[i]);
}
Make pairwise_matches:
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
Reorder the images:
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
// here some code to reorder 'images'
Estimate an homography in cameras:
vector<CameraParams> cameras;
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
(*estimator)(features, pairwise_matches, cameras);
Convert to CV_32F:
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
}
Execute a BundleAdjuster:
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
adjuster->setRefinementMask(refine_mask);
(*adjuster)(features, pairwise_matches, cameras);
Compute a value for warped_image_scale:
for (int i = 0; i < cameras.size(); ++i)
focals.push_back(cameras[i].focal);
float warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
Do wave correction:
vector<Mat> rmats;
for (size_t i = 0; i < cameras.size(); ++i)
rmats.push_back(cameras[i].R.clone());
waveCorrect(rmats, wave_correct);
for (size_t i = 0; i < cameras.size(); ++i)
cameras[i].R = rmats[i];
Create a warper:
Ptr<WarperCreator> warper_creator = makePtr<cv::SphericalWarper>();
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
Create a blender and feed it:
Ptr<Blender> blender;
for (size_t i = 0; i < cameras.size(); ++i)
{
full_img = input_imgs[img_idx];
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_scale = /* … */
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
// Warp the current image
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
// Compensate exposure
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(blend_type, try_gpu);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
MultiBandBlender *mb = dynamic_cast<MultiBandBlender *>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
blender->prepare(corners, sizes);
}
// Blend the current image
blender->feed(img_warped_s, mask_warped, corners[i]);
}
Then, use the blender:
Mat result, result_mask;
blender->blend(result, result_mask);
// The result image is in 'result'
When I was a schoolboy, I found opencv/samples/cpp/stitching_detailed.cpp in the OpenCV samples folder. At that time my programming skills were very poor, and I couldn't understand it even though I racked my brains. This question caught my attention and brought back that memory. After a whole night of hard work and debugging, I finally got it.
Basic steps:
Given the three images: blue.png, green.png, and red.png
We can get the stitching result (result.png) using stitching_detailed.cpp.
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
I choose the centers of the three images, calculate the corresponding (warped) coordinates on the stitching image, and draw them as solid circles, as follows:
Warping images (auxiliary)...
Compensating exposure...
Blending ...
Warp each center point, and draw solid circle.
[408, 204] => [532, 224]
[408, 204] => [359, 301]
[408, 204] => [727, 320]
Check `result.png`, `result_mask.png` and `result2.png`!
Done!
This is the function calcWarpedPoint I wrote to calculate the warped point on the stitching image:
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point using the camera parameters.
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes.
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point to the stitching image.
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
This is an example code snippet:
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
The full code:
/*
* Author : Kinght-金(https://stackoverflow.com/users/3547485/)
* Created : 2019/03/01 23:00 (CST)
* Finished : 2019/03/01 07:50 (CST)
*
* Modified on opencv401/samples/cpp/stitching_detailed.cpp
* From https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
*
*
* Description: A simple opencv(4.0.1) image stitching code for Stack Overflow answers.
* For https://stackoverflow.com/questions/54904718/compute-coordinates-from-source-images-after-stitching/54953792#comment96681412_54953792
*
*/
#include <iostream>
#include <fstream>
#include <string>
#include "opencv2/opencv_modules.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/stitching/detail/autocalib.hpp"
#include "opencv2/stitching/detail/blenders.hpp"
#include "opencv2/stitching/detail/camera.hpp"
#include "opencv2/stitching/detail/exposure_compensate.hpp"
#include "opencv2/stitching/detail/matchers.hpp"
#include "opencv2/stitching/detail/motion_estimators.hpp"
#include "opencv2/stitching/detail/seam_finders.hpp"
#include "opencv2/stitching/detail/warpers.hpp"
#include "opencv2/stitching/warpers.hpp"
using namespace std;
using namespace cv;
using namespace cv::detail;
//! img_names are the input image (full) paths
// You can download them using the links from the answer.
//! Blue: https://i.stack.imgur.com/Yz3U1.png
//! Green: https://i.stack.imgur.com/AbUTH.png
//! Red: https://i.stack.imgur.com/9wcGc.png
vector<String> img_names = {"D:/stitching/blue.png", "D:/stitching/green.png", "D:/stitching/red.png"};
//! The function to calculate the warped point on the stitching image.
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes,
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
int main(int argc, char* argv[])
{
double work_megapix = 0.6;
double seam_megapix = 0.1;
double compose_megapix = -1;
float conf_thresh = 1.f;
float match_conf = 0.3f;
float blend_strength = 5;
// Check if have enough images
int num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
double work_scale = 1, seam_scale = 1, compose_scale = 1;
bool is_work_scale_set = false, is_seam_scale_set = false, is_compose_scale_set = false;
//(1) Create the feature finder
Ptr<Feature2D> finder = ORB::create();
// (2) Read the images, scale them appropriately, and compute their feature descriptors
Mat full_img, img;
vector<ImageFeatures> features(num_images);
vector<Mat> images(num_images);
vector<Size> full_img_sizes(num_images);
double seam_work_aspect = 1;
for (int i = 0; i < num_images; ++i)
{
full_img = imread(img_names[i]);
full_img_sizes[i] = full_img.size();
if (full_img.empty())
{
cout << "Can't open image " << img_names[i] << std::endl;
return -1;
}
if (!is_work_scale_set)
{
work_scale = min(1.0, sqrt(work_megapix * 1e6 / full_img.size().area()));
is_work_scale_set = true;
}
resize(full_img, img, Size(), work_scale, work_scale, INTER_LINEAR_EXACT);
if (!is_seam_scale_set)
{
seam_scale = min(1.0, sqrt(seam_megapix * 1e6 / full_img.size().area()));
seam_work_aspect = seam_scale / work_scale;
is_seam_scale_set = true;
}
computeImageFeatures(finder, img, features[i]);
features[i].img_idx = i;
std::cout << "Features in image #" << i + 1 << ": " << features[i].keypoints.size() << std::endl;
resize(full_img, img, Size(), seam_scale, seam_scale, INTER_LINEAR_EXACT);
images[i] = img.clone();
}
full_img.release();
img.release();
// (3) Create the image feature matcher and compute the match information
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
matcher->collectGarbage();
//! (4) Remove outliers and keep only the largest, most confident component
// Leave only images we are sure are from the same panorama
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
vector<Mat> img_subset;
vector<String> img_names_subset;
vector<Size> full_img_sizes_subset;
for (size_t i = 0; i < indices.size(); ++i)
{
img_names_subset.push_back(img_names[indices[i]]);
img_subset.push_back(images[indices[i]]);
full_img_sizes_subset.push_back(full_img_sizes[indices[i]]);
}
images = img_subset;
img_names = img_names_subset;
full_img_sizes = full_img_sizes_subset;
// Check if we still have enough images
num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
//! (5) Estimate the homography
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
vector<CameraParams> cameras;
if (!(*estimator)(features, pairwise_matches, cameras))
{
cout << "Homography estimation failed.\n";
return -1;
}
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
std::cout << "\nInitial camera intrinsics #" << indices[i] + 1 << ":\nK:\n" << cameras[i].K() << "\nR:\n" << cameras[i].R << std::endl;
}
// (6) Create the bundle adjuster
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
Mat_<uchar> refine_mask = Mat::zeros(3, 3, CV_8U);
refine_mask(0, 0) = 1;
refine_mask(0, 1) = 1;
refine_mask(0, 2) = 1;
refine_mask(1, 1) = 1;
refine_mask(1, 2) = 1;
adjuster->setRefinementMask(refine_mask);
if (!(*adjuster)(features, pairwise_matches, cameras))
{
cout << "Camera parameters adjusting failed.\n";
return -1;
}
// Find median focal length
vector<double> focals;
for (size_t i = 0; i < cameras.size(); ++i)
{
focals.push_back(cameras[i].focal);
}
sort(focals.begin(), focals.end());
float warped_image_scale;
if (focals.size() % 2 == 1)
warped_image_scale = static_cast<float>(focals[focals.size() / 2]);
else
warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
std::cout << "\nWarping images (auxiliary)... \n";
vector<Point> corners(num_images);
vector<UMat> masks_warped(num_images);
vector<UMat> images_warped(num_images);
vector<Size> sizes(num_images);
vector<UMat> masks(num_images);
// Prepare image masks
for (int i = 0; i < num_images; ++i)
{
masks[i].create(images[i].size(), CV_8U);
masks[i].setTo(Scalar::all(255));
}
// Warp images and their masks
Ptr<WarperCreator> warper_creator = makePtr<cv::CylindricalWarper>();
if (!warper_creator)
{
cout << "Can't create the warper \n";
return 1;
}
//! Create RotationWarper
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
//! Calculate warped corners/sizes/mask
for (int i = 0; i < num_images; ++i)
{
Mat_<float> K;
cameras[i].K().convertTo(K, CV_32F);
float swa = (float)seam_work_aspect;
K(0, 0) *= swa; K(0, 2) *= swa;
K(1, 1) *= swa; K(1, 2) *= swa;
corners[i] = warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]);
sizes[i] = images_warped[i].size();
warper->warp(masks[i], K, cameras[i].R, INTER_NEAREST, BORDER_CONSTANT, masks_warped[i]);
}
vector<UMat> images_warped_f(num_images);
for (int i = 0; i < num_images; ++i)
images_warped[i].convertTo(images_warped_f[i], CV_32F);
std::cout << "Compensating exposure... \n";
//! Compute exposure and adjust the images to reduce brightness differences
Ptr<ExposureCompensator> compensator = ExposureCompensator::createDefault(ExposureCompensator::GAIN_BLOCKS);
if (dynamic_cast<BlocksCompensator*>(compensator.get()))
{
BlocksCompensator* bcompensator = dynamic_cast<BlocksCompensator*>(compensator.get());
bcompensator->setNrFeeds(1);
bcompensator->setNrGainsFilteringIterations(2);
bcompensator->setBlockSize(32, 32);
}
compensator->feed(corners, images_warped, masks_warped);
Ptr<SeamFinder> seam_finder = makePtr<detail::GraphCutSeamFinder>(GraphCutSeamFinderBase::COST_COLOR);
seam_finder->find(images_warped_f, corners, masks_warped);
// Release unused memory
images.clear();
images_warped.clear();
images_warped_f.clear();
masks.clear();
Mat img_warped, img_warped_s;
Mat dilated_mask, seam_mask, mask, mask_warped;
Ptr<Blender> blender;
double compose_work_aspect = 1;
for (int img_idx = 0; img_idx < num_images; ++img_idx)
{
// Read image and resize it if necessary
full_img = imread(img_names[img_idx]);
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_work_aspect = compose_scale / work_scale;
// Update warped image scale
warped_image_scale *= static_cast<float>(compose_work_aspect);
warper = warper_creator->create(warped_image_scale);
// Update corners and sizes
for (int i = 0; i < num_images; ++i)
{
cameras[i].focal *= compose_work_aspect;
cameras[i].ppx *= compose_work_aspect;
cameras[i].ppy *= compose_work_aspect;
Size sz = full_img_sizes[i];
if (std::abs(compose_scale - 1) > 1e-1)
{
sz.width = cvRound(full_img_sizes[i].width * compose_scale);
sz.height = cvRound(full_img_sizes[i].height * compose_scale);
}
Mat K;
cameras[i].K().convertTo(K, CV_32F);
Rect roi = warper->warpRoi(sz, K, cameras[i].R);
corners[i] = roi.tl();
sizes[i] = roi.size();
}
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
full_img.release();
Size img_size = img.size();
Mat K, R;
cameras[img_idx].K().convertTo(K, CV_32F);
R = cameras[img_idx].R;
// Warp the current image : img => img_warped
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
img_warped.convertTo(img_warped_s, CV_16S);
img_warped.release();
img.release();
mask.release();
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(Blender::MULTI_BAND, false);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
if (blend_width < 1.f){
blender = Blender::createDefault(Blender::NO, false);
}
else
{
MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
}
blender->prepare(corners, sizes);
}
blender->feed(img_warped_s, mask_warped, corners[img_idx]);
}
/* ===========================================================================*/
// Blend image
std::cout << "\nBlending ...\n";
Mat result, result_mask;
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
std::cout << "\nDone!\n";
/* ===========================================================================*/
return 0;
}
Some links that may be useful:
stitching_detailed.cpp : https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
warper->warp(), warpPoint(), warpRoi() https://github.com/opencv/opencv/blob/master/modules/stitching/src/warpers.cpp#L153
resultRoi() https://github.com/opencv/opencv/blob/master/modules/stitching/src/util.cpp#L116
Other links that may be interesting:
Converting opencv remap code from c++ to python
Split text lines in scanned document
How do I use the relationships between Flann matches to determine a sensible homography?

Segmentation fault after upgrading to openCV 3.4.1 from 3.2

After upgrading from openCV 3.2 to openCV 3.4.1 I get a segmentation fault when running my program.
Full source code is below (I'm not a C++ expert).
Running the program: ./txtbin input.png output.png
Hope someone can help :)
error
> Error: signal 11:
> #1 ./txtbin(_Z6bTracei+0x1c) [0x5596628d8e68]
> #2 /lib/x86_64-linux-gnu/libc.so.6(+0x33060) [0x7fa1b8481060]
> #3 /usr/local/lib/libopencv_world.so.3.4(+0xa3f7da) [0x7fa1b9ac97da]
> #4 /usr/local/lib/libopencv_world.so.3.4(+0xa2782d) [0x7fa1b9ab182d]
> #5 /usr/local/lib/libopencv_world.so.3.4(_ZN2cv7imwriteERKNS_6StringERKNS_11_InputArrayERKSt6vectorIiSaIiEE+0x8e) [0x7fa1b9ab4a4e]
> #6 ./txtbin(_ZN6Txtbin8binarizeEv+0x12d) [0x5596628d67c3]
> #7 ./txtbin(_ZN6Txtbin3runEv+0x698) [0x5596628d8c10]
> #8 ./txtbin(main+0x3fa) [0x5596628d92db]
> #9 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf1) [0x7fa1b846e2e1]
input.png
txtbin.cpp
/*
* Compile
* # g++ -rdynamic -std=c++11 txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
*
* Get opencv version
* # pkg-config --modversion opencv
*/
#include <signal.h>
#include "../backtrace/backtrace.h"
#include "txtbin.hpp"
Backtrace b;
void bTrace(int sig){
b.trace(sig);
}
void usage(const std::string VERSION){
std::cout << "txtbin: " << VERSION << "\nOpenCV: " << CV_VERSION << "\n\nUsage: txtbin input [options] output\n"
"Options:\n"
"\t-w <number> -- Set max width (keeps aspect ratio)\n"
"\t-h <number> -- Set max height (keeps aspect ratio)\n"
"\t-c -- Crop text contours\n"
"\t-r -- Rotate contents (deskew)\n"
"\t-m <number> -- Set margins (%)\n"
"\t-b <number> -- Set blockside (pixel)\n"
"\t Default: 9 pixel\n"
"\t-t <number> -- Set threshold (%)\n"
"\t Default: 85 %\n"
"\t-v -- Verbose\n" << std::endl;
}
int main(int argc, char* argv[]){
signal(SIGSEGV, &bTrace);
Txtbin a;
try{
// Parse arguments
for(int i = 1; i < argc; i++){
std::string arg = std::string(argv[i]);
if(i == 1){
a.set_input(arg);
}
else if(arg == "-w"){
a.set_width(atoi(argv[++i]));
}
else if(arg == "-h"){
a.set_height(atoi(argv[++i]));
}
else if(arg == "-c"){
a.set_crop(true);
}
else if(arg == "-r"){
a.set_rotate(true);
}
else if(arg == "-m"){
a.set_margin(atoi(argv[++i]));
}
else if(arg == "-b"){
a.set_blockside(atoi(argv[++i]));
}
else if(arg == "-t"){
a.set_threshold(atoi(argv[++i]));
}
else if(arg == "-v"){
a.set_verbose(true);
}
else if(i == argc - 1){
a.set_output(arg);
}
else{
throw std::runtime_error("Argument '"+arg+"' is invalid");
}
}
a.run();
}
catch(std::exception& e){
std::cerr << "Error: " << e.what() << "\n" << std::endl;
usage(a.VERSION);
return 1;
}
return 0;
}
txtbin.h
struct textblock{
int left = 0;
int top = 0;
int right = 0;
int bottom = 0;
};
class Txtbin{
private:
std::string input = "";
std::string output = "output.png";
int max_width = 0;
int max_height = 0;
bool is_crop = false;
bool is_rotate = false;
float margin = 0;
int blockside = 9; // set greater for larger fonts in image and vice versa
bool is_verbose = false;
float contrast = 0.01; // set smaller for lower contrast image
float thresholding = 0.85;
void binarize ();
cv::Mat src;
cv::Mat calc_block_mean_variance(cv::Mat& img);
textblock detect_text_block (bool test_output);
void downsize ();
void crop (textblock coords);
void error (const std::string& s);
public:
Txtbin();
const std::string VERSION = "0.3.6";
void set_input (const std::string& s);
void set_output (const std::string& s);
void set_height (int h);
void set_width (int w);
void set_crop (bool c);
void set_rotate (bool r);
void set_margin (float m);
void set_blockside (int b);
void set_threshold (int t);
void set_verbose (bool v);
void run ();
};
txtbin.hpp
#include <iostream>
#include <fstream>
#include <chrono>
#include <boost/algorithm/string.hpp>
#include "/usr/local/include/opencv2/opencv.hpp"
#include "txtbin.h"
Txtbin::Txtbin(){}
void Txtbin::set_input(const std::string& s){
input = s;
}
void Txtbin::set_output(const std::string& s){
output = s;
}
void Txtbin::set_height(int h){
max_height = h;
}
void Txtbin::set_width(int w){
max_width = w;
}
void Txtbin::set_crop(bool c){
is_crop = c;
}
void Txtbin::set_rotate(bool r){
is_rotate = r;
}
void Txtbin::set_margin(float m){
margin = m;
}
void Txtbin::set_blockside(int b){
blockside = b;
}
void Txtbin::set_threshold(int t){
thresholding = t / 100;
}
void Txtbin::set_verbose(bool v){
is_verbose = v;
}
void Txtbin::error(const std::string& s){
throw std::runtime_error(s);
}
void Txtbin::binarize(){
src.convertTo(src, CV_32FC1, 1.0 / 255.0);
cv::Mat res = calc_block_mean_variance(src);
imwrite(output, res * 255);
}
void Txtbin::crop(textblock coords){
if(coords.left < coords.right && coords.top < coords.bottom){
if(coords.left < 0){
coords.left = 0;
}
int crop_width = coords.right - coords.left;
int trim_width = coords.left + crop_width - src.cols;
if(trim_width > 0){
crop_width -= trim_width;
}
if(coords.top < 0){
coords.top = 0;
}
int crop_height = coords.bottom - coords.top;
int trim_height = coords.top + crop_height - src.rows;
if(trim_height > 0){
crop_height -= trim_height;
}
cv::Rect cut_rect = cv::Rect(coords.left, coords.top, crop_width, crop_height);
src = src(cut_rect);
}
else{
std::cout << "Warning: Invalid text block coordinates. Cropping is omitted!" << std::endl;
}
}
void Txtbin::downsize(){
float
width = src.cols,
height = src.rows,
scale = 0;
bool resized = false;
if(max_width > 0 && width > max_width){
scale = width / max_width;
width /= scale;
height /= scale;
resized = true;
}
if(max_height > 0 && height > max_height){
scale = height / max_height;
width /= scale;
height /= scale;
resized = true;
}
if(resized){
resize(src, src, cv::Size(round(width), round(height)));
}
}
textblock Txtbin::detect_text_block(bool test_output){
cv::Mat img = src.clone();
// downsample image and use it for processing
int multiplier = 2;
pyrDown(img, img);
textblock block;
block.left = img.cols;
block.top = img.rows;
int
rect_bottom,
rect_right;
// morphological gradient
cv::Mat grad;
cv::Mat morphKernel = getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3));
morphologyEx(img, grad, cv::MORPH_GRADIENT, morphKernel);
// binarize
cv::Mat bw;
threshold(grad, bw, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU);
// connect horizontally oriented regions
cv::Mat connected;
morphKernel = getStructuringElement(cv::MORPH_RECT, cv::Size(9, 1));
morphologyEx(bw, connected, cv::MORPH_CLOSE, morphKernel);
// find contours
cv::Mat mask = cv::Mat::zeros(bw.size(), CV_8UC1);
std::vector<std::vector<cv::Point> > contours;
std::vector<cv::Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0));
cv::Scalar color = cv::Scalar(0, 255, 0);
cv::Scalar color2 = cv::Scalar(0, 0, 255);
int thickness = 2;
if(test_output){
cv::cvtColor(img, img, CV_GRAY2BGR);
}
// filter contours
if(!hierarchy.empty()){
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
cv::Rect rect = boundingRect(contours[idx]);
cv::Mat maskROI(mask, rect);
maskROI = cv::Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, cv::Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width * rect.height);
// assume at least 25% of the area is filled if it contains text
if (r > 0.25 &&
(rect.height > 8 && rect.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
if(test_output){
rectangle(img, rect, color, thickness);
}
//rectangle(src, cv::Rect(rect.x * multiplier, rect.y * multiplier, rect.width * multiplier, rect.height * multiplier), color, thickness);
if(rect.y < block.top){
block.top = rect.y;
}
rect_bottom = rect.y + rect.height;
if(rect_bottom > block.bottom){
block.bottom = rect_bottom;
}
if(rect.x < block.left){
block.left = rect.x;
}
rect_right = rect.x + rect.width;
if(rect_right > block.right){
block.right = rect_right;
}
}
}
}
if(test_output){
rectangle(img, cv::Point(block.left, block.top), cv::Point(block.right, block.bottom), color2, thickness);
imwrite("test_text_contours.jpg", img);
}
//rectangle(src, cv::Point(block.left * multiplier, block.top * multiplier), cv::Point(block.right * multiplier, block.bottom * multiplier), color2, thickness);
block.left *= multiplier;
block.top *= multiplier;
block.right *= multiplier;
block.bottom *= multiplier;
return block;
}
cv::Mat Txtbin::calc_block_mean_variance(cv::Mat& img){
cv::Mat res;
cv::Mat I;
img.convertTo(I, CV_32FC1);
res = cv::Mat::zeros(img.rows / blockside, img.cols / blockside, CV_32FC1);
cv::Mat inpaintmask;
cv::Mat patch;
cv::Mat small_img;
cv::Scalar m, s;
for(int i = 0; i < img.rows - blockside; i += blockside){
for(int j = 0; j < img.cols - blockside; j += blockside){
patch = I(cv::Range(i, i + blockside + 1), cv::Range(j, j + blockside + 1));
meanStdDev(patch, m, s);
if(s[0] > contrast){
res.at<float>(i / blockside, j / blockside) = m[0];
}
else{
res.at<float>(i / blockside, j / blockside) = 0;
}
}
}
resize(I, small_img, res.size());
threshold(res, inpaintmask, 0.02, 1.0, cv::THRESH_BINARY);
cv::Mat inpainted;
small_img.convertTo(small_img, CV_8UC1, 255);
inpaintmask.convertTo(inpaintmask, CV_8UC1);
inpaint(small_img, inpaintmask, inpainted, 5, cv::INPAINT_TELEA);
resize(inpainted, res, img.size());
res.convertTo(res, CV_32FC1, 1.0 / 255.0);
res = 1.0 - res;
res = img + res;
threshold(res, res, thresholding, 1, cv::THRESH_BINARY);
return res;
}
void Txtbin::run(){
// Return error if input file is not defined
if(input == ""){
error("Input file not defined");
}
// Return error if input file is not found
else{
std::ifstream stream(input.c_str());
if(!stream.good()){
error("Input file not found");
}
}
// Return error if output file is not defined
if(output == ""){
error("Output file not defined");
}
// Return error if output file is not PNG
else{
std::string output_lc = output;
boost::to_lower(output_lc);
if(output_lc.substr(output_lc.find_last_of(".") + 1) != "png"){
error("Output file must be PNG");
}
}
bool test_output = false;
auto start = std::chrono::high_resolution_clock::now();
src = cv::imread(input, CV_LOAD_IMAGE_GRAYSCALE);
if(is_crop || is_verbose){
textblock coords = detect_text_block(test_output);
if(is_verbose){
std::cout << "Image dimensions: " << src.cols << " x " << src.rows << std::endl;
std::cout << "Text block coordinates: Left|Top " << coords.left << "," << coords.top << " Right|Bottom " << coords.right << "," << coords.bottom << std::endl;
}
if(is_crop){
crop(coords);
}
}
if(margin){
int border = src.cols * margin / 100;
copyMakeBorder(src, src, border, border, border, border, cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
}
downsize();
binarize();
auto elapsed = std::chrono::high_resolution_clock::now() - start;
if(is_verbose){
std::cout << "Execution time: " << (std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() / 1000) << " ms" << std::endl;
}
}
backtrace.h
#include <stdio.h>
#include <execinfo.h>
#include <stdlib.h>
class Backtrace {
public:
Backtrace();
void trace(int sig);
};
Backtrace::Backtrace(){}
void Backtrace::trace(int sig){
void *trace[10];
char **messages = (char **)NULL;
int i, trace_size = 0;
trace_size = backtrace(trace, 10);
messages = backtrace_symbols(trace, trace_size);
fprintf(stderr, "Error: signal %d:\n", sig);
for(i=1; i<trace_size; ++i){
fprintf(stderr, "#%d %s\n", i, messages[i]);
}
exit(1);
}
The segfault does not occur with the following implementation of Txtbin::binarize.
void Txtbin::binarize(){
src.convertTo(src, CV_32FC1, 1.0 / 255.0);
cv::Mat res = calc_block_mean_variance(src) * 255;
imwrite(output, res);
}
With the prior definition of Txtbin::binarize, cv::imwrite sees the type as _InputArray::EXPR rather than as _InputArray::MAT, and passes an uninitialized value of img_vec to cv::imwrite_. The segfault results from an attempt to dereference that uninitialized value. The _InputArray::EXPR value comes from the return value of MatExpr operator * (a cv::MatExpr) being passed to _InputArray::_InputArray.
With the modified definition of Txtbin::binarize the return value of MatExpr operator * is passed to MatExpr::operator Mat() const, which passes it to MatOp_AddEx::assign, and the value passed to _InputArray::_InputArray is a cv::Mat rather than a cv::MatExpr. In short, it seems the assignment of the return value of MatExpr operator * prevents an incorrect type conversion (cv::imwrite expects a cv::InputArray, not a cv::Mat, for its second parameter).
It appears that newer versions of opencv are not susceptible to this crash. Prior to this commit, cv::imwrite called the getMat method, and this commit restored that call as the default behavior (the else clause), as a replacement for an overly specific conditional. I confirmed that recompiling opencv with that change to cv::imwrite prevents the crash even with the prior version of Txtbin::binarize (the one in the question), and that the return value of MatExpr operator * is passed to MatExpr::operator Mat() const, as in the modified definition of Txtbin::binarize due to the assignment.
In short, you can work around issue 15545, or you can install its fix.
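As a minimal standalone illustration of that workaround (not the Txtbin code; the matrix here is a placeholder), the point is simply to force the cv::MatExpr produced by operator* into a cv::Mat before it reaches imwrite:
#include <opencv2/opencv.hpp>
int main()
{
    cv::Mat res = cv::Mat::eye(4, 4, CV_32FC1);
    // On affected builds the expression below reaches imwrite as a cv::MatExpr
    // (_InputArray::EXPR) and can crash (issue 15545):
    // cv::imwrite("out.png", res * 255);
    // Assigning first forces conversion to cv::Mat, which imwrite handles correctly:
    cv::Mat scaled = res * 255;
    cv::imwrite("out.png", scaled);
    return 0;
}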

OpenCV 3.0 - SVM + HOG gives same wrong responses for half-person detection

I'm working on a detector for half bodies, in order to improve the performance of a normal people detector. I know there are other ways to deal with occlusion, but this is what I was asked to do in my end-of-degree project. My problem is that I'm not getting good performance; moreover, I'm getting a kind of pattern in which 4 rectangles representing the detections appear in almost the same position, not even covering a half body.
I have a set of 414 images of top-half bodies cropped by myself, used as positive samples, and 8520 negative images, all of them sized 64x64. I extracted the HOG descriptors as follows:
int i;
string imgname, index;
HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);
vector<float> pos_rec_descript;
vector<Point> locations;
size_t SizeDesc;
SizeDesc = hog.getDescriptorSize();
FileStorage fpd ("Pos_Descriptors.yml", FileStorage::WRITE);
for (i = 1; i < 415; i++) { // 2416 images in ./pos_rec
stringstream a;
a << i;
imgname = "./pos_rec3/img" + a.str();
imgname += ".png";
Mat img = imread(imgname, CV_LOAD_IMAGE_COLOR);
hog.compute(img, pos_rec_descript, Size (16,16), Size (0,0),locations);
fpd << "Descriptores" + a.str() << pos_rec_descript;
}
fpd.release();
And I did the same with the negative samples.
Then, I trained an SVM as follows.
#define POS 414
#define NEG 8520
#define TOTAL 8934
#define DESCRIPT 1764
float trainingData[TOTAL][DESCRIPT];
int labels[TOTAL];
fstream doc;
void set_labels(){
int i;
for (i = 0; i < TOTAL; i++){
if (i < POS) {
labels[i] = 1;
}
else{
labels[i] = -1;
}
}
return;
}
int main(int, char**)
{
FileStorage fsv ("supvec.yml", FileStorage::WRITE);
FileStorage ftd ("TrainData.yml", FileStorage::WRITE);
//FileStorage flm ("Labels.yml", FileStorage::WRITE);
FileStorage fpd ("../HOG_descriptors/Pos_Descriptors.yml", FileStorage::READ);
FileStorage fnd ("../HOG_descriptors_neg/Neg_Descriptors.yml", FileStorage::READ);
set_labels();
// Set up training data
vector <float> pos_D, neg_D, train_D ;
int k = 0;
for (int i = 1; i < POS+1; i++) {
stringstream a;
a << i;
fpd["Descriptores" + a.str()] >> pos_D;
for (int j = 0; j < pos_D.size() ; j++){
train_D.push_back(pos_D[j]);
}
}
fpd.release();
for (int i = 1; i < NEG+1; i++) {
stringstream a;
a << i;
fnd["Descriptores" + a.str()] >> neg_D;
for (int j = 0; j < neg_D.size() ; j++){
train_D.push_back(neg_D[j]);
}
}
fnd.release();
for (int i = 0; i < TOTAL; i++){
for (int j = 0; j < DESCRIPT; j++){
trainingData[i][j] = train_D[k];
k++;
}
}
Mat trainingDataMat(TOTAL, DESCRIPT, CV_32FC1, trainingData);
//memcpy(trainingDataMat.data, train_D.data(), train_D.size()*sizeof(float));
Mat labelsMat(TOTAL, 1, CV_32SC1, labels);
//ftd << "trainingDataMat" << trainingDataMat;
//flm << "labelsMat" << labelsMat;
// Train the SVM
Ptr<SVM> svm = SVM::create();
svm->setType(SVM::C_SVC);
svm->setKernel(SVM::LINEAR);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
/*Ptr<TrainData> autoTrainData = TrainData::create(trainingDataMat, ROW_SAMPLE, labelsMat);
ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C);
ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA);
ParamGrid pGrid = SVM::getDefaultGrid(SVM::P);
pGrid.logStep = 1;
ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU);
nuGrid.logStep = 1;
ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF);
coeffGrid.logStep = 1;
ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE);
degreeGrid.logStep = 1; */
cout << "Está entrenando..." << endl;
//svm->trainAuto(autoTrainData, 10, Cgrid, gammaGrid, pGrid, nuGrid, coeffGrid, degreeGrid, false);
svm->train(trainingDataMat, ROW_SAMPLE, labelsMat);
svm->save("SVM3_WS16_P0_LINEAR.yml");
I've tried both LINEAR and RBF kernels (that's why you can see a commented-out autotrain part of the code, which I used to swap between SVM types), but neither of them seems to work. Actually, they give nearly the same responses, which makes me think that maybe the training phase or the detection phase (code below) is ruining the whole project.
This is how I load the SVM for the HOG detector and try it on images:
using namespace cv;
using namespace std;
using namespace cv::ml;
// static void help()
// {
// printf(
// "\nDemonstrate the use of the HoG descriptor using\n"
// " HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
// "Usage:\n"
// "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
{
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);
CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
(alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();
hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;
}
int main(int argc, char** argv)
{
Mat img;
FILE* f = 0;
char _filename[1024];
if( argc == 1 )
{
printf("Usage: peopledetect (People_imgs | People_imgs.txt)\n");
return 0;
}
img = imread(argv[1]);
if( img.data )
{
strcpy(_filename, argv[1]);
}
else
{
f = fopen(argv[1], "rt");
if(!f)
{
fprintf( stderr, "ERROR: the specified file could not be loaded\n");
return -1;
}
}
// Load SVM
Ptr<SVM> svm = SVM::create();
svm = cv::Algorithm::load<ml::SVM>("../SVM_Train/SVM3_WS16_P0_LINEAR.yml");
HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);
vector <float> hog_detector;
get_svm_detector (svm, hog_detector);
hog.setSVMDetector(hog_detector);
namedWindow("people detector", 1);
for(;;)
{
char* filename = _filename;
if(f)
{
if(!fgets(filename, (int)sizeof(_filename)-2, f))
break;
//while(*filename && isspace(*filename))
// ++filename;
if(filename[0] == '#')
continue;
int l = (int)strlen(filename);
while(l > 0 && isspace(filename[l-1]))
--l;
filename[l] = '\0';
img = imread(filename);
}
printf("%s:\n", filename);
if(!img.data)
continue;
fflush(stdout);
vector<Rect> found, found_filtered, searchLocations;
vector<double> found_weights;
double t = (double)getTickCount();
// run the detector with default parameters. to get a higher hit-rate
// (and more false alarms, respectively), decrease the hitThreshold and
// groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
hog.detectMultiScale(img, found, found_weights, 0, Size(16,16), Size(0,0), 1.01, 2);
//hog.detect(img, found, 0, Size(16,16), Size(0,0), searchLocations);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
Rect r = found[i];
for( j = 0; j < found.size(); j++ )
if( j != i && (r & found[j]) == r)
break;
if( j == found.size() )
found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
Rect r = found_filtered[i];
// the HOG detector returns slightly larger rectangles than the real objects.
// so we slightly shrink the rectangles to get a nicer output.
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.7);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.7);
rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 2);
imshow("people detector", img);
waitKey(0);
}
//imshow("people detector", img);
//string imgname = "./Responses/Win_Stride16_4.png";
//imwrite(imgname, img);
int c = waitKey(0) & 255;
if( c == 'q' || c == 'Q' || !f)
break;
}
if(f)
fclose(f);
return 0;
}
I have checked all the dimensions of the descriptors, and every Mat seems to be OK. But when I use detectMultiScale, it shows things like this:
Image 1: It's strange because it's missing lots of detections.
Image 2: Here I realized there was a kind of pattern with these 4 rects.
My problem is that no matter what I change (descriptors, kernel, winStride and padding in detectMultiScale), the responses are always very similar, and nothing indicates that there is a correct detection there.
I'm not very sure about how I'm giving the support vectors to HOG, but it's the only way I found to do it (I found it in one of the posts on StackOverflow).
If any of you has any idea what is going on here, and why the responses don't change from one configuration to another, I would be greatly thankful. This code has been giving me headaches for weeks now. I've been changing parameters on functions, on HOG, changing kernels, and trying different sets of images, but nothing seems to make a big difference in the final result.

Video gets stuck meanshift tracking opencv

I want to track any moving object using the meanshift algorithm. For this I first subtract each frame from the background, then apply erosion, dilation, and smoothing. To detect the moving object and find its coordinates, I use corner point detection.
Then I calculate the mean of the corner points and pass it to the meanshift search window. When an object appears on screen, the program leaves corner point detection and enters meanshift tracking. It keeps running meanshift until the object leaves the screen.
When the object leaves the screen, I want to activate corner point detection again, so I take the program out of meanshift and jump back to corner point detection. The program runs fine, but the problem is that when it leaves meanshift and enters corner point detection again, it gets stuck for a few seconds.
After that it runs smoothly. The problem occurs only during the transition from meanshift to corner point detection. I don't know what the reason could be. Please suggest a solution.
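For clarity, the hand-off described above boils down to something like the following sketch, in the same old C API as the full code below (the foreground mask name, window size, and frame-edge bounds are illustrative, not exact copies of the code):
CvConnectedComp comp;
CvRect window = cvRect(x, y, 80, 90); // seeded from the mean of the corner points
cvMeanShift(foregroundMask, window, cvTermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 200, 1), &comp);
window = comp.rect; // meanshift updates the search window each frame
if (comp.rect.x < 7 || comp.rect.x > 572) {
    // the object has left the frame: switch back to corner point detection
}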
Here is my code:
#include "highgui.h"
#include "cv.h"
#include "cxcore.h"
#include "cvaux.h"
#include <iostream>
using namespace std;
const int MAX_CORNERS = 500;
inline static void allocateOnDemand(IplImage** img, CvSize size, int depth, int channels) {
if (*img != NULL)
return;
*img = cvCreateImage(size, depth, channels);
if (*img == NULL) {
fprintf(stderr, "Error: Couldn't allocate image. Out of memory?\n");
exit(-1);
}
}
int main() {
CvCapture* capture = cvCaptureFromCAM(CV_CAP_V4L2);
IplImage* pFrame[10];
IplImage* bg;
IplImage* img_A;
IplImage* img_B;
IplImage* eig_image;
IplImage* tmp_image;
img_A = cvQueryFrame(capture);
CvSize img_sz = cvGetSize(img_A);
int c1 = 0, c2 = 0;
int xarr[40];
//A temporary replacement for background averaging
int j = 0;
for (j = 0; j < 10; j++) {
pFrame[j] = cvQueryFrame(capture);
cvWaitKey(200);
}
cvSaveImage("10.jpg", pFrame[9], 0); //saving the background
IplImage* imgA = cvCreateImage(cvGetSize(img_A), 8, 1);
IplImage* imgB = cvCreateImage(cvGetSize(img_A), 8, 1);
IplImage* imgB1 = cvCreateImage(cvGetSize(img_A), 8, 1);
IplImage* imgb = cvCreateImage(cvGetSize(img_A), 8, 1);
cvNamedWindow("LKpyr_OpticalFlow", CV_WINDOW_AUTOSIZE);
bg = cvLoadImage("10.jpg", CV_LOAD_IMAGE_GRAYSCALE); //loading the saved background
int flag = 0;
int index = 0;
char keypress;
bool quit = false;
CvConnectedComp*out = new CvConnectedComp(); //output window for meanshift
int win_size = 25; //window size for corner point detection
int x1 = 0;
int y1 = 0;
int x;
int y;
int cc;
line3:
while (quit == false) { //line3:
IplImage* imgC = cvCreateImage(cvGetSize(img_A), 8, 1); //creating output image
cvZero(imgC);
img_B = cvQueryFrame(capture);
imgC = cvQueryFrame(capture);
// line3:
int corner_count = MAX_CORNERS; //total no of corners found in frame
cvCvtColor(img_B, imgb, CV_BGR2GRAY);
//line3:
CvPoint2D32f* cornersA = new CvPoint2D32f[MAX_CORNERS];
CvPoint2D32f* cornersB = new CvPoint2D32f[MAX_CORNERS];
if (index % 2 == 0) {
cvSub(imgb, bg, imgB, NULL); //background subtraction and stuff
cvErode(imgB, imgB, NULL, 4);
cvDilate(imgB, imgB, 0, 2);
cvSmooth(imgB, imgB, 0, 1);
cvThreshold(imgB, imgB, 50, 255, CV_THRESH_BINARY);
//line3:
if (flag == 1) goto line1; //Go to Meanshift
allocateOnDemand(&eig_image, img_sz, IPL_DEPTH_32F, 1);
allocateOnDemand(&tmp_image, img_sz, IPL_DEPTH_32F, 1);
cvCvtColor(img_A, imgA, CV_BGR2GRAY);
//line3:
cvGoodFeaturesToTrack(imgB, eig_image, tmp_image, cornersA, &
corner_count, 0.05, 5.0, 0, 3, 0, 0.04); //detects corners and no of corners stored in corner_count
cvFindCornerSubPix(imgB, cornersA, corner_count, cvSize(12, 12),
cvSize(-1, -1), cvTermCriteria(CV_TERMCRIT_ITER |
CV_TERMCRIT_EPS, 20, 0.03));
CvPoint p0;
CvPoint p1;
CvPoint acc;
cc = corner_count + 20;
acc.x = 0;
acc.y = 0;
for (int i = 0; i < corner_count; i++) {
p0 = cvPoint(cvRound(cornersA[i].x), cvRound(cornersA[i].y));
p1 = cvPoint(cvRound(cornersB[i].x), cvRound(cornersB[i].y));
acc.x = acc.x + p0.x; //calculating mean of corner points
acc.y = acc.y + p0.y;
}
delete[] cornersA;
delete[] cornersB;
cout << "Corner Count is" << corner_count << endl;
cout << "Flag status: " << flag << endl;
if (corner_count > 0) {
flag = 1;
cvWaitKey(20);
}
x1 = cvRound(acc.x / (corner_count + 1));
y1 = cvRound(acc.y / (corner_count + 1));
cout << "x is " << x1 << " y is " << y1 << endl;
cout << "Flag status: " << flag << endl;
x = x1;
y = y1;
if (flag == 0) goto line2; //Go back to Corner Point detection
line1: CvRect window = cvRect(x, y, 80, 90); //Creates window for meanshift algo
cvMeanShift(imgB, window, cvTermCriteria(CV_TERMCRIT_EPS |
CV_TERMCRIT_ITER, 200, 1), out);
window = out->rect;
x = out->rect.x;
y = out->rect.y;
cout << "Now x is " << x << " y is " << y << endl;
cout << "Flag status: " << flag << endl;
if (out->area > 200) {
cvRectangle(imgC, cvPoint(x + 50, y + 100), cvPoint(x - 20, y -
90), cvScalar(0, 0, 255), 3, 8, 0);
} else {}
xarr[c1] = x;
c1++;
if (c1 > 39) c1 = 0;
if (xarr[0] == xarr[39]) {
c2 = 1;
cout << "c2 is now " << c2 << endl;
}
}
if (x == 0 || y == 0 || x < 7 || x > 572 || c2 == 1) {
flag = 0;
c2 = 0;
goto line3;
break;
}
line2: cvShowImage("LKpyr_OpticalFlow", imgC);
keypress = cvWaitKey(20);
// Set the flag to quit if escape was pressed
if (keypress == 27) {
quit = true;
}
//index++;
} //end of while
return 0;
}