I am trying to implement real-time tracking using templates in OpenCV/C++. I am facing problem to update the template with every frame.
Following is the code:
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
///------- template matching -----------------------------------------------------------------------------------------------
Mat TplMatch( Mat &img, Mat &mytemplate )
{
Mat result;
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
return result;
}
///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------
Point minmax( Mat &result )
{
double minVal, maxVal;
Point minLoc, maxLoc, matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
return matchLoc;
}
///------- tracking --------------------------------------------------------------------------------------------------------
void track()
{
if (select_flag)
{
roiImg.copyTo(mytemplate);
// select_flag = false; //select_flag is kept false so that new template can
go_fast = true; //copied to 'mytemplate' for each iteration
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
Point match = minmax( result ); //PROBLEM: "match" always returning same value!!!
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
std::cout << "match: " << match << endl;
/// template update step
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
roiImg = img( ROI );
imshow( "roiImg", roiImg ); //waitKey(0);
}
///------- MouseCallback function ------------------------------------------------------------------------------------------
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/// left button clicked. ROI selection begins
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/// mouse dragged. ROI being selected
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
}
if (event == CV_EVENT_LBUTTONUP)
{
/// ROI selected
select_flag = 1;
drag = 0;
}
}
///------- Main() ----------------------------------------------------------------------------------------------------------
int main()
{
int k;
///open video file
VideoCapture cap;
cap.open( "Megamind.avi" );
if ( !cap.isOpened() )
{ cout << "Unable to open video file" << endl; return -1; }
cap >> img;
GaussianBlur( img, img, Size(7,7), 3.0 );
imshow( "image", img );
while (1)
{
cap >> img;
if ( img.empty() )
break;
// Flip the frame horizontally and add blur
cv::flip( img, img, 1 );
GaussianBlur( img, img, Size(7,7), 3.0 );
if ( rect.width == 0 && rect.height == 0 )
cvSetMouseCallback( "image", mouseHandler, NULL );
else
track();
imshow("image", img);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
The updated template is not being tracked. I am not able to figure out why this is happening since I am updating my template (roiImg) with each iteration. The match value from minmax() function is returning the same value (coordinates) every-time. Test video is availbale at: Megamind
Please look into it and guide ahead...thanks a lot!
EDIT: if you run the code (with the video) you will see the white bounding-box is always at the same position. This is because, the minmax() is returning same "match" value all the time. This value should change with every update.
Try running code with select_flag = false; (uncommitted). The bounding-box is moving according to template. But in this case no template update takes place.
The problem is in this section:
if (select_flag)
{
roiImg.copyTo(mytemplate);
// select_flag = false; //select_flag is kept false so that new template can
// ^^^^^^^^^^^ WRONG
go_fast = true; //copied to 'mytemplate' for each iteration
}
You actually need to set select_flag to be false on the first iteration. Otherwise, you're just copying what's in the current image on that frame into your template, and of course you find it in exactly the same place!
Once that's done, make sure you move your template update to after the tracking is done on that frame. I'd also recommend not drawing on the original image (your rectangle) until after all image accesses are done. You were actually drawing the rectangle into your image before copying out of it. Here's my adjusted function with template update:
void track()
{
std::cout << select_flag << std::endl;
if (select_flag)
{
roiImg.copyTo(mytemplate);
select_flag = false; //select_flag is kept false so that new template can
go_fast = true; //copied to 'mytemplate' for each iteration
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
imshow("match", result);
Point match = minmax( result ); //PROBLEM: "match" always returning same value!!!
std::cout << "match: " << match << endl;
/// template update step
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
std::cout << ROI << std::endl;
roiImg = img( ROI );
imshow( "roiImg", roiImg ); //waitKey(0);
// Update the template AFTER tracking has occurred to carry it over to the next frame
roiImg.copyTo(mytemplate);
imshow("mytemplate", mytemplate);
// Draw onto the image AFTER all accesses are performed
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
}
Related
I am using openCV to capture and save the output image with fixed size (92 by 112). It will capture the frames of video and crop them. However, the output image did not show correct size (some time are 147 by 147, 140 by 140...). What is problem in my code. There are crop image code and whole code. Thanks in advance
CROP image
crop = frame(roi_b);
resize(crop, res, Size(92, 112), 0, 0, INTER_LINEAR); // This will be needed later while saving images
cvtColor(crop, gray, CV_BGR2GRAY); // Convert cropped image to Grayscale
Let see whole code to view the meaning of parameters
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
#include <stdio.h>
using namespace std;
using namespace cv;
// Function Headers
void detectAndDisplay(Mat frame);
// Global variables
// Copy this file from opencv/data/haarscascades to target folder
string face_cascade_name = "haarcascade_frontalface_alt.xml";
CascadeClassifier face_cascade;
string window_name = "Capture - Face detection";
int filenumber; // Number of file to be saved
string filename;
// Function main
int main(void)
{
VideoCapture capture(0);
if (!capture.isOpened()) // check if we succeeded
return -1;
// Load the cascade
if (!face_cascade.load(face_cascade_name))
{
printf("--(!)Error loading\n");
return (-1);
};
// Read the video stream
Mat frame;
for (;;)
{
capture >> frame;
// Apply the classifier to the frame
if (!frame.empty())
{
detectAndDisplay(frame);
}
else
{
printf(" --(!) No captured frame -- Break!");
break;
}
int c = waitKey(10);
if (27 == char(c))
{
break;
}
}
return 0;
}
// Function detectAndDisplay
void detectAndDisplay(Mat frame)
{
std::vector<Rect> faces;
Mat frame_gray;
Mat crop;
Mat res;
Mat gray;
string text;
stringstream sstm;
cvtColor(frame, frame_gray, COLOR_BGR2GRAY);
equalizeHist(frame_gray, frame_gray);
// Detect faces
face_cascade.detectMultiScale(frame_gray, faces, 1.1, 2, 0 | CASCADE_SCALE_IMAGE, Size(30, 30));
// Set Region of Interest
cv::Rect roi_b;
cv::Rect roi_c;
size_t ic = 0; // ic is index of current element
int ac = 0; // ac is area of current element
size_t ib = 0; // ib is index of biggest element
int ab = 0; // ab is area of biggest element
for (ic = 0; ic < faces.size(); ic++) // Iterate through all current elements (detected faces)
{
roi_c.x = faces[ic].x;
roi_c.y = faces[ic].y;
roi_c.width = (faces[ic].width);
roi_c.height = (faces[ic].height);
ac = roi_c.width * roi_c.height; // Get the area of current element (detected face)
roi_b.x = faces[ib].x;
roi_b.y = faces[ib].y;
roi_b.width = (faces[ib].width);
roi_b.height = (faces[ib].height);
ab = roi_b.width * roi_b.height; // Get the area of biggest element, at beginning it is same as "current" element
if (ac > ab)
{
ib = ic;
roi_b.x = faces[ib].x;
roi_b.y = faces[ib].y;
roi_b.width = (faces[ib].width);
roi_b.height = (faces[ib].height);
}
crop = frame(roi_b);
resize(crop, res, Size(92, 112), 0, 0, INTER_LINEAR); // This will be needed later while saving images
cvtColor(crop, gray, CV_BGR2GRAY); // Convert cropped image to Grayscale
// Form a filename
filename = "";
stringstream ssfn;
ssfn << filenumber << ".pgm";
filename = ssfn.str();
filenumber++;
imwrite(filename, gray);
Point pt1(faces[ic].x, faces[ic].y); // Display detected faces on main window - live stream from camera
Point pt2((faces[ic].x + faces[ic].height), (faces[ic].y + faces[ic].width));
rectangle(frame, pt1, pt2, Scalar(0, 255, 0), 2, 8, 0);
}
// Show image
sstm << "Crop area size: " << roi_b.width << "x" << roi_b.height << " Filename: " << filename;
text = sstm.str();
putText(frame, text, cvPoint(30, 30), FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(0, 0, 255), 1, CV_AA);
imshow("original", frame);
if (!crop.empty())
{
imshow("detected", crop);
}
else
destroyWindow("detected");
}
You get it wrong in the 'resize image' part. The cv::resize is defined as
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR )
thus in your code, when you call
resize(crop, res, Size(92, 112), 0, 0, INTER_LINEAR);
the resulted resized image is stored in 'res', not in 'crop'.
The correct code for the crop part is:
crop = frame(roi_b);
resize(crop, res, Size(92, 112), 0, 0, INTER_LINEAR); // This will be needed later while saving images
cvtColor(res, gray, CV_BGR2GRAY); // Convert cropped image to Grayscale
and 'gray' is the grayscale resulted of your resized cropped image.
Some warnings appear in terminal during running:
OpenCV Error: Assertion failed(s>=0) in setSize, file /home/me/opencv2.4/modules/core/src/matrix.cpp, line 116
The program compiled without error and executes, the problem is the eye ROI size changes when user moves closer/farther away from webcam, due to the changing of size, the warning appears. I managed to solve these warnings by setting the eye ROI size equal to my eye template size. However, it ends up the program fails to classify user's eyes open/close because the minVal obtained is 0. The method used is OpenCV Template Matching. Alternatively, I fix my distance from webcam and fix the eye template size could avoid the warning. Every time warning appears, the program fails to classify open/close eyes. The program doesn't work effectively because sometimes it mistakenly classifies the open eyes as closed and vice versa.
Questions:
Is there any alternative to identify open and close eyes other than template matching?
Any ideas how to improve the program in classification of blinking?
Any working example that you know in opencv C/C++ API can classify open and close eyes and count accurately the blinking times?
static CvMemStorage* storage = 0;
// Create a new Haar classifier
static CvHaarClassifierCascade* cascade = 0;
// Function prototype for detecting and drawing an object from an image
bool detect_and_draw( IplImage* image ,CvHaarClassifierCascade* cascade);
const char *cascade_name[1]={"eyes.xml"};
cv::Mat roiImg;
int threshold_value = 200;
int threshold_type = 3;;
int const max_value = 255;
int const max_type = 4;
int const max_BINARY_value = 255;
int hough_thr = 35;
cv::Mat src_gray, dst;
using namespace cv;
Mat img1; Mat img2; Mat templ; Mat result;
const char* image_window = "Source Image";
const char* result_window = "Result window";
int match_method=0;
int max_Trackbar = 5;
int eye_open=0;
int eye_close=0;
//Matching with 2 images ,eye closed or open
void MatchingMethod(cv::Mat templ,int id )
{
/// Source image to display
cv::Mat img_display;
roiImg.copyTo( img_display );
/// Create the result matrix
int result_cols = roiImg.cols - templ.cols + 1;
int result_rows = roiImg.rows - templ.rows + 1;
result.create( result_cols, result_rows, CV_32FC1 );
/// Do the Matching and Normalize
cv::matchTemplate( roiImg, templ, result, match_method );
cv::normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
/// Localizing the best match with minMaxLoc
double minVal; double maxVal; Point minLoc; Point maxLoc;
cv::Point matchLoc;
cv::minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
///Justing checkin the match template value reaching the threashold
if(id == 0 && (minVal < 0))
{
eye_open=eye_open+1;
if(eye_open == 1)
{
std::cout<<"Eye Open"<<std::endl;
eye_open=0;
eye_close=0;
}
}
else if(id == 1 && (minVal < 0))
eye_close=eye_close+1;
if(eye_close == 1)
{
std::cout<<"Eye Closed"<<std::endl;
eye_close=0;
system("python send_arduino.py");
}
/// For SQDIFF and SQDIFF_NORMED, the best matches are lower values. For all the other methods, the higher the better
if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
{ matchLoc = minLoc; }
else
{ matchLoc = maxLoc; }
/// Show me what you got
cv::rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
cv::rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
cv::imshow( image_window, img_display );
cv::imshow( result_window, result );
return;
}
void detect_blink(cv::Mat roi)
{
try
{
MatchingMethod(img1,0);
MatchingMethod(img2,1);
}
catch( cv::Exception& e )
{
std::cout<<"An exception occued"<<std::endl;
}
}
// Main function, defines the entry point for the program.
int main( int argc, char** argv )
{
if(argc <= 1)
{
std::cout<<"\n Help "<<std::endl;
std::cout<<"\n ------------------------------------\n"<<std::endl;
std::cout<<"./blink_detect open_eye.jpg close_eye.jpg\n"<<std::endl;
std::cout<<"Eg :: ./blink_detect 2.jpg 3.jpg\n"<<std::endl;
std::cout<<"\n ------------------------------------\n"<<std::endl;
exit(0);
}
// Structure for getting video from camera or avi
CvCapture* capture = 0;
// Images to capture the frame from video or camera or from file
IplImage *frame, *frame_copy = 0;
// Used for calculations
int optlen = strlen("--cascade=");
// Input file name for avi or image file.
const char* input_name;
img1 = imread( argv[1], 1 );
img2 = imread( argv[2], 1 );
// Load the HaarClassifierCascade
/// Create windows
cv::namedWindow( image_window, CV_WINDOW_AUTOSIZE );
cv::namedWindow( result_window, CV_WINDOW_AUTOSIZE );
// Allocate the memory storage
storage = cvCreateMemStorage(0);
capture = cvCaptureFromCAM( 0);
// Create a new named window with title: result
cvNamedWindow( "original_frame", 1 );
// If loaded succesfully, then:
if( capture )
{
// Capture from the camera.
for(;;)
{
// Capture the frame and load it in IplImage
if( !cvGrabFrame( capture ))
break;
frame = cvRetrieveFrame( capture );
// If the frame does not exist, quit the loop
if( !frame )
break;
// Allocate framecopy as the same size of the frame
if( !frame_copy )
frame_copy = cvCreateImage( cvSize(frame->width,frame->height),
IPL_DEPTH_8U, frame->nChannels );
// Check the origin of image. If top left, copy the image frame to frame_copy.
if( frame->origin == IPL_ORIGIN_TL )
cvCopy( frame, frame_copy, 0 );
// Else flip and copy the image
for(int i=0;i<1;i++)
{
cascade = (CvHaarClassifierCascade*)cvLoad( cascade_name[i], 0, 0, 0 );
// Check whether the cascade has loaded successfully. Else report and error and quit
if( !cascade )
{
fprintf( stderr, "ERROR: Could not load classifier cascade\n" );
return -1;
}
// Call the function to detect and draw the face
if(detect_and_draw(frame_copy,cascade))
{
std::cout<<"Detected"<<std::endl;
}
}
// Wait for a while before proceeding to the next frame
if( cvWaitKey( 1 ) >= 0 )
break;
}
// Release the images, and capture memory
cvReleaseHaarClassifierCascade(&cascade);
cvReleaseImage( &frame_copy );
cvReleaseCapture( &capture );
cvReleaseMemStorage(&storage);
}
return 0;
}
// Function to detect and draw any faces that is present in an image
bool detect_and_draw( IplImage* img,CvHaarClassifierCascade* cascade )
{
int scale = 1;
// Create a new image based on the input image
IplImage* temp = cvCreateImage( cvSize(img->width/scale,img->height/scale), 8, 3 );
// Create two points to represent the face locations
CvPoint pt1, pt2;
int i;
// Clear the memory storage which was used before
cvClearMemStorage( storage );
// Find whether the cascade is loaded, to find the faces. If yes, then:
if( cascade )
{
// There can be more than one face in an image. So create a growable sequence of faces.
// Detect the objects and store them in the sequence
CvSeq* faces = cvHaarDetectObjects( img, cascade, storage,
1.1, 8, CV_HAAR_DO_CANNY_PRUNING,
cvSize(40, 40) );
// Loop the number of faces found.
for( i = 0; i < (faces ? faces->total : 0); i++ )
{
// Create a new rectangle for drawing the face
CvRect* r = (CvRect*)cvGetSeqElem( faces, i );
// Find the dimensions of the face,and scale it if necessary
pt1.x = r->x*scale;
pt2.x = (r->x+r->width)*scale;
pt1.y = r->y*scale;
pt2.y = (r->y+r->height)*scale;
// Draw the rectangle in the input image
cvRectangle( img, pt1, pt2, CV_RGB(255,0,0), 3, 8, 0 );
cv::Mat image(img);
cv::Rect rect;
rect = cv::Rect(pt1.x,pt1.y,(pt2.x-pt1.x),(pt2.y-pt1.y));
roiImg = image(rect);
cv::imshow("roi",roiImg);
///Send to arduino
detect_blink(roiImg);
}
}
cvShowImage( "original_frame", img );
if(i > 0)
return 1;
else
return 0;
cvReleaseImage( &temp );
}
Reference:
Website referred
I am trying to implement real-time tracking using templates. I wish to update the template with every frame. The main modifications I have done are:
1) separated the template matching and minmaxLoc into separate modules namely, TplMatch() and minmax() functions, respectively.
2) Inside the track() function, the select_flag is kept always true so that new template is copied to 'myTemplate' with every iteration.
3) The last 3 lines of function track() are to update the template (roiImg).
4) Also, I have removed any arguments to track() function, since, img and roiImg are global variables and hence no need to pass them to functions.
Following is the code:
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
///------- template matching -----------------------------------------------------------------------------------------------
Mat TplMatch( Mat &img, Mat &mytemplate )
{
Mat result;
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
return result;
}
///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------
Point minmax( Mat &result )
{
double minVal, maxVal;
Point minLoc, maxLoc, matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
return matchLoc;
}
///------- tracking --------------------------------------------------------------------------------------------------------
void track()
{
if (select_flag)
{
roiImg.copyTo(mytemplate);
// select_flag = false;
go_fast = true;
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
Point match = minmax( result );
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
std::cout << "match: " << match << endl;
/// latest match is the new template
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
roiImg = img( ROI );
imshow( "roiImg", roiImg ); //waitKey(0);
}
///------- MouseCallback function ------------------------------------------------------------------------------------------
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/// left button clicked. ROI selection begins
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/// mouse dragged. ROI being selected
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
// imshow("MOUSE roiImg", roiImg); waitKey(0);
}
if (event == CV_EVENT_LBUTTONUP)
{
/// ROI selected
select_flag = 1;
drag = 0;
}
}
///------- Main() ----------------------------------------------------------------------------------------------------------
int main()
{
int k;
/*
///open webcam
VideoCapture cap(0);
if (!cap.isOpened())
return 1;*/
///open video file
VideoCapture cap;
cap.open( "Megamind.avi" );
if ( !cap.isOpened() )
{ cout << "Unable to open video file" << endl; return -1; }
/*
/// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/
cap >> img;
GaussianBlur( img, img, Size(7,7), 3.0 );
imshow( "image", img );
while (1)
{
cap >> img;
if ( img.empty() )
break;
// Flip the frame horizontally and add blur
cv::flip( img, img, 1 );
GaussianBlur( img, img, Size(7,7), 3.0 );
if ( rect.width == 0 && rect.height == 0 )
cvSetMouseCallback( "image", mouseHandler, NULL );
else
track();
imshow("image", img);
// waitKey(100); k = waitKey(75);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
The updated template is not being tracked. I am not able to figure out why this is happening since I am updating my template (roiImg) with each iteration. The match value from minmax() function is returning the same point (coordinates) every-time. Test video is availbale at: http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be
Please look into it and guide ahead...thanks a lot!
I get your original code from this revision of your question: https://stackoverflow.com/revisions/20180073/3
I made the smallest change to your original code, my resulting code is the following:
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
///------- template matching -----------------------------------------------------------------------------------------------
Mat TplMatch( Mat &img, Mat &mytemplate )
{
Mat result;
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
return result;
}
///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------
Point minmax( Mat &result )
{
double minVal, maxVal;
Point minLoc, maxLoc, matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
return matchLoc;
}
///------- tracking --------------------------------------------------------------------------------------------------------
void track()
{
if (select_flag)
{
//roiImg.copyTo(mytemplate);
// select_flag = false;
go_fast = true;
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
Point match = minmax( result );
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
std::cout << "match: " << match << endl;
/// latest match is the new template
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
roiImg = img( ROI );
roiImg.copyTo(mytemplate);
imshow( "roiImg", roiImg ); //waitKey(0);
}
///------- MouseCallback function ------------------------------------------------------------------------------------------
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/// left button clicked. ROI selection begins
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/// mouse dragged. ROI being selected
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
roiImg.copyTo(mytemplate);
// imshow("MOUSE roiImg", roiImg); waitKey(0);
}
if (event == CV_EVENT_LBUTTONUP)
{
/// ROI selected
select_flag = 1;
drag = 0;
}
}
///------- Main() ----------------------------------------------------------------------------------------------------------
int main()
{
int k;
/*
///open webcam
VideoCapture cap(0);
if (!cap.isOpened())
return 1;*/
///open video file
VideoCapture cap;
cap.open( "Megamind.avi" );
if ( !cap.isOpened() )
{ cout << "Unable to open video file" << endl; return -1; }
/*
/// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/
cap >> img;
GaussianBlur( img, img, Size(7,7), 3.0 );
imshow( "image", img );
while (1)
{
cap >> img;
if ( img.empty() )
break;
// Flip the frame horizontally and add blur
cv::flip( img, img, 1 );
GaussianBlur( img, img, Size(7,7), 3.0 );
if ( rect.width == 0 && rect.height == 0 )
cvSetMouseCallback( "image", mouseHandler, NULL );
else
track();
imshow("image", img);
// waitKey(100); k = waitKey(75);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
The video at https://www.youtube.com/watch?v=rBCopeneCos shows a test of the above program.
I would avoid the use of global variable because I think they do not help in understanding where the problems lie; furthermore I also would pay attention to the shallow vs deep copy for OpenCV's Mat class, as 1'' wrote in his answer:
OpenCV's Mat class is simply a header for the actual image data,
which it contains a pointer to. The operator= copies the pointer
(and the other information in the header, like the image dimensions)
so that both Mats share the same data. This means that modifying the
data in one Mat also changes it in the other. This is called a
"shallow" copy, since only the top layer (the header) is copied, not
the lower layer (the data).
To make a copy of the underlying data (called a "deep copy"), use the
clone() method. You can find information about it on the page that
you linked to.
Edit about the drift:
In comment Real-time template matching - OpenCV, C++, learner asks about the tracking drift.
Looking at the video https://www.youtube.com/watch?v=rBCopeneCos we see that at the beginning of the video the program is tracking the girl's right eye while at 0:15 it starts to track the girl's eyebrows, at 0:19 it starts to track the boy's eyebrows and it never tracks anymore the girl's eye, for example at 0:27 it tracks the girl's right eyebrow while the girl's right eye is clearly visible in the image.
This drift from tracking the eye to tracking the eyebrow is normal in a simple code as the one I posted and the explanation is quite simple: see the video at https://www.youtube.com/watch?v=sGHEu3u9XvI, the video starts with the tracking (contents of the black rectangle) of the playing card, then I remove the playing card from the scene and the tracking black rectangle "drifts" to the bottom left of the scene; after all we are continuosly updating the template and so the behavior is correct: the program stops to track the playing card and starts to track a white background and so you have the "drift"... in other words, your TplMatch() function will always return a valid result image and your current implementation of minmax() will always return a valid a minimum.
You can follow the OpenCV tutorial "Template Matching". Your track function may contain the code to find the template in the current frame; a simple code is based on the matchTemplate and minMaxLoc functions.
The interesting issue related to the "real-time" part of your question is to succeed in finding the match, if present, within the time between the current frame and the next one.
Edit:
The following quick-and-dirty code and the video at http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be shows what I mean for tracking.
Since I do not have a webcam I slightly modified your code to just use a video, this one https://code.ros.org/trac/opencv/export/7237/trunk/opencv/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi
I then add track function and some logic to slow down the video until I choose a ROI and after that playing the video at normal speed.
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
void track(cv::Mat &img, const cv::Mat &templ, const cv::Rect &r )
{
static int n = 0;
if (select_flag)
{
templ.copyTo(mytemplate);
select_flag = false;
go_fast = true;
}
cv::Mat result;
/// Do the Matching and Normalize
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
/// Localizing the best match with minMaxLoc
double minVal; double maxVal; Point minLoc; Point maxLoc;
Point matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
rectangle( img, matchLoc, Point( matchLoc.x + mytemplate.cols , matchLoc.y + mytemplate.rows ), CV_RGB(255, 255, 255), 3 );
std::cout << matchLoc << "\n";
}
///MouseCallback function
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/* left button clicked. ROI selection begins */
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/* mouse dragged. ROI being selected */
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
}
if (event == CV_EVENT_LBUTTONUP)
{
/* ROI selected */
select_flag = 1;
drag = 0;
}
}
///Main function
int main()
{
int k;
/*
VideoCapture cap(0);
if (!cap.isOpened())
return 1;
*/
VideoCapture cap;
//cap.open("~/Downloads/opencv-2.4.4/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi");
cap.open("./Megamind.avi");
if (!cap.isOpened())
{
printf("Unable to open video file\n");
return -1;
}
/*
// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);
*/
cap >> img;
imshow("image", img);
while (1)
{
cap >> img;
if (img.empty())
break;
if (rect.width == 0 && rect.height == 0)
cvSetMouseCallback("image", mouseHandler, NULL);
else
track(img, roiImg, rect);
if (select_flag == 1)
imshow("Template", roiImg);
imshow("image", img);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
You can also have a general introduction to the subject starting from this wikipedia page http://en.wikipedia.org/wiki/Video_tracking
One whole day I have tried a lot to get all the related matches (with matchtemplate function) in sub-Image , which is ROI i have already extracted from the original image with the mousecallback function. So my code is below for the Matchingfunction
////Matching Function
void CTemplate_MatchDlg::OnBnTemplatematch()
{
namedWindow("reference",CV_WINDOW_AUTOSIZE);
while(true)
{
Mat ref = imread("img.jpg"); // Original Image
mod_ref = cvCreateMat(ref.rows,ref.cols,CV_32F);// resizing the image to fit in picture box
resize(ref,mod_ref,Size(),0.5,0.5,CV_INTER_AREA);
Mat tpl =imread("Template.jpg"); // TEMPLATE IMAGE
cvSetMouseCallback("reference",find_mouseHandler,0);
Mat aim=roiImg1.clone(); // SUB_IMAGE FROM ORIGINALIMAGE
// aim variable contains the ROI matrix
// next, want to perform template matching in that ROI // and display results on original image
if(select_flag1 == 1)
{
// imshow("ref",aim);
Mat res(aim.rows-tpl.rows+1, aim.cols-tpl.cols+1,CV_32FC1);
matchTemplate(aim, tpl, res, CV_TM_CCOEFF_NORMED);
threshold(res, res, 0.8, 1., CV_THRESH_TOZERO);
while (1)
{
double minval, maxval, threshold = 0.8;
Point minloc, maxloc;
minMaxLoc(res, &minval, &maxval, &minloc, &maxloc);
//// Draw Bound boxes for detected templates in sub matrix
if (maxval >= threshold)
{
rectangle(
aim,
maxloc,
Point(maxloc.x + tpl.cols, maxloc.y + tpl.rows),
CV_RGB(0,255,0), 1,8,0
);
floodFill(res, maxloc, cv::Scalar(0), 0, cv::Scalar(.1), cv::Scalar(1.));
}else
break;
}
}
////Bounding box for ROI selection with mouse
rectangle(mod_ref, rect2, CV_RGB(255, 0, 0), 1, 8, 0); // rect2 is ROI
// my idea is to get all the matches in ROI with bounding boxes
// no need to mark any matches outside the ROI
//Clearly i want to process only ROI
imshow("reference", mod_ref); // show the image with the results
waitKey(10);
}
//cvReleaseMat(&mod_ref);
destroyWindow("reference");
}
/// ImplementMouse Call Back
void find_mouseHandler(int event, int x, int y, int flags, void* param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/* left button clicked. ROI selection begins*/
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/* mouse dragged. ROI being selected*/
Mat img3 = mod_ref.clone();
point2 = Point(x, y);
rectangle(img3, point1, point2, CV_RGB(255, 0, 0), 1, 8, 0);
imshow("reference", img3);
//
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
Mat img4=mod_ref.clone();
point2 = Point(x, y);
rect1 = Rect(point1.x,point1.y,x-point1.x,y-point1.y);
drag = 0;
roiImg1 = mod_ref(rect1); //SUB_IMAGE MATRIX
imshow("reference", img4);
}
if (event == CV_EVENT_LBUTTONUP)
{
/* ROI selected */
select_flag1 = 1;
drag = 0;
}
}
build and debugging process successfully done. But, when I click the Match button in dialog I'm getting the error:
Unhandled exception at 0x74bf812f in Match.exe: Microsoft C++ exception: cv::Exception at memory location 0x001ae150..
So my idea is to get all the matches in the Sub-image when compare with the TEMPLATE IMAGE and show the final result (matches with bounding boxes) in the ORIGINAL IMAGE itself.
Anyone help me in this regard!! Help would be appreciated greatly!!
My code below is a modification of the original tutorial provided by OpenCV.
It loads an image from the command-line and displays it on the screen so the user can draw a rectangle somewhere to select the sub-image to be the template. After that operation is done, the sub-image will be inside a green rectangle:
Press any key to let the program perform the template matching. A new window titled "Template Match:" appears displaying the original image plus a blue rectangle that shows the matched area:
#include <cv.h>
#include <highgui.h>
#include <iostream>
const char* ref_window = "Draw rectangle to select template";
std::vector<cv::Point> rect_points;
void mouse_callback(int event, int x, int y, int flags, void* param)
{
if (!param)
return;
cv::Mat* ref_img = (cv::Mat*) param;
// Upon LMB click, store the X,Y coordinates to define a rectangle.
// Later this info is used to set a ROI in the reference image.
switch (event)
{
case CV_EVENT_LBUTTONDOWN:
{
if (rect_points.size() == 0)
rect_points.push_back(cv::Point(x, y));
}
break;
case CV_EVENT_LBUTTONUP:
{
if (rect_points.size() == 1)
rect_points.push_back(cv::Point(x, y));
}
break;
default:
break;
}
if (rect_points.size() == 2)
{
cv::rectangle(*ref_img,
rect_points[0],
rect_points[1],
cv::Scalar(0, 255, 0),
2);
cv::imshow(ref_window, *ref_img);
}
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <image>" << std::endl;
return -1;
}
cv::Mat source = cv::imread(argv[1]); // original image
if (source.empty())
{
std::cout << "!!! Failed to load source image." << std::endl;
return -1;
}
// For testing purposes, our template image will be a copy of the original.
// Later we will present it in a window to the user, and he will select a region
// as a template, and then we'll try to match that to the original image.
cv::Mat reference = source.clone();
cv::namedWindow(ref_window, CV_WINDOW_AUTOSIZE);
cv::setMouseCallback(ref_window, mouse_callback, (void*)&reference);
cv::imshow(ref_window, reference);
cv::waitKey(0);
if (rect_points.size() != 2)
{
std::cout << "!!! Oops! You forgot to draw a rectangle." << std::endl;
return -1;
}
// Create a cv::Rect with the dimensions of the selected area in the image
cv::Rect template_roi = cv::boundingRect(rect_points);
// Create THE TEMPLATE image using the ROI from the rectangle
cv::Mat template_img = cv::Mat(source, template_roi);
// Create the result matrix
int result_cols = source.cols - template_img.cols + 1;
int result_rows = source.rows - template_img.rows + 1;
cv::Mat result;
// Do the matching and normalize
cv::matchTemplate(source, template_img, result, CV_TM_CCORR_NORMED);
cv::normalize(result, result, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());
/// Localizing the best match with minMaxLoc
double min_val = 0, max_val = 0;
cv::Point min_loc, max_loc, match_loc;
int match_method = CV_TM_CCORR_NORMED;
cv::minMaxLoc(result, &min_val, &max_val, &min_loc, &max_loc, cv::Mat());
// When using CV_TM_CCORR_NORMED, max_loc holds the point with maximum
// correlation.
match_loc = max_loc;
// Draw a rectangle in the area that was matched
cv:rectangle(source,
match_loc,
cv::Point(match_loc.x + template_img.cols , match_loc.y + template_img.rows),
cv::Scalar(255, 0, 0), 2, 8, 0 );
imshow("Template Match:", source);
cv::waitKey(0);
return 0;
}
i'm trying to do a C++ program for fingertip recognition using kinect with the libraries OpenNi and OpenCV 2.4.
Here i list my main function (I intentionally avoided to write down here other function/classes that probably are not cause of the error):
// xml to initialize OpenNI
#define SAMPLE_XML_PATH "/home/jacopo/workspace/opencv_fingertip_detector/Sample-Tracking.xml"
int main(int argc, char ** argv)
{
XnStatus rc = XN_STATUS_OK;
xn::EnumerationErrors errors;
// Initialize OpenNI
rc = g_Context.InitFromXmlFile(SAMPLE_XML_PATH, g_ScriptNode, &errors);
CHECK_ERRORS(rc, errors, "InitFromXmlFile");
CHECK_RC(rc, "InitFromXmlFile");
rc = g_Context.FindExistingNode(XN_NODE_TYPE_DEPTH, g_DepthGenerator);
CHECK_RC(rc, "Find depth generator");
rc = g_Context.FindExistingNode(XN_NODE_TYPE_HANDS, g_HandsGenerator);
CHECK_RC(rc, "Find hands generator");
rc = g_Context.FindExistingNode(XN_NODE_TYPE_GESTURE, g_GestureGenerator);
CHECK_RC(rc, "Find gesture generator");
XnCallbackHandle h;
if (g_HandsGenerator.IsCapabilitySupported(XN_CAPABILITY_HAND_TOUCHING_FOV_EDGE))
{
g_HandsGenerator.GetHandTouchingFOVEdgeCap().RegisterToHandTouchingFOVEdge(TouchingCallback, NULL, h);
}
XnCallbackHandle hGestureIntermediateStageCompleted, hGestureProgress, hGestureReadyForNextIntermediateStage;
g_GestureGenerator.RegisterToGestureIntermediateStageCompleted(GestureIntermediateStageCompletedHandler, NULL, hGestureIntermediateStageCompleted);
g_GestureGenerator.RegisterToGestureReadyForNextIntermediateStage(GestureReadyForNextIntermediateStageHandler, NULL, hGestureReadyForNextIntermediateStage);
g_GestureGenerator.RegisterGestureCallbacks(NULL, GestureProgressHandler, NULL, hGestureProgress);
// Create NITE objects
g_pSessionManager = new XnVSessionManager;
rc = g_pSessionManager->Initialize(&g_Context, "Click,Wave", "RaiseHand");
CHECK_RC(rc, "SessionManager::Initialize");
g_pSessionManager->RegisterSession(NULL, SessionStarting, SessionEnding, FocusProgress);
g_pDrawer = new XnVPointDrawer(20, g_DepthGenerator);
g_pFlowRouter = new XnVFlowRouter;
g_pFlowRouter->SetActive(g_pDrawer);
g_pSessionManager->AddListener(g_pFlowRouter);
g_pDrawer->RegisterNoPoints(NULL, NoHands);
g_pDrawer->SetDepthMap(g_bDrawDepthMap);
// Initialization done. Start generating
rc = g_Context.StartGeneratingAll();
CHECK_RC(rc, "StartGenerating");
cv::Mat depthshow;
int key_pre = 0 ;
for( ; ; )
{
if (key_pre == 27)
break ;
XnMapOutputMode mode;
g_DepthGenerator.GetMapOutputMode(mode);
g_Context.WaitOneUpdateAll(g_DepthGenerator);
//TEST
g_DepthGenerator.GetMetaData(depthMD);
XnDepthPixel* pDepth = depthMD.WritableData();
// Update NITE tree
g_pSessionManager->Update(&g_Context);
if(g_pDrawer->handFound)
{
for (XnUInt y = 0; y < depthMD.YRes(); ++y)
{
for (XnUInt x = 0; x < depthMD.XRes(); ++x, ++pDepth)
{
if (*pDepth>g_pDrawer->pt3D.Z+50 || *pDepth<g_pDrawer->pt3D.Z-50)
{
*pDepth=0;
}
}
}
}
//for opencv Mat
cv::Mat depth16(480,640,CV_16SC1,(unsigned short*)depthMD.WritableData());
//convert depth 11 bit image 2 8 bit image
//depth16.convertTo(depthshow,CV_8U,-255/4096.0,255);
depth16.convertTo(depthshow, CV_8UC1, 0.025);
if(g_pDrawer->handFound)
{
cv::Point handPos(g_pDrawer->ptProjective.X, g_pDrawer->ptProjective.Y);
cv::circle(depthshow, handPos, 5, cv::Scalar(0xffff), 10, 8, 0);
Mat src_gray;
int thresh = 10;
RNG rng(12345);
blur( depthshow, src_gray, Size(3,3) );
Mat threshold_output;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
/// Detect edges using Threshold
threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY );
/// Find contours
findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
/// Find the convex hull object for each contour
vector<vector<int> > hullsI (contours.size());
vector<vector<Point> > hullsP (contours.size() );
vector<vector<Vec4i> > defects (contours.size() );
for( int i = 0; i < contours.size(); i++ )
{
convexHull(contours[i], hullsI[i], false, false);
convexHull(contours[i], hullsP[i], false, true);
if (contours[i].size() >3 )
{
convexityDefects(contours[i], hullsI[i], defects[i]);
}
}
/// Draw contours + hull results
Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 );
for( int i = 0; i < defects.size(); i++)
{
Vec4i defect(defects[i][0]);
Point start = contours[i][defect[0]];
Point end = contours[i][defect[1]];
Point far = contours[i][defect[2]];
std::cout << "Roba mia: "<< start << "\t" << end << "\t" << far << std::endl;
line(drawing, start, end, Scalar( 255, 0, 0 ), 1);
circle(drawing, far, 5, Scalar( 255, 0, 0 ), 1);
}
for( int i = 0; i < contours.size(); i++ )
{
Scalar color = Scalar( 255, 0, 0 );
drawContours( drawing, contours, i, color, 1, 8, vector<Vec4i>(), 0, Point() );
drawContours( drawing, hullsP, i, color, 1, 8, vector<Vec4i>(), 0, Point() );
}
/// Show in a window
namedWindow( "Hull demo", CV_WINDOW_AUTOSIZE );
imshow( "Hull demo", drawing );
}
cv::imshow("PrimeSense Nite Point Viewer", depthshow);
key_pre = cv::waitKey(33);
}
cv::destroyWindow("PrimeSense Nite Point Viewer");
CleanupExit();
return 0;
}
The segmentation fault happen when i try to use one of the variable start, end or far, indeed if I comment the three lines (std::cout, line and circle) right after I assign the three variables the code run normally without problems.
Does someone of you have an idea on how to solve this issue?
Thanks for reading
J.