Hey,
I'm working on a project to stabilize a video sequence using an optical flow method.
The optical flow part works well so far, but I see two possible ways forward:
1- After computing the optical flow, I find the average image displacement and subtract that average from the features of the second frame. My question is: what do I do next?
2- Alternatively, I could use OpenCV functions to stabilize the image: I compute the transformation matrix and then call cvPerspectiveTransform followed by cvWarpPerspective, but I get a "bad flag" error.
The code is below. What do I need to do to stabilize the image? Any solution you can provide is welcome.
#include <stdio.h>
#include <stdlib.h>
//#include "/usr/include/opencv/cv.h"
#include <cv.h>
#include <cvaux.h>
#include <highgui.h>
#include <math.h>
#include <iostream>
#define PI 3.1415926535898
double rads(double degs)
{
return (PI/180 * degs);
}
CvCapture *cap;
IplImage *img;
IplImage *frame;
IplImage *frame1;
IplImage *frame3;
IplImage *frame2;
IplImage *temp_image1;
IplImage *temp_image2;
IplImage *frame1_1C;
IplImage *frame2_1C;
IplImage *eig_image;
IplImage *temp_image;
IplImage *pyramid1 = NULL;
IplImage *pyramid2 = NULL;
char * mapx;
char * mapy;
int h;
int corner_count;
CvMat* M = cvCreateMat(3,3,CV_32FC1);
CvPoint p,q,l,s;
double hypotenuse;
double angle;
int line_thickness = 1, line_valid = 1, pos = 0;
CvScalar line_color;
CvScalar target_color[4] = { // in BGR order
{{ 0, 0, 255, 0 }}, // red
{{ 0, 255, 0, 0 }}, // green
{{ 255, 0, 0, 0 }}, // blue
{{ 0, 255, 255, 0 }} // yellow
};
inline static double square(int a)
{
return a * a;
}
char* IntToChar(int num){return NULL;}
/*{
char* retstr = static_cast<char*>(calloc(12, sizeof(char)));
if (sprintf(retstr, "%i", num) > 0)
{
return retstr;
}
else
{
return NULL;
}
}*/
inline static void allocateOnDemand( IplImage **img, CvSize size, int depth, int channels )
{
if ( *img != NULL )
return;
*img = cvCreateImage( size, depth, channels );
if ( *img == NULL )
{
fprintf(stderr, "Error: Couldn't allocate image. Out of memory?\n");
exit(-1);
}
}
void clearImage (IplImage *img)
{
for (int i=0; i<img->imageSize; i++)
img->imageData[i] = (char) 0;
}
int main()
{
cap = cvCaptureFromCAM(0);
//cap = cvCaptureFromAVI("/home/saif/Desktop/NAO.. the project/jj/Test3.avi");
CvSize frame_size;
// Reading the video's frame size
frame_size.height = (int) cvGetCaptureProperty( cap, CV_CAP_PROP_FRAME_HEIGHT );
frame_size.width = (int) cvGetCaptureProperty( cap, CV_CAP_PROP_FRAME_WIDTH );
cvNamedWindow("Optical Flow", CV_WINDOW_AUTOSIZE);
while(true)
{
frame = cvQueryFrame( cap );
if (frame == NULL)
{
fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
return -1;
}
// Allocating another image if it is not allocated already.
allocateOnDemand( &frame1_1C, frame_size, IPL_DEPTH_8U, 1 );
cvConvertImage(frame, frame1_1C, 0);
allocateOnDemand( &frame1, frame_size, IPL_DEPTH_8U, 3 );
cvConvertImage(frame, frame1, 0);
//Get the second frame of video.
frame = cvQueryFrame( cap );
if (frame == NULL)
{
fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
return -1;
}
if(!frame)
{
printf("bad video \n");
exit(0);
}
allocateOnDemand( &frame2_1C, frame_size, IPL_DEPTH_8U, 1 );
cvConvertImage(frame, frame2_1C, 0);
allocateOnDemand( &frame2, frame_size, IPL_DEPTH_8U, 3 );
cvConvertImage(frame, frame2, 0);
CvSize optical_flow_window = cvSize(5,5);
eig_image = cvCreateImage( frame_size, IPL_DEPTH_32F, 1 );
temp_image = cvCreateImage( frame_size, IPL_DEPTH_32F, 1 );
CvTermCriteria optical_flow_termination_criteria = cvTermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, .3 );
// Feature tracking
CvPoint2D32f frame1_features[4];
CvPoint2D32f frame2_features[4];
//cvCornerEigenValsAndVecs(eig_image, temp_image, 1 );
corner_count = 4;
cvGoodFeaturesToTrack(frame1_1C,eig_image , temp_image, frame1_features, &corner_count, 0.1, .01, NULL, 5, 1);
cvFindCornerSubPix( frame1_1C, frame1_features, corner_count,cvSize(5, 5) ,optical_flow_window , optical_flow_termination_criteria);
if ( corner_count <= 0 )
printf( "\nNo features detected.\n" );
else
printf( "\nNumber of features found = %d\n", corner_count );
//Lucas-Kanade method.
char optical_flow_found_feature[20];
float optical_flow_feature_error[20];
allocateOnDemand( &pyramid1, frame_size, IPL_DEPTH_8U, 1 );
allocateOnDemand( &pyramid2, frame_size, IPL_DEPTH_8U, 1 );
cvCalcOpticalFlowPyrLK(frame1_1C, frame2_1C, pyramid1, pyramid2, frame1_features, frame2_features, corner_count, optical_flow_window, 5, optical_flow_found_feature, NULL, optical_flow_termination_criteria, NULL);
/*
double sumOfDistancesX = 0;
double sumOfDistancesY = 0;
int debug = 0;
CvFont font1, font2;
CvScalar red, green, blue;
IplImage* seg_in = NULL;
IplImage *seg_out = NULL;
allocateOnDemand( &seg_in, frame_size, IPL_DEPTH_8U, 3 );
allocateOnDemand( &seg_out, frame_size, IPL_DEPTH_8U, 3 );
clearImage(seg_in);
clearImage(seg_in);
for( int i=0; i <corner_count; i++ )
{
if ( optical_flow_found_feature[i] == 0 )
continue;
p.x = (int) frame1_features[i].x;
p.y = (int) frame1_features[i].y;
q.x = (int) frame2_features[i].x;
q.y = (int) frame2_features[i].y;
angle = atan2( (double) p.y - q.y, (double) p.x - q.x );
sumOfDistancesX += q.x - p.x;
sumOfDistancesY += q.y - p.y;
//cvRemap(frame2,frame1,averageDistanceX , averageDistanceY,CV_INTER_LINEAR | CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
}
*/
/*
int averageDistanceX = sumOfDistancesX / corner_count;
int averageDistanceY = sumOfDistancesY / corner_count;
l.x = averageDistanceX - q.x;
s.y = averageDistanceY - q.y;
*/
#define cvWarpPerspectiveQMatrix cvGetPerspectiveTransform
//CvMat* N = cvCreateMat(3,3,CV_32FC1);
cvGetPerspectiveTransform(frame2_features, frame1_features, M);
cvPerspectiveTransform(frame1_features, frame2_features, M);
cvWarpPerspective( frame2_features, frame1_features, M,CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS,cvScalarAll(0) );
cvShowImage("Optical Flow", frame1);
cvWaitKey(50);
}
cvReleaseCapture(&cap);
cvReleaseMat(&M);
return 0;
}
You don't want to subtract the average displacement from the second image; you want to transform (move) the second image by the average displacement so that it "matches" the first. Which "displacement" you use depends on your situation.
If your camera is shaking but otherwise stationary, you want to use the average displacement between two consecutive frames as the transformation vector for the second frame. With each new frame you compute the displacement between the transformed first frame and the new frame, and transform the new frame accordingly.
If your camera moves and shakes (e.g. a helmet-mounted camera on a mountain biker), you first want to find an average displacement between frames over a few frames, and then transform each individual frame in the sequence by the difference between that average displacement and its displacement from the previous frame.
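As a concrete starting point for the stationary-but-shaking case, here is a minimal sketch (not taken from the question's code) that shifts the second frame back by the measured average displacement using cvWarpAffine; dx and dy are assumed to be the averageDistanceX/averageDistanceY values from the question's commented-out loop.
// Sketch: cancel the measured average motion by shifting `src` by (-dx, -dy).
// Assumes dx, dy are the mean of (frame2_features[i] - frame1_features[i]).
void shiftFrame(IplImage *src, IplImage *dst, double dx, double dy)
{
    float m[6] = { 1.f, 0.f, (float)(-dx),
                   0.f, 1.f, (float)(-dy) };
    CvMat shift = cvMat(2, 3, CV_32FC1, m);   // 2x3 affine matrix [1 0 -dx; 0 1 -dy]
    cvWarpAffine(src, dst, &shift, CV_INTER_LINEAR + CV_WARP_FILL_OUTLIERS, cvScalarAll(0));
}
Calling shiftFrame(frame2, stabilized, averageDistanceX, averageDistanceY) for each frame pair is the "transform the second image" step described above; the warped result is what you display instead of the raw frame.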
EDIT
What you basically need to do for option 2 is calculate the average of the frame-to-frame movement over the last few frames. You could do this in any number of ways, but I'd suggest something like a Kalman filter. Then, for a new frame, you calculate the movement between it and the (corrected) previous frame, subtract the average movement up to that point, and move the new frame by that difference.
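If a full Kalman filter feels like overkill to start with, an exponential moving average gives a rough idea of the same logic: estimate the intended camera motion, and shift each new frame only by the jitter (measured motion minus estimated intended motion). This is a sketch under that assumption, with alpha as a made-up smoothing factor; it is not a substitute for a proper Kalman filter.
// Sketch: smooth the per-frame displacement (dx, dy) with a running average and
// treat the remainder as jitter to be removed.
static double avgDx = 0.0, avgDy = 0.0;
const double alpha = 0.1;                    // assumed smoothing factor, 0 < alpha < 1
avgDx = (1.0 - alpha) * avgDx + alpha * dx;  // estimated intended motion
avgDy = (1.0 - alpha) * avgDy + alpha * dy;
double jitterX = dx - avgDx;                 // what the stabilizer should cancel
double jitterY = dy - avgDy;
// shift the new frame by (-jitterX, -jitterY), e.g. with the shiftFrame() helper above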
I have a video file from which I'm capturing frames. I want to crop a triangle from the captured frame and display it, but my program shows just the source frame.
Here is my code:
cv::Mat Detector::cropRegionOfInterest(cv::Mat& frame)
{
cv::Point corners[1][3];
corners[0][0] = cv::Point(0, frameHeight);
corners[0][1] = cv::Point(frameWidth, frameHeight);
corners[0][2] = cv::Point(frameWidth / 2, frameHeight / 2);
const cv::Point* cornerList[1] = { corners[0] };
int numPoints = 3;
int numPolygons = 1;
cv::Mat mask(frame.size(), CV_8UC1, cv::Scalar(0, 0, 0));
cv::fillPoly(mask, cornerList, &numPoints, numPolygons, cv::Scalar(255, 255, 255), 8);
cv::Mat result(frame.size(), CV_8UC3);
cv::bitwise_and(frame, mask, result);
return result;
}
Instead of displaying the source frame, I want it to display the cropped triangle.
Since you're using CV_8UC3 as the type of result, I'm assuming (see the Edit at the end of the answer if that's not the case) that the input image frame also has 3 channels. In that case, I'm a bit surprised that you can even see the non-cropped image, as running your code simply throws an exception on my machine at the call to bitwise_and:
OpenCV(3.4.1) Error: Sizes of input arguments do not match
From the documentation, it seems to me that you can't mix different input and mask types. A quick and dirty solution is to split the input image into a vector of three channels, call bitwise_and for each of them, and then merge them back. The code below works for me:
#include <stdio.h>
#include <opencv2/opencv.hpp>
using namespace cv;
cv::Mat cropRegionOfInterest(cv::Mat& frame)
{
const int frameWidth=frame.cols-1;
const int frameHeight=frame.rows-1;
cv::Point corners[1][3];
corners[0][0] = cv::Point(0, frameHeight);
corners[0][1] = cv::Point(frameWidth, frameHeight);
corners[0][2] = cv::Point(frameWidth / 2, frameHeight / 2);
const cv::Point* cornerList[1] = { corners[0] };
int numPoints = 3;
int numPolygons = 1;
cv::Mat mask(frame.rows,frame.cols, CV_8UC1, cv::Scalar(0, 0, 0));
cv::fillPoly(mask, cornerList, &numPoints, numPolygons, cv::Scalar(255, 255, 255), 8);
std::vector<cv::Mat> src_channels;
std::vector<cv::Mat> result_channels;
cv::split(frame,src_channels);
for(int idx=0;idx<3;++idx)
{
result_channels.emplace_back(frame.rows,frame.cols,CV_8UC1);
cv::bitwise_and(src_channels[idx], mask,result_channels[idx]);
}
cv::Mat result;
cv::merge(result_channels,result);
return result;
}
int main(int argc, char** argv )
{
if ( argc != 2 )
{
printf("usage: DisplayImage.out <Image_Path>\n");
return -1;
}
Mat image;
image = imread( argv[1], 1 );
if ( !image.data )
{
printf("No image data \n");
return -1;
}
cv::Mat cropped=cropRegionOfInterest(image);
namedWindow("cropped Image", WINDOW_AUTOSIZE );
imshow("cropped Image", cropped);
waitKey(0);
return 0;
}
Edit: From your comments it seems that frame is actually grayscale. In that case, never mind all the code above, and just change cv::Mat result(frame.size(), CV_8UC3); to
cv::Mat result(frame.rows,frame.cols,CV_8UC1);
in your original code.
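As an aside, if you do want to keep a 3-channel frame, a shorter route than splitting and merging (not part of the answer above, but standard cv::Mat API) is the mask overload of copyTo, which accepts a single-channel CV_8U mask regardless of the source type:
// Sketch: copy only the masked (triangle) pixels onto a black canvas.
// `frame` is the input image and `mask` the CV_8UC1 mask built with fillPoly.
cv::Mat result = cv::Mat::zeros(frame.size(), frame.type());
frame.copyTo(result, mask);   // pixels where mask != 0 are copied, the rest stay black
return result;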
I'm having difficulty implementing an optical flow method to track vehicles using input from a Haar cascade.
So far I can only run optical flow on its own, not with input from the Haar cascade.
Can you help me?
This is my code:
using namespace cv;
using namespace std;
int main()
{
int count= 0; double areax, areay, KoorX, KoorY;
Mat prev_frame, gray, temp, prev_img;
Mat frameROI, imgROI, frameLKP;
Mat ROI, prevROI, ROIOF;
//Parameter OFLKP
int win_size = 24;
int maxCorners =24;
int maxlevel =8;
TermCriteria termcrit(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS,20,0.01);
vector<uchar>found;
vector<float>error;
//Parameter Shi-Tomasi
vector<Point2f> prevcorners, corners;
double qualityLevel = 0.05; //0.4
double minDistance = 1; //2
int blockSize = 3;
bool useHarrisDetector = false;
double k = 0.04;
vector<Point2f> frame_corners; // CvPoint array of features
frame_corners.reserve(maxCorners);
vector<Point2f> prevframe_corners;
prevframe_corners.reserve(maxCorners);
//=======> Open the video
VideoCapture video("Uji1.avi");
//=======> Load the cascade .xml
CascadeClassifier Casmobil;
String Casmobil_file = "car2500.xml";
Casmobil.load(Casmobil_file);
namedWindow("Video", 1);
namedWindow("Tracking OF", 1);
namedWindow("Deteksi Haar", 1);
video >> prev_frame;
Rect roi = Rect(50, 180, 540, 240);
prevROI=prev_frame(roi);
cvtColor(prevROI, gray, CV_BGR2GRAY);
gray.convertTo(prev_img, CV_8UC1);
while(true)
{
//=====> read a frame from the video
video >> frameROI;
//=====> ROI
Rect roi = Rect(50, 180, 540, 240);
Mat ROI=frameROI(roi);
cvtColor(ROI, gray, CV_BGR2GRAY); //=====> RGB to Grayscale
gray.convertTo(imgROI, CV_8UC1);
Mat ROIOF = frameROI(roi);
//======> Detection
vector<Rect> mobil;
Casmobil.detectMultiScale(gray, mobil, 1.1, 3,
CV_HAAR_DO_CANNY_PRUNING|CV_HAAR_SCALE_IMAGE,
Size(0,0));
//======> Draw boxes
for (size_t i = 0; i < mobil.size(); i++)
{
Rect kotak = mobil[i];
areax = (mobil[i].x + mobil[i].width*0.5);
areay = (mobil[i].y + mobil[i].height*0.5);
Point center = Point(areax ,areay);
rectangle(ROI, kotak,CV_RGB(0,255,0),2,8,0);
circle(ROI, center, 3,CV_RGB(255, 0, 0),-2);
}
//prev_frame
goodFeaturesToTrack(imgROI, frame_corners,maxCorners,
qualityLevel,minDistance,Mat(),
blockSize,useHarrisDetector,k);
cornerSubPix(imgROI, frame_corners, Size(win_size, win_size),
Size( -1, -1 ),termcrit);
calcOpticalFlowPyrLK(imgROI, prev_img, frame_corners,
prevframe_corners, found, error,
Size(win_size, win_size), maxlevel,termcrit);
for( int j = 0; j < frame_corners.size(); j++ )
{
circle(ROIOF, frame_corners[j], 2, CV_RGB(255, 0, 0), -1);
circle(ROIOF, prevframe_corners[j], 2, CV_RGB(0, 0, 255), -1);
//circle(copy, corners[i], r, Scalar(rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255)), -1, 8, 0 );
line(ROIOF, prevframe_corners[j], frame_corners[j], CV_RGB(0, 255, 0),2, 8, 0);
}
prev_img = imgROI.clone();
imshow("Video ", frameROI);
imshow("Deteksi Haar", ROI);
imshow("Tracking OF", ROIOF);
if(waitKey(400) >= 0) break;
}
return 0;
}
Thanks.
Do I need to replace the input image to goodFeaturesToTrack by cropping images from the Haar results, like this:
Mat Crop = imgROI(mobil[i]);
goodFeaturesToTrack(Crop,frame_corners,maxCorners,qualityLevel,minDistance,Mat(),blockSize,useHarrisDetector,k);
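One way to couple the two, sketched below on the assumption that mobil holds the Haar detections and imgROI is the grayscale ROI from your code, is to keep the full ROI as the input to goodFeaturesToTrack but pass a mask that is non-zero only inside the detected rectangles; the corner coordinates then stay in ROI coordinates and can go straight into calcOpticalFlowPyrLK. If you crop instead, the returned corners are relative to the crop and must be offset by mobil[i].tl() before drawing or tracking.
// Sketch: restrict Shi-Tomasi corners to the Haar detections via a mask.
cv::Mat detectionMask = cv::Mat::zeros(imgROI.size(), CV_8UC1);
for (size_t i = 0; i < mobil.size(); i++)
    cv::rectangle(detectionMask, mobil[i], cv::Scalar(255), -1);   // -1 = filled rectangle
goodFeaturesToTrack(imgROI, frame_corners, maxCorners, qualityLevel, minDistance,
                    detectionMask,   // only pixels where the mask is non-zero yield corners
                    blockSize, useHarrisDetector, k);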
Some warnings appear in the terminal while the program is running:
OpenCV Error: Assertion failed(s>=0) in setSize, file /home/me/opencv2.4/modules/core/src/matrix.cpp, line 116
The program compiles without error and runs; the problem is that the eye ROI size changes when the user moves closer to or farther from the webcam, and this size change triggers the warning. I managed to silence the warnings by forcing the eye ROI size to equal my eye template size, but then the program fails to classify the user's eyes as open or closed because the minVal obtained is 0. The method used is OpenCV template matching. Alternatively, fixing my distance from the webcam and fixing the eye template size avoids the warning. Every time the warning appears, the program fails to classify open/closed eyes, and overall it doesn't work reliably because it sometimes classifies open eyes as closed and vice versa.
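One way to avoid the size assertion without fixing my distance from the webcam might be to rescale the detected eye ROI to a constant working size before matching, keeping the templates smaller than that size. A minimal sketch, where the 64x32 working size is an assumption to tune and roiImg/templ/match_method are the names used in the code below:
// Sketch: give matchTemplate a constant-sized search image so the ROI size
// no longer depends on the user's distance from the camera.
const cv::Size workingSize(64, 32);   // assumed; templates must be no larger than this
cv::Mat roiFixed;
cv::resize(roiImg, roiFixed, workingSize, 0, 0, cv::INTER_LINEAR);
cv::matchTemplate(roiFixed, templ, result, match_method);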
Questions:
Is there any alternative to identify open and close eyes other than template matching?
Any ideas how to improve the program in classification of blinking?
Any working example that you know in opencv C/C++ API can classify open and close eyes and count accurately the blinking times?
static CvMemStorage* storage = 0;
// Create a new Haar classifier
static CvHaarClassifierCascade* cascade = 0;
// Function prototype for detecting and drawing an object from an image
bool detect_and_draw( IplImage* image ,CvHaarClassifierCascade* cascade);
const char *cascade_name[1]={"eyes.xml"};
cv::Mat roiImg;
int threshold_value = 200;
int threshold_type = 3;
int const max_value = 255;
int const max_type = 4;
int const max_BINARY_value = 255;
int hough_thr = 35;
cv::Mat src_gray, dst;
using namespace cv;
Mat img1; Mat img2; Mat templ; Mat result;
const char* image_window = "Source Image";
const char* result_window = "Result window";
int match_method=0;
int max_Trackbar = 5;
int eye_open=0;
int eye_close=0;
//Matching with 2 images ,eye closed or open
void MatchingMethod(cv::Mat templ,int id )
{
/// Source image to display
cv::Mat img_display;
roiImg.copyTo( img_display );
/// Create the result matrix
int result_cols = roiImg.cols - templ.cols + 1;
int result_rows = roiImg.rows - templ.rows + 1;
result.create( result_cols, result_rows, CV_32FC1 );
/// Do the Matching and Normalize
cv::matchTemplate( roiImg, templ, result, match_method );
cv::normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
/// Localizing the best match with minMaxLoc
double minVal; double maxVal; Point minLoc; Point maxLoc;
cv::Point matchLoc;
cv::minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
///Just checking whether the template match value reaches the threshold
if(id == 0 && (minVal < 0))
{
eye_open=eye_open+1;
if(eye_open == 1)
{
std::cout<<"Eye Open"<<std::endl;
eye_open=0;
eye_close=0;
}
}
else if(id == 1 && (minVal < 0))
eye_close=eye_close+1;
if(eye_close == 1)
{
std::cout<<"Eye Closed"<<std::endl;
eye_close=0;
system("python send_arduino.py");
}
/// For SQDIFF and SQDIFF_NORMED, the best matches are lower values. For all the other methods, the higher the better
if( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
{ matchLoc = minLoc; }
else
{ matchLoc = maxLoc; }
/// Show me what you got
cv::rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
cv::rectangle( result, matchLoc, Point( matchLoc.x + templ.cols , matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
cv::imshow( image_window, img_display );
cv::imshow( result_window, result );
return;
}
void detect_blink(cv::Mat roi)
{
try
{
MatchingMethod(img1,0);
MatchingMethod(img2,1);
}
catch( cv::Exception& e )
{
std::cout<<"An exception occued"<<std::endl;
}
}
// Main function, defines the entry point for the program.
int main( int argc, char** argv )
{
if(argc <= 1)
{
std::cout<<"\n Help "<<std::endl;
std::cout<<"\n ------------------------------------\n"<<std::endl;
std::cout<<"./blink_detect open_eye.jpg close_eye.jpg\n"<<std::endl;
std::cout<<"Eg :: ./blink_detect 2.jpg 3.jpg\n"<<std::endl;
std::cout<<"\n ------------------------------------\n"<<std::endl;
exit(0);
}
// Structure for getting video from camera or avi
CvCapture* capture = 0;
// Images to capture the frame from video or camera or from file
IplImage *frame, *frame_copy = 0;
// Used for calculations
int optlen = strlen("--cascade=");
// Input file name for avi or image file.
const char* input_name;
img1 = imread( argv[1], 1 );
img2 = imread( argv[2], 1 );
// Load the HaarClassifierCascade
/// Create windows
cv::namedWindow( image_window, CV_WINDOW_AUTOSIZE );
cv::namedWindow( result_window, CV_WINDOW_AUTOSIZE );
// Allocate the memory storage
storage = cvCreateMemStorage(0);
capture = cvCaptureFromCAM( 0);
// Create a new named window with title: result
cvNamedWindow( "original_frame", 1 );
// If loaded succesfully, then:
if( capture )
{
// Capture from the camera.
for(;;)
{
// Capture the frame and load it in IplImage
if( !cvGrabFrame( capture ))
break;
frame = cvRetrieveFrame( capture );
// If the frame does not exist, quit the loop
if( !frame )
break;
// Allocate framecopy as the same size of the frame
if( !frame_copy )
frame_copy = cvCreateImage( cvSize(frame->width,frame->height),
IPL_DEPTH_8U, frame->nChannels );
// Check the origin of image. If top left, copy the image frame to frame_copy.
if( frame->origin == IPL_ORIGIN_TL )
cvCopy( frame, frame_copy, 0 );
// Else flip and copy the image
for(int i=0;i<1;i++)
{
cascade = (CvHaarClassifierCascade*)cvLoad( cascade_name[i], 0, 0, 0 );
// Check whether the cascade has loaded successfully. Else report and error and quit
if( !cascade )
{
fprintf( stderr, "ERROR: Could not load classifier cascade\n" );
return -1;
}
// Call the function to detect and draw the face
if(detect_and_draw(frame_copy,cascade))
{
std::cout<<"Detected"<<std::endl;
}
}
// Wait for a while before proceeding to the next frame
if( cvWaitKey( 1 ) >= 0 )
break;
}
// Release the images, and capture memory
cvReleaseHaarClassifierCascade(&cascade);
cvReleaseImage( &frame_copy );
cvReleaseCapture( &capture );
cvReleaseMemStorage(&storage);
}
return 0;
}
// Function to detect and draw any faces that is present in an image
bool detect_and_draw( IplImage* img,CvHaarClassifierCascade* cascade )
{
int scale = 1;
// Create a new image based on the input image
IplImage* temp = cvCreateImage( cvSize(img->width/scale,img->height/scale), 8, 3 );
// Create two points to represent the face locations
CvPoint pt1, pt2;
int i;
// Clear the memory storage which was used before
cvClearMemStorage( storage );
// Find whether the cascade is loaded, to find the faces. If yes, then:
if( cascade )
{
// There can be more than one face in an image. So create a growable sequence of faces.
// Detect the objects and store them in the sequence
CvSeq* faces = cvHaarDetectObjects( img, cascade, storage,
1.1, 8, CV_HAAR_DO_CANNY_PRUNING,
cvSize(40, 40) );
// Loop the number of faces found.
for( i = 0; i < (faces ? faces->total : 0); i++ )
{
// Create a new rectangle for drawing the face
CvRect* r = (CvRect*)cvGetSeqElem( faces, i );
// Find the dimensions of the face,and scale it if necessary
pt1.x = r->x*scale;
pt2.x = (r->x+r->width)*scale;
pt1.y = r->y*scale;
pt2.y = (r->y+r->height)*scale;
// Draw the rectangle in the input image
cvRectangle( img, pt1, pt2, CV_RGB(255,0,0), 3, 8, 0 );
cv::Mat image(img);
cv::Rect rect;
rect = cv::Rect(pt1.x,pt1.y,(pt2.x-pt1.x),(pt2.y-pt1.y));
roiImg = image(rect);
cv::imshow("roi",roiImg);
///Send to arduino
detect_blink(roiImg);
}
}
cvShowImage( "original_frame", img );
if(i > 0)
return 1;
else
return 0;
cvReleaseImage( &temp );
}
Reference:
Website referred
I am trying to implement real-time tracking using templates. I wish to update the template with every frame. The main modifications I have done are:
1) separated the template matching and minMaxLoc into separate functions, namely TplMatch() and minmax(), respectively.
2) Inside the track() function, select_flag is kept always true so that the new template is copied to 'mytemplate' on every iteration.
3) The last 3 lines of track() update the template (roiImg).
4) Also, I have removed the arguments to the track() function, since img and roiImg are global variables and there is no need to pass them in.
Following is the code:
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
///------- template matching -----------------------------------------------------------------------------------------------
Mat TplMatch( Mat &img, Mat &mytemplate )
{
Mat result;
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
return result;
}
///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------
Point minmax( Mat &result )
{
double minVal, maxVal;
Point minLoc, maxLoc, matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
return matchLoc;
}
///------- tracking --------------------------------------------------------------------------------------------------------
void track()
{
if (select_flag)
{
roiImg.copyTo(mytemplate);
// select_flag = false;
go_fast = true;
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
Point match = minmax( result );
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
std::cout << "match: " << match << endl;
/// latest match is the new template
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
roiImg = img( ROI );
imshow( "roiImg", roiImg ); //waitKey(0);
}
///------- MouseCallback function ------------------------------------------------------------------------------------------
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/// left button clicked. ROI selection begins
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/// mouse dragged. ROI being selected
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
// imshow("MOUSE roiImg", roiImg); waitKey(0);
}
if (event == CV_EVENT_LBUTTONUP)
{
/// ROI selected
select_flag = 1;
drag = 0;
}
}
///------- Main() ----------------------------------------------------------------------------------------------------------
int main()
{
int k;
/*
///open webcam
VideoCapture cap(0);
if (!cap.isOpened())
return 1;*/
///open video file
VideoCapture cap;
cap.open( "Megamind.avi" );
if ( !cap.isOpened() )
{ cout << "Unable to open video file" << endl; return -1; }
/*
/// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/
cap >> img;
GaussianBlur( img, img, Size(7,7), 3.0 );
imshow( "image", img );
while (1)
{
cap >> img;
if ( img.empty() )
break;
// Flip the frame horizontally and add blur
cv::flip( img, img, 1 );
GaussianBlur( img, img, Size(7,7), 3.0 );
if ( rect.width == 0 && rect.height == 0 )
cvSetMouseCallback( "image", mouseHandler, NULL );
else
track();
imshow("image", img);
// waitKey(100); k = waitKey(75);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
The updated template is not being tracked. I am not able to figure out why this is happening, since I am updating my template (roiImg) on each iteration. The match value from the minmax() function returns the same point (coordinates) every time. A test video is available at: http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be
Please look into it and guide ahead...thanks a lot!
I got your original code from this revision of your question: https://stackoverflow.com/revisions/20180073/3
I made the smallest possible change to your original code; my resulting code is the following:
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
///------- template matching -----------------------------------------------------------------------------------------------
Mat TplMatch( Mat &img, Mat &mytemplate )
{
Mat result;
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
return result;
}
///------- Localizing the best match with minMaxLoc ------------------------------------------------------------------------
Point minmax( Mat &result )
{
double minVal, maxVal;
Point minLoc, maxLoc, matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
return matchLoc;
}
///------- tracking --------------------------------------------------------------------------------------------------------
void track()
{
if (select_flag)
{
//roiImg.copyTo(mytemplate);
// select_flag = false;
go_fast = true;
}
// imshow( "mytemplate", mytemplate ); waitKey(0);
Mat result = TplMatch( img, mytemplate );
Point match = minmax( result );
rectangle( img, match, Point( match.x + mytemplate.cols , match.y + mytemplate.rows ), CV_RGB(255, 255, 255), 0.5 );
std::cout << "match: " << match << endl;
/// latest match is the new template
Rect ROI = cv::Rect( match.x, match.y, mytemplate.cols, mytemplate.rows );
roiImg = img( ROI );
roiImg.copyTo(mytemplate);
imshow( "roiImg", roiImg ); //waitKey(0);
}
///------- MouseCallback function ------------------------------------------------------------------------------------------
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/// left button clicked. ROI selection begins
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/// mouse dragged. ROI being selected
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
roiImg.copyTo(mytemplate);
// imshow("MOUSE roiImg", roiImg); waitKey(0);
}
if (event == CV_EVENT_LBUTTONUP)
{
/// ROI selected
select_flag = 1;
drag = 0;
}
}
///------- Main() ----------------------------------------------------------------------------------------------------------
int main()
{
int k;
/*
///open webcam
VideoCapture cap(0);
if (!cap.isOpened())
return 1;*/
///open video file
VideoCapture cap;
cap.open( "Megamind.avi" );
if ( !cap.isOpened() )
{ cout << "Unable to open video file" << endl; return -1; }
/*
/// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);*/
cap >> img;
GaussianBlur( img, img, Size(7,7), 3.0 );
imshow( "image", img );
while (1)
{
cap >> img;
if ( img.empty() )
break;
// Flip the frame horizontally and add blur
cv::flip( img, img, 1 );
GaussianBlur( img, img, Size(7,7), 3.0 );
if ( rect.width == 0 && rect.height == 0 )
cvSetMouseCallback( "image", mouseHandler, NULL );
else
track();
imshow("image", img);
// waitKey(100); k = waitKey(75);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
The video at https://www.youtube.com/watch?v=rBCopeneCos shows a test of the above program.
I would avoid the use of global variables because I think they do not help in understanding where the problems lie; furthermore, I would also pay attention to shallow vs. deep copies of OpenCV's Mat class, as 1'' wrote in his answer:
OpenCV's Mat class is simply a header for the actual image data, which it contains a pointer to. The operator= copies the pointer (and the other information in the header, like the image dimensions) so that both Mats share the same data. This means that modifying the data in one Mat also changes it in the other. This is called a "shallow" copy, since only the top layer (the header) is copied, not the lower layer (the data).
To make a copy of the underlying data (called a "deep copy"), use the clone() method. You can find information about it on the page that you linked to.
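To illustrate the quoted point, a minimal standalone snippet (not part of either program above):
#include <opencv2/opencv.hpp>
#include <iostream>
int main()
{
    cv::Mat a = cv::Mat::zeros(2, 2, CV_8UC1);
    cv::Mat shallow = a;        // operator=: shares the same pixel data as `a`
    cv::Mat deep = a.clone();   // clone(): owns its own copy of the pixels
    a.at<uchar>(0, 0) = 255;
    std::cout << (int)shallow.at<uchar>(0, 0) << std::endl;  // 255: the shallow copy sees the change
    std::cout << (int)deep.at<uchar>(0, 0) << std::endl;     // 0: the deep copy is unaffected
    return 0;
}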
Edit about the drift:
In a comment on Real-time template matching - OpenCV, C++, learner asks about the tracking drift.
Looking at the video https://www.youtube.com/watch?v=rBCopeneCos we see that at the beginning the program is tracking the girl's right eye, at 0:15 it starts to track the girl's eyebrows, at 0:19 it starts to track the boy's eyebrows, and it never returns to the girl's eye; for example at 0:27 it tracks the girl's right eyebrow while the girl's right eye is clearly visible in the image.
This drift from tracking the eye to tracking the eyebrow is normal in code as simple as what I posted, and the explanation is quite simple: see the video at https://www.youtube.com/watch?v=sGHEu3u9XvI. It starts with the tracking (the contents of the black rectangle) of a playing card; then I remove the playing card from the scene and the tracking rectangle "drifts" to the bottom left of the scene. After all, we are continuously updating the template, so the behavior is correct: the program stops tracking the playing card and starts tracking the white background, hence the "drift"... in other words, your TplMatch() function will always return a valid result image and your current implementation of minmax() will always return a valid minimum.
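A common mitigation, which is not in the code above, is to accept the new template only when the match is good enough. With CV_TM_SQDIFF_NORMED a lower score is better, so a threshold on minVal can gate the update; note that the raw matchTemplate output has to be used for the test, because the NORM_MINMAX normalization in TplMatch() forces the minimum to 0. The 0.2 threshold below is an assumption to tune:
// Sketch: update the template only on confident matches to slow down the drift.
cv::Mat raw;
cv::matchTemplate(img, mytemplate, raw, CV_TM_SQDIFF_NORMED);
double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::minMaxLoc(raw, &minVal, &maxVal, &minLoc, &maxLoc);
const double kMaxSqDiff = 0.2;   // assumed threshold for CV_TM_SQDIFF_NORMED
if (minVal < kMaxSqDiff)
{
    cv::Rect ROI(minLoc.x, minLoc.y, mytemplate.cols, mytemplate.rows);
    img(ROI).copyTo(mytemplate);   // deep copy so later frames do not alias it
}
// else keep the previous template for this frame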
You can follow the OpenCV tutorial "Template Matching". Your track function may contain the code to find the template in the current frame; a simple implementation is based on the matchTemplate and minMaxLoc functions.
The interesting issue related to the "real-time" part of your question is to succeed in finding the match, if present, within the time between the current frame and the next one.
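To check whether the matching actually fits into that budget you can time it with getTickCount; a small probe, assuming roughly 30 fps (33 ms per frame) and the img/mytemplate/result names used in the code below:
// Sketch: measure how long one template match takes and compare with the frame period.
double t0 = (double)cv::getTickCount();
cv::matchTemplate(img, mytemplate, result, CV_TM_SQDIFF_NORMED);
double elapsedMs = ((double)cv::getTickCount() - t0) / cv::getTickFrequency() * 1000.0;
if (elapsedMs > 33.0)   // ~30 fps budget, adjust for your camera
    std::cout << "matching took " << elapsedMs << " ms, too slow for real time" << std::endl;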
Edit:
The following quick-and-dirty code and the video at http://www.youtube.com/watch?v=vpnkk7N2E0Q&feature=youtu.be show what I mean by tracking.
Since I do not have a webcam, I slightly modified your code to use a video file instead, namely https://code.ros.org/trac/opencv/export/7237/trunk/opencv/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi
I then added a track function and some logic to slow the video down until a ROI is chosen, after which the video plays at normal speed.
#include <iostream>
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <sstream>
using namespace cv;
using namespace std;
Point point1, point2; /* vertical points of the bounding box */
int drag = 0;
Rect rect; /* bounding box */
Mat img, roiImg; /* roiImg - the part of the image in the bounding box */
int select_flag = 0;
bool go_fast = false;
Mat mytemplate;
void track(cv::Mat &img, const cv::Mat &templ, const cv::Rect &r )
{
static int n = 0;
if (select_flag)
{
templ.copyTo(mytemplate);
select_flag = false;
go_fast = true;
}
cv::Mat result;
/// Do the Matching and Normalize
matchTemplate( img, mytemplate, result, CV_TM_SQDIFF_NORMED );
normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );
/// Localizing the best match with minMaxLoc
double minVal; double maxVal; Point minLoc; Point maxLoc;
Point matchLoc;
minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
matchLoc = minLoc;
rectangle( img, matchLoc, Point( matchLoc.x + mytemplate.cols , matchLoc.y + mytemplate.rows ), CV_RGB(255, 255, 255), 3 );
std::cout << matchLoc << "\n";
}
///MouseCallback function
void mouseHandler(int event, int x, int y, int flags, void *param)
{
if (event == CV_EVENT_LBUTTONDOWN && !drag)
{
/* left button clicked. ROI selection begins */
point1 = Point(x, y);
drag = 1;
}
if (event == CV_EVENT_MOUSEMOVE && drag)
{
/* mouse dragged. ROI being selected */
Mat img1 = img.clone();
point2 = Point(x, y);
rectangle(img1, point1, point2, CV_RGB(255, 0, 0), 3, 8, 0);
imshow("image", img1);
}
if (event == CV_EVENT_LBUTTONUP && drag)
{
point2 = Point(x, y);
rect = Rect(point1.x, point1.y, x - point1.x, y - point1.y);
drag = 0;
roiImg = img(rect);
}
if (event == CV_EVENT_LBUTTONUP)
{
/* ROI selected */
select_flag = 1;
drag = 0;
}
}
///Main function
int main()
{
int k;
/*
VideoCapture cap(0);
if (!cap.isOpened())
return 1;
*/
VideoCapture cap;
//cap.open("~/Downloads/opencv-2.4.4/samples/cpp/tutorial_code/HighGUI/video-input-psnr-ssim/video/Megamind.avi");
cap.open("./Megamind.avi");
if (!cap.isOpened())
{
printf("Unable to open video file\n");
return -1;
}
/*
// Set video to 320x240
cap.set(CV_CAP_PROP_FRAME_WIDTH, 320);
cap.set(CV_CAP_PROP_FRAME_HEIGHT, 240);
*/
cap >> img;
imshow("image", img);
while (1)
{
cap >> img;
if (img.empty())
break;
if (rect.width == 0 && rect.height == 0)
cvSetMouseCallback("image", mouseHandler, NULL);
else
track(img, roiImg, rect);
if (select_flag == 1)
imshow("Template", roiImg);
imshow("image", img);
k = waitKey(go_fast ? 30 : 10000);
if (k == 27)
break;
}
return 0;
}
You can also get a general introduction to the subject starting from this Wikipedia page: http://en.wikipedia.org/wiki/Video_tracking
I'm new to OpenCV and trying some things out. I want to detect a hand using a webcam; here is a simple program, but it gives me the following error:
Unhandled exception at 0x000000013f5b140b in HaarCascade.exe: 0xC0000005: Access violation reading location 0x0000000000000004.
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
IplImage* img = 0;
CvHaarClassifierCascade *cascade;
CvMemStorage *cstorage;
CvMemStorage *hstorage;
void detectObjects( IplImage *img );
int key;
int main( int argc, char** argv )
{
CvCapture *capture;
IplImage *frame;
// loads classifier for hand haar cascade
char *filename = "haarcascade_hand.xml";
cascade = ( CvHaarClassifierCascade* )cvLoad( "haarcascade_hand.xml", 0, 0, 0 );
// setup memory buffer
hstorage = cvCreateMemStorage( 0 );
cstorage = cvCreateMemStorage( 0 );
// initialize camera
capture = cvCaptureFromCAM( 0 );
// always check
//assert( cascade && storage && capture );
// create a window
cvNamedWindow( "Camera", 1 );
while(key!='q') {
// captures frame and check every frame
frame = cvQueryFrame( capture );
if( !frame ) break;
// detect objects and display video
detectObjects (frame );
// quit if user press 'q'
key = cvWaitKey( 10 );
}
// free memory
cvReleaseCapture( &capture );
cvDestroyAllWindows();
cvReleaseHaarClassifierCascade( &cascade );
cvReleaseMemStorage( &cstorage );
cvReleaseMemStorage( &hstorage );
return 0;
}
void detectObjects( IplImage *img )
{
int px;
int py;
int edge_thresh = 1;
IplImage *gray = cvCreateImage( cvSize(640,480), 8, 1 );
IplImage *edge = cvCreateImage( cvSize(640,480), 8, 1 );
// convert video image color
cvCvtColor(img,gray,CV_BGR2GRAY);
// set the converted image's origin
gray->origin=1;
// color threshold
cvThreshold(gray,gray,100,255,CV_THRESH_BINARY);
// smooths out image
cvSmooth(gray, gray, CV_GAUSSIAN, 11, 11);
// get edges
cvCanny(gray, edge, (float)edge_thresh, (float)edge_thresh*3, 5);
// detects circle
CvSeq* circle = cvHoughCircles(gray, cstorage, CV_HOUGH_GRADIENT, 1, gray->height/50, 5, 35);
// draws circle and its centerpoint
float* p = (float*)cvGetSeqElem( circle, 0 );
cvCircle( img, cvPoint(cvRound(p[0]),cvRound(p[1])), 3, CV_RGB(255,0,0), -1, 8, 0 );
cvCircle( img, cvPoint(cvRound(p[0]),cvRound(p[1])), cvRound(p[2]), CV_RGB(200,0,0), 1, 8, 0 );
px=cvRound(p[0]);
py=cvRound(p[1]);
// displays coordinates of circle's center
cout <<"(x,y) -> ("<<px<<","<<py<<")"<<endl;
// detects hand
CvSeq *hand = cvHaarDetectObjects(img, cascade, hstorage, 1.2, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(100, 100));
// draws red box around hand when detected
CvRect *r = ( CvRect* )cvGetSeqElem( hand, 0 );
cvRectangle( img,
cvPoint( r->x, r->y ),
cvPoint( r->x + r->width, r->y + r->height ),
CV_RGB( 255, 0, 0 ), 1, 8, 0 );
cvShowImage("Camera",img);
}
Image:
http://i.imgur.com/Dneiw.png
Thank you for all your responses.
There's a chance that cvLoad() failed because it didn't find the file. That's a problem because you use the cascade later on, and if it's a NULL pointer it can crash your application.
But you'll never know unless you test the function's return value:
cascade = ( CvHaarClassifierCascade* )cvLoad( "haarcascade_hand.xml", 0, 0, 0 );
if (!cascade)
{
    // the classifier file was not found or could not be parsed
    fprintf(stderr, "ERROR: could not load haarcascade_hand.xml\n");
    return -1;
}
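For what it's worth, the faulting address 0x0000000000000004 is also consistent with dereferencing a NULL sequence element: cvHoughCircles and cvHaarDetectObjects can return empty sequences, in which case cvGetSeqElem(seq, 0) returns NULL and reading p[1] touches address 4. A sketch of the extra guards in detectObjects, using the names from your code:
// Sketch: guard against empty detection sequences before dereferencing them.
CvSeq* circle = cvHoughCircles(gray, cstorage, CV_HOUGH_GRADIENT, 1, gray->height/50, 5, 35);
if (circle && circle->total > 0)
{
    float* p = (float*)cvGetSeqElem(circle, 0);
    cvCircle(img, cvPoint(cvRound(p[0]), cvRound(p[1])), 3, CV_RGB(255, 0, 0), -1, 8, 0);
}
CvSeq* hand = cvHaarDetectObjects(img, cascade, hstorage, 1.2, 2,
                                  CV_HAAR_DO_CANNY_PRUNING, cvSize(100, 100));
if (hand && hand->total > 0)
{
    CvRect* r = (CvRect*)cvGetSeqElem(hand, 0);
    cvRectangle(img, cvPoint(r->x, r->y),
                cvPoint(r->x + r->width, r->y + r->height),
                CV_RGB(255, 0, 0), 1, 8, 0);
}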