I have trained a YOLOv7 model on a custom dataset. I want to use the trained model in my C++ project. I tried to do so using .pt weights and .onnx weights, but I'm continuously getting errors.
As advised here, I performed 'Reparameterization' on the .pt file, before converting it into .onnx.
In the current trial, I followed this repository, and I used the following code:
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
using namespace std;
/// Settings passed to the YOLOV7 constructor.
/// Kept as a plain aggregate so it can be brace-initialised: { conf, nms, "model.onnx" }.
struct Net_config
{
    float confThreshold;   // Confidence threshold
    float nmsThreshold;    // Non-maximum suppression threshold
    std::string modelpath; // Path of the ONNX model file
};
class YOLOV7
YOLOV7(Net_config config);
void detect(Mat& frame);
int inpWidth;
int inpHeight;
vector<string> class_names;
int num_class;
float confThreshold;
float nmsThreshold;
Net net;
void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);
YOLOV7::YOLOV7(Net_config config)
this->confThreshold = config.confThreshold;
this->nmsThreshold = config.nmsThreshold;
//this->net = readNetFromONNX(config.modelpath);
this->net = readNetFromONNX("yolov7.onnx");
ifstream ifs("coco.names");
string line;
while (getline(ifs, line)) this->class_names.push_back(line);
this->num_class = class_names.size();
size_t pos = config.modelpath.find("_");
int len = config.modelpath.length() - 6 - pos;
string hxw = config.modelpath.substr(pos + 1, len);
pos = hxw.find("x");
string h = hxw.substr(0, pos);
len = hxw.length() - pos;
string w = hxw.substr(pos + 1, len);
this->inpHeight = stoi(h);
this->inpWidth = stoi(w);
void YOLOV7::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid) // Draw the predicted bounding box
//Draw a rectangle displaying the bounding box
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
//Get the label for the class name and its confidence
string label = format("%.2f", conf);
label = this->class_names[classid] + ":" + label;
//Display the label at the top of the bounding box
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
/// Runs the network on `frame`, keeps proposals above the confidence threshold,
/// applies non-maximum suppression and draws the remaining boxes on `frame`.
/// Each output row is read as: cx, cy, w, h, box_score, class scores...
void YOLOV7::detect(Mat& frame)
{
    if (frame.empty())
        return;

    Mat blob = blobFromImage(frame, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    this->net.setInput(blob); // without this the forward pass never sees the image
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
    if (outs.empty())
        return;

    int num_proposal = outs[0].size[0];
    int nout = outs[0].size[1];
    if (outs[0].dims > 2)
    {
        // Drop the leading batch dimension so every proposal is one matrix row.
        num_proposal = outs[0].size[1];
        nout = outs[0].size[2];
        outs[0] = outs[0].reshape(0, num_proposal);
    }
    if (nout <= 5)
        return; // no class-score columns to inspect

    /////generate proposals
    vector<float> confidences;
    vector<Rect> boxes;
    vector<int> classIds;
    // The image is resized straight to the network input size, so boxes are scaled back per axis.
    const float ratioh = (float)frame.rows / this->inpHeight;
    const float ratiow = (float)frame.cols / this->inpWidth;
    for (int n = 0; n < num_proposal; n++)
    {
        // Use the same row index for the raw pointer and the score slice so they cannot drift apart.
        const float* pdata = outs[0].ptr<float>(n);
        const float box_score = pdata[4];
        if (box_score <= this->confThreshold)
            continue;

        Mat scores = outs[0].row(n).colRange(5, nout);
        Point classIdPoint;
        double max_class_score;
        // Get the value and location of the maximum score
        minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
        max_class_score *= box_score;
        if (max_class_score <= this->confThreshold)
            continue;

        const int class_idx = classIdPoint.x;
        const float cx = pdata[0] * ratiow; ///cx
        const float cy = pdata[1] * ratioh; ///cy
        const float w = pdata[2] * ratiow;  ///w
        const float h = pdata[3] * ratioh;  ///h

        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);

        // The three vectors must stay index-aligned for NMSBoxes and drawPred.
        boxes.push_back(Rect(left, top, (int)(w), (int)(h)));
        confidences.push_back((float)max_class_score);
        classIds.push_back(class_idx);
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const Rect box = boxes[idx];
        this->drawPred(confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame, classIds[idx]);
    }
}
int main()
Net_config YOLOV7_nets = { 0.3, 0.5, "yolov7.onnx" }; ////choices=["models/yolov7_640x640.onnx", "models/yolov7-tiny_640x640.onnx", "models/yolov7_736x1280.onnx", "models/yolov7-tiny_384x640.onnx", "models/yolov7_480x640.onnx", "models/yolov7_384x640.onnx", "models/yolov7-tiny_256x480.onnx", "models/yolov7-tiny_256x320.onnx", "models/yolov7_256x320.onnx", "models/yolov7-tiny_256x640.onnx", "models/yolov7_256x640.onnx", "models/yolov7-tiny_480x640.onnx", "models/yolov7-tiny_736x1280.onnx", "models/yolov7_256x480.onnx"]
YOLOV7 net(YOLOV7_nets);
string imgpath = "frame1.png";
Mat srcimg = imread(imgpath);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
However, I got the following error:
OpenCV: terminate handler is called! The last OpenCV error is:
OpenCV(4.1.1) Error: Unsupported format or combination of formats (Failed to parse onnx model) in cv::dnn::dnn4_v20190621::ONNXImporter::ONNXImporter, file C:\opencv-4.1.1\modules\dnn\src\onnx\onnx_importer.cpp, line 57
Here is a link to my 'yolov7.onnx' file, and here is a link to 'frame1.png'
The model is trained to detect 1 class, which is 'Potholes' in roads.
Currently, I have visual studio 2019, and opencv 4.1.1.
Should I upgrade to another opencv version?
Pls guide me to any possible solutions, so that I can successfully deploy the YOLOv7 model using C++.
I upgraded to Opencv 4.6.0.
Also, I discovered that I wasn't placing the .onnx and the image files in the same folder as the .exe file.
The following is the code in its final state (but, as I said above, the paths of the .onnx file and the image files should be specified correctly):
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace dnn;
using namespace std;
/// Settings passed to the YOLOV7 constructor.
/// Kept as a plain aggregate so it can be brace-initialised: { conf, nms, "model.onnx" }.
struct Net_config
{
    float confThreshold;   // Confidence threshold
    float nmsThreshold;    // Non-maximum suppression threshold
    std::string modelpath; // Path of the ONNX model file
};
class YOLOV7
YOLOV7(Net_config config);
void detect(Mat& frame);
int inpWidth;
int inpHeight;
vector<string> class_names;
int num_class;
float confThreshold;
float nmsThreshold;
Net net;
void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);
/// Builds the detector: copies the thresholds, loads the ONNX model named by
/// `config.modelpath`, reads the class labels and fixes the input size at 640x640.
YOLOV7::YOLOV7(Net_config config)
{
    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;

    // Honour the configured path instead of a hard-coded file name.
    this->net = readNetFromONNX(config.modelpath);

    ifstream ifs("Potholes.names");
    if (!ifs.is_open())
        cerr << "Warning: could not open class-names file Potholes.names" << endl;
    string line;
    while (getline(ifs, line)) this->class_names.push_back(line);
    this->num_class = static_cast<int>(class_names.size());

    this->inpHeight = 640;
    this->inpWidth = 640;
}
void YOLOV7::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid) // Draw the predicted bounding box
//Draw a rectangle displaying the bounding box
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
//Get the label for the class name and its confidence
string label = format("%.2f", conf);
label = this->class_names[classid] + ":" + label;
//Display the label at the top of the bounding box
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
/// Runs the network on `frame`, keeps proposals above the confidence threshold,
/// applies non-maximum suppression and draws the remaining boxes on `frame`.
/// Each output row is read as: cx, cy, w, h, box_score, class scores...
void YOLOV7::detect(Mat& frame)
{
    if (frame.empty())
        return;

    Mat blob = blobFromImage(frame, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    this->net.setInput(blob); // without this the forward pass never sees the image
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
    if (outs.empty())
        return;

    int num_proposal = outs[0].size[0];
    int nout = outs[0].size[1];
    if (outs[0].dims > 2)
    {
        // Drop the leading batch dimension so every proposal is one matrix row.
        num_proposal = outs[0].size[1];
        nout = outs[0].size[2];
        outs[0] = outs[0].reshape(0, num_proposal);
    }
    if (nout <= 5)
        return; // no class-score columns to inspect

    /////generate proposals
    vector<float> confidences;
    vector<Rect> boxes;
    vector<int> classIds;
    // The image is resized straight to the network input size, so boxes are scaled back per axis.
    const float ratioh = (float)frame.rows / this->inpHeight;
    const float ratiow = (float)frame.cols / this->inpWidth;
    for (int n = 0; n < num_proposal; n++)
    {
        // Use the same row index for the raw pointer and the score slice so they cannot drift apart.
        const float* pdata = outs[0].ptr<float>(n);
        const float box_score = pdata[4];
        if (box_score <= this->confThreshold)
            continue;

        Mat scores = outs[0].row(n).colRange(5, nout);
        Point classIdPoint;
        double max_class_score;
        // Get the value and location of the maximum score
        minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
        max_class_score *= box_score;
        if (max_class_score <= this->confThreshold)
            continue;

        const int class_idx = classIdPoint.x;
        const float cx = pdata[0] * ratiow; ///cx
        const float cy = pdata[1] * ratioh; ///cy
        const float w = pdata[2] * ratiow;  ///w
        const float h = pdata[3] * ratioh;  ///h

        const int left = int(cx - 0.5 * w);
        const int top = int(cy - 0.5 * h);

        // The three vectors must stay index-aligned for NMSBoxes and drawPred.
        boxes.push_back(Rect(left, top, (int)(w), (int)(h)));
        confidences.push_back((float)max_class_score);
        classIds.push_back(class_idx);
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const Rect box = boxes[idx];
        this->drawPred(confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame, classIds[idx]);
    }
}
int main()try
int img_index = 0;
Net_config YOLOV7_nets = { 0.3, 0.5, "yolov7.onnx" }; ////choices=["models/yolov7_640x640.onnx", "models/yolov7-tiny_640x640.onnx", "models/yolov7_736x1280.onnx", "models/yolov7-tiny_384x640.onnx", "models/yolov7_480x640.onnx", "models/yolov7_384x640.onnx", "models/yolov7-tiny_256x480.onnx", "models/yolov7-tiny_256x320.onnx", "models/yolov7_256x320.onnx", "models/yolov7-tiny_256x640.onnx", "models/yolov7_256x640.onnx", "models/yolov7-tiny_480x640.onnx", "models/yolov7-tiny_736x1280.onnx", "models/yolov7_256x480.onnx"]
YOLOV7 net(YOLOV7_nets);
while (img_index <= 822)
string base_path = "D:/Post_Grad/STDF/Depth_estimation-master/workspace/test_vid/pngFrames/frame";
//string imgpath = "frame1.png";
string imgpath = base_path + to_string(img_index) + ".png";
Mat srcimg = imread(imgpath);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
catch (const std::exception& e)
std::cerr << e.what() << std::endl;
**Requirements: **
(1) Build OpenCV with Cuda and compile in C++
(2) Version = OpenCV Latest Version
(3) Build and Compile OpenCV Link: https://techawarey.com/programming/install-opencv-c-c-in-ubuntu-18-04-lts-step-by-step-guide/#Summary
(4) The samples_utility library is here: https://github.com/opencv/opencv_contrib/blob/master/modules/tracking/samples/samples_utility.hpp
(5) Compile Program Command: g++ test.cpp -o testoutput -std=c++11 `pkg-config --cflags --libs opencv`
(6) Run Program Command: ./testoutput
Code is working fine but not accurate
Step: 1
Read Frame from Camera
Select ROI(Region of Interest)
After that, start the KCF tracker with the Sobel feature extractor
Tracking the selected object.
Step: 2
Failure detect
After that call template matching function called MatchingMethod()
Run template matching
Get x, y value from template matching
After that, reinitialize the KCF tracker with the Sobel feature extractor.
This code works fine for a still object, but when the object is moving the tracker produces false detections. I want to improve accuracy and reduce false detections.
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include <cstring>
#include <unistd.h>
#include "sample_utility.hpp"
#include <thread>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/cudaarithm.hpp>
#include <iomanip>
#include <stdlib.h>
#include <unistd.h>
using namespace cv;
using namespace std;
// Convert to string
#define SSTR( x ) static_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
/// Global Variables
struct Array {
int arr[2];
Mat img;
Mat templ;
Mat result_h;
bool flag = true;
int match_method = 5;
int i=0;
int max_Trackbar = 5;
float fps;
int seconds = 0;
// Function Headers
void delay();
// prototype of the functino for feature extractor
void sobelExtractor(const Mat img, const Rect roi, Mat& feat);
struct Array MatchingMethod( int, void* );
int main(int argc, char **argv)
TrackerKCF::Params param;
param.compress_feature = true;
param.compressed_size = 2;
param.desc_npca = 0;
param.desc_pca = TrackerKCF::GRAY | TrackerKCF::CN;
param.detect_thresh = 0.8;
// create a tracker object
Ptr<TrackerKCF> tracker = TrackerKCF::create(param);
VideoCapture cap(0);
// Exit if video is not opened
//cout << "Could not read video file" << endl;
return 1;
// Read first frame
Mat frame;
bool ok = cap.read(frame);
// Define initial bounding box
//Rect bbox(x, y, w, h);
// Uncomment the line below to select a different bounding box
Rect bbox = selectROI(frame, false);
// Display bounding box.
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
int H, W, cW, cH;
// print(f"hight {H} , Width {W}")
H = display_height;
W = display_width;
// Center point of the screen
cW = int(W / 2);
cH = int(H / 2);
Point p1(cW, cH);
// get bounding box
Mat imCrop = frame(bbox);
imwrite("1.png", imCrop);
//quit if ROI was not selected
if(bbox.width==0 || bbox.height==0)
return 0;
//imshow("Tracking", frame);
tracker->init(frame, bbox);
Mat frame;
cap >> frame;
circle(frame, p1, 3, Scalar(0,255,0), -1);
// Start timer
if(bbox.width!=0 || bbox.height!=0){
double timer = (double)getTickCount();
// Update the tracking result
bool ok = tracker->update(frame, bbox);
//ok, bbox = tracker->update(frame);
// Calculate Frames per second (FPS)
fps = getTickFrequency() / ((double)getTickCount() - timer);
if (ok)
// Tracking success : Draw the tracked object
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
int xxx, yyy, height, width;
xxx = bbox.x;
yyy = bbox.y;
height = bbox.height;
width = bbox.width;
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xxx + (xxx + width)) / 2);
cyROI = int((yyy + (yyy + height)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
putText(frame, "FPS : " + SSTR(int(fps)), Point(100,20), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(50,170,50), 2);
putText(frame, "Difference From X-Axis: "+SSTR(int(diffX)), Point(100, 50), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
putText(frame, "Difference From Y-Axis: "+SSTR(int(diffY)), Point(100, 80), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
// Tracking failure detected.
putText(frame, "Tracking failure detected", Point(100,110), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,255),2);
templ = imread( "1.png", 1 );
struct Array a = MatchingMethod( 0, 0 );
cout<<"X: "<<a.arr[0]<<"\n";
cout<<"Y: "<<a.arr[1]<<"\n";
cout<<"Width: "<<w<<"\n";
cout<<"Height: "<<h<<"\n";
int xx, yy, ww, hh;
xx = a.arr[0];
yy = a.arr[1];
ww = w;
hh = h;
Rect bbox(xx, yy, ww, hh);
tracker = TrackerKCF::create(param);
tracker->init(frame, bbox);
//roi.x = MatchingMethod.
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xx + (xx + ww)) / 2);
cyROI = int((yy + (yy + hh)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
// Display frame.
imshow("Tracking", frame);
// Exit if ESC pressed.
int k = waitKey(1);
if(k == 27)
return 0;
void sobelExtractor(const Mat img, const Rect roi, Mat& feat){
Mat sobel[2];
Mat patch;
Rect region=roi;
// extract patch inside the image
cvtColor(patch,patch, COLOR_BGR2GRAY);
// add some padding to compensate when the patch is outside image border
int addTop,addBottom, addLeft, addRight;
Sobel(patch, sobel[0], CV_32F,1,0,1);
Sobel(patch, sobel[1], CV_32F,0,1,1);
feat=feat/255.0-0.5; // normalize to range -0.5 .. 0.5
struct Array MatchingMethod( int, void* )
/// Source image to display
Mat frame;
struct Array a;
for(int i=1; i<=4; i++){
img.copyTo( frame );
// break;
cv::cuda::setDevice(0); // initialize CUDA
// convert from mat to gpumat
cv::cuda::GpuMat image_d(img);
cv::cuda::GpuMat templ_d(templ);
cv::cuda::GpuMat result;
// GPU -> NG
cv::Ptr<cv::cuda::TemplateMatching> alg =
cv::cuda::createTemplateMatching(image_d.type(), cv::TM_CCOEFF_NORMED);
alg->match(image_d, templ_d, result); // no return.
//cv::cuda::normalize(result, result, 0, 1, cv::NORM_MINMAX, -1);
double max_value, min_value;
cv::Point location;
cv::cuda::minMaxLoc(result, &min_value, &max_value, 0, &location);
double THRESHOLD = 3e-09; //0.3;
if( min_value <= THRESHOLD) {
//struct Array a;
a.arr[0] = location.x;
a.arr[1] = location.y;
return a;
flag = false;
Okay, here is my answer to your question.
First of all, applying template matching when the tracker misses is a mistake, because template matching only succeeds when the image region is almost identical to the reference template. In your case there will be shadows, lighting changes, etc. in the environment, so it will rarely give a successful result.
Secondly, if you delete the template-matching section, the tracker will keep searching for the target in the image effectively. The changes I made to your code are listed below. With these changes, I got better results:
Delete the template-matching section
Decrease the detection threshold (param.detect_thresh) to 0.5
Create more tracker objects to catch the target: this change is the most important part. I suggest creating several tracker objects (I used 4, but you can increase the number). Each tracker should be initialised with a rectangle similar to the ROI the user chose, but not with exactly the same coordinates. For example, if the user chooses cv::Rect(200,200,400,400), then the other trackers should get cv::Rect(180,190,400,400), cv::Rect(220,180,400,400), ... and so on. The reason is that the tracker algorithm is feature based, so it always tries to find features similar to its reference; by doing this you increase the number of feature references.
And here is my code to guide you:
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include <cstring>
#include <unistd.h>
#include <thread>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/cudaarithm.hpp>
#include <iomanip>
#include <stdlib.h>
#include <unistd.h>
using namespace cv;
using namespace std;
// Convert to string
#define SSTR( x ) static_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
/// Global Variables
struct Array {
int arr[2];
Mat img;
Mat templ;
Mat result_h;
bool flag = true;
int match_method = 5;
int i=0;
int max_Trackbar = 5;
float fps;
int seconds = 0;
// Function Headers
void delay();
// prototype of the functino for feature extractor
void sobelExtractor(const Mat img, const Rect roi, Mat& feat);
struct Array MatchingMethod( int, void* );
int main(int argc, char **argv)
TrackerKCF::Params param;
param.compress_feature = true;
param.compressed_size = 2;
param.desc_npca = 0;
param.desc_pca = TrackerKCF::GRAY | TrackerKCF::CN;
param.detect_thresh = 0.5;
// create a tracker object
Ptr<TrackerKCF> tracker = TrackerKCF::create(param);
Ptr<TrackerKCF> tracker2 = TrackerKCF::create(param);
Ptr<TrackerKCF> tracker3 = TrackerKCF::create(param);
Ptr<TrackerKCF> tracker4 = TrackerKCF::create(param);
VideoCapture cap(0);
// Exit if video is not opened
//cout << "Could not read video file" << endl;
return 1;
// Read first frame
Mat frame;
bool ok = cap.read(frame);
// Define initial bounding box
//Rect bbox(x, y, w, h);
// Uncomment the line below to select a different bounding box
Rect2d bbox = selectROI(frame, false);
// Display bounding box.
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
int H, W, cW, cH;
// print(f"hight {H} , Width {W}")
H = frame.rows;
W = frame.cols;
// Center point of the screen
cW = int(W / 2);
cH = int(H / 2);
Point p1(cW, cH);
//quit if ROI was not selected
if(bbox.width==0 || bbox.height==0)
return 0;
//imshow("Tracking", frame);
tracker->init(frame, bbox);
tracker2->init(frame, cv::Rect2d(bbox.x-10,bbox.y-10, bbox.width,bbox.height));
tracker3->init(frame, cv::Rect2d(bbox.x+10,bbox.y+10, bbox.width,bbox.height));
tracker4->init(frame, cv::Rect2d(bbox.x+20,bbox.y+20, bbox.width,bbox.height));
Mat frame;
cap >> frame;
circle(frame, p1, 3, Scalar(0,255,0), -1);
// Start timer
if(bbox.width!=0 || bbox.height!=0){
double timer = (double)getTickCount();
// Update the tracking result
bool ok = tracker->update(frame, bbox);
bool ok2 = tracker->update(frame, bbox);
bool ok3 = tracker->update(frame, bbox);
bool ok4 = tracker->update(frame, bbox);
//ok, bbox = tracker->update(frame);
// Calculate Frames per second (FPS)
fps = getTickFrequency() / ((double)getTickCount() - timer);
if (ok || ok2 || ok3 || ok4)
// Tracking success : Draw the tracked object
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
int xxx, yyy, height, width;
xxx = bbox.x;
yyy = bbox.y;
height = bbox.height;
width = bbox.width;
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xxx + (xxx + width)) / 2);
cyROI = int((yyy + (yyy + height)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
putText(frame, "FPS : " + SSTR(int(fps)), Point(100,20), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(50,170,50), 2);
putText(frame, "Difference From X-Axis: "+SSTR(int(diffX)), Point(100, 50), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
putText(frame, "Difference From Y-Axis: "+SSTR(int(diffY)), Point(100, 80), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
// Display frame.
imshow("Tracking", frame);
// Exit if ESC pressed.
int k = waitKey(1);
if(k == 27)
return 0;
void sobelExtractor(const Mat img, const Rect roi, Mat& feat){
Mat sobel[2];
Mat patch;
Rect region=roi;
// extract patch inside the image
cvtColor(patch,patch, COLOR_BGR2GRAY);
// add some padding to compensate when the patch is outside image border
int addTop,addBottom, addLeft, addRight;
Sobel(patch, sobel[0], CV_32F,1,0,1);
Sobel(patch, sobel[1], CV_32F,0,1,1);
feat=feat/255.0-0.5; // normalize to range -0.5 .. 0.5
struct Array MatchingMethod( int, void* )
/// Source image to display
Mat frame;
struct Array a;
for(int i=1; i<=4; i++){
img.copyTo( frame );
// break;
cv::cuda::setDevice(0); // initialize CUDA
// convert from mat to gpumat
cv::cuda::GpuMat image_d(img);
cv::cuda::GpuMat templ_d(templ);
cv::cuda::GpuMat result;
// GPU -> NG
cv::Ptr<cv::cuda::TemplateMatching> alg =
cv::cuda::createTemplateMatching(image_d.type(), cv::TM_CCOEFF_NORMED);
alg->match(image_d, templ_d, result); // no return.
//cv::cuda::normalize(result, result, 0, 1, cv::NORM_MINMAX, -1);
double max_value, min_value;
cv::Point location;
cv::cuda::minMaxLoc(result, &min_value, &max_value, 0, &location);
double THRESHOLD = 3e-09; //0.3;
if( min_value <= THRESHOLD) {
//struct Array a;
a.arr[0] = location.x;
a.arr[1] = location.y;
return a;
flag = false;
I'm using cairomm, OpenCV 4 and Box2D (not shown in the code below) to make physics education videos.
My plan is as follows.
Many Sobjects (scientific objects) make up a Scene.
So I decided to use an inner class. (I don't know whether this is the best design; I'm new to programming.)
My problem is shown below.
/// Converts a Cairo image surface into a 3-channel BGR cv::Mat.
/// The surface pixels are wrapped as a CV_8UC4 matrix and converted with
/// COLOR_BGRA2BGR, so the returned matrix owns its data and does not alias
/// the surface. Returns an empty Mat when `surface` is null.
cv::Mat Surface2Mat(Cairo::RefPtr<Cairo::ImageSurface> surface)
{
    if (!surface)
        return cv::Mat();

    // Make sure pending drawing has reached the pixel buffer before it is read.
    surface->flush();

    // Non-owning view over the surface memory; the stride is passed because rows may be padded.
    const cv::Mat bgra(surface->get_height(), surface->get_width(), CV_8UC4,
                       surface->get_data(), surface->get_stride());
    cv::Mat bgr;
    cv::cvtColor(bgra, bgr, cv::ColorConversionCodes::COLOR_BGRA2BGR);
    return bgr;
}
// Scene: holds the output video settings and the VideoWriter, and declares the
// nested Sobject type that does the Cairo drawing.
// NOTE(review): braces and access specifiers are missing in this listing.
class Scene
// Frame size in pixels and clip length (presumably seconds, see write()).
int surface_width = 1920;
int surface_height = 1080;
int second = 4;
cv::Size resolution = {surface_width, surface_height};
// FourCC code passed to video.open() below.
const int mp4 = cv::VideoWriter::fourcc('a', 'v', 'c', '1');
cv::VideoWriter video;
double FPS;
// Sobject: one drawable object with its own Cairo surface and context.
class Sobject
double width;
double height;
double color[3] = {1, 1, 1};
Cairo::RefPtr<Cairo::Context> cr;
Cairo::RefPtr<Cairo::ImageSurface> surface;
// Creates a 1920x1080 ARGB32 surface and a context that draws onto it.
surface = Cairo::ImageSurface::create(Cairo::Format::FORMAT_ARGB32, 1920, 1080);
cr = Cairo::Context::create(surface);
// NOTE(review): no return type is declared, and x/y are ignored — the
// context is always moved to (960, 540).
set_position(double x = 0, double y = 0)
cr->move_to(960, 540);
// Adds a full circle of radius 40 centred on (0, 0) to the current path.
void drawCircle()
cr->arc(0, 0, 40, 0.0, 2 * M_PI);
// NOTE(review): width/height are not used by the visible body; FPS is assigned
// 30 inside the open() call.
Scene(int width = 1920, int height = 1080)
video.open("Scene.mp4", mp4, FPS = 30, resolution, true);
// Iterates FPS * second times (one pass per output frame, presumably).
void write()
for (int i = 0; i < (FPS * second); i++)
The "video.write(Surface2Mat(surface));" at last makes problem.
How can I access to the surface?
You have not created an instance of Sobject. You can do something like:
class Scene
class Sobject
Sobject mSobject; // can have it public or private depending on other requirements
void write()
for (int i = 0; i < (FPS * second); i++)
// now that you have an instance of Sobject, you can use surface from it
I am writing a program that shows a face in a window together with 68 landmark points.
I use a Haar cascade and FacemarkLBF. I have a problem in my program: when the face is in a stable position, the landmark points jitter (shake). How can I fix that? Thanks.
#include <iostream>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/face.hpp>
using cv::Scalar;
using cv::Point;
int main(int argc, char** argv)
cv::CascadeClassifier faceDetector("haarcascade_frontalface_alt2.xml");
cv::Ptr<cv::face::Facemark>facemark = cv::face::FacemarkLBF::create();
cv::VideoCapture vc(0);
while (true)
cv::Mat frame, gray;
cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
std::vector<cv::Rect> faces;
faceDetector.detectMultiScale(gray, faces);
std::vector< std::vector<cv::Point2f> > landmarks;
bool success = facemark->fit(frame, faces, landmarks);
for (size_t i = 0; i < landmarks.size(); i++)
for (size_t j = 0; j < landmarks[i].size(); j++)
cv::circle(frame, cv::Point(landmarks[i][j].x, landmarks[i][j].y), 2, Scalar(255, 0, 0), 2);
cv::imshow("1", frame);
if ((char)cv::waitKey(20) == 27)
return 0;
I looked at the link from @Nuzhny (lkdemo.cpp), but not everything is clear to me.
I rewrote my code, but nothing changed:
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include "opencv2/video/tracking.hpp"
#include <opencv2/face.hpp>
int main(int argc, char** argv)
cv::CascadeClassifier faceDetector("haarcascade_frontalface_alt2.xml");
cv::Ptr<cv::face::Facemark>facemark = cv::face::FacemarkLBF::create();
cv::VideoCapture vc(0);
cv::Mat gray, prevGray, image, frame;
cv::Size subPixWinSize(10, 10), winSize(64, 64);
cv::TermCriteria termcrit(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 20, 0.03);
std::vector<uchar> status;
std::vector<float> err;
std::vector<cv::Point2f> oldLandmarks;
std::vector< std::vector<cv::Point2f> > landmarks;
bool b = true;
while (true)
cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
std::vector<cv::Rect> faces;
faceDetector.detectMultiScale(gray, faces);
bool success = facemark->fit(frame, faces, landmarks);
if (!success)
cv::imshow("1", frame);
if (oldLandmarks.empty())
oldLandmarks = landmarks.front();
if (prevGray.empty())
calcOpticalFlowPyrLK(prevGray, gray, landmarks.front(), oldLandmarks, status, err, winSize, 3, termcrit, cv::OPTFLOW_LK_GET_MIN_EIGENVALS, 0.001);
for (size_t i = 0; i < oldLandmarks.size(); i++)
cv::circle(frame, cv::Point(oldLandmarks[i].x, oldLandmarks[i].y), 2, cv::Scalar(255, 0, 0), 2);
cv::imshow("1", frame);
std::swap(oldLandmarks, landmarks.front());
cv::swap(prevGray, gray);
if ((char)cv::waitKey(20) == 27)
return 0;
LK tracking alone may not be enough. I wrote a simple application that corrects the landmarks after LK with a linear Kalman filter (EDIT 2 - removed prev landmarks):
#include <opencv2/opencv.hpp>
#include <opencv2/face.hpp>
class PointState
PointState(cv::Point2f point)
m_kalman(4, 2, 0, CV_64F)
void Update(cv::Point2f point)
cv::Mat measurement(2, 1, CV_64FC1);
if (point.x < 0 || point.y < 0)
measurement.at<double>(0) = m_point.x; //update using prediction
measurement.at<double>(1) = m_point.y;
m_isPredicted = true;
measurement.at<double>(0) = point.x; //update using measurements
measurement.at<double>(1) = point.y;
m_isPredicted = false;
// Correction
cv::Mat estimated = m_kalman.correct(measurement);
m_point.x = static_cast<float>(estimated.at<double>(0)); //update using measurements
m_point.y = static_cast<float>(estimated.at<double>(1));
cv::Point2f GetPoint() const
return m_point;
bool IsPredicted() const
return m_isPredicted;
cv::Point2f m_point;
cv::KalmanFilter m_kalman;
double m_deltaTime = 0.2;
double m_accelNoiseMag = 0.3;
bool m_isPredicted = false;
void Init()
m_kalman.transitionMatrix = (cv::Mat_<double>(4, 4) <<
1, 0, m_deltaTime, 0,
0, 1, 0, m_deltaTime,
0, 0, 1, 0,
0, 0, 0, 1);
m_kalman.statePre.at<double>(0) = m_point.x; // x
m_kalman.statePre.at<double>(1) = m_point.y; // y
m_kalman.statePre.at<double>(2) = 1; // init velocity x
m_kalman.statePre.at<double>(3) = 1; // init velocity y
m_kalman.statePost.at<double>(0) = m_point.x;
m_kalman.statePost.at<double>(1) = m_point.y;
m_kalman.processNoiseCov = (cv::Mat_<double>(4, 4) <<
pow(m_deltaTime, 4.0) / 4.0, 0, pow(m_deltaTime, 3.0) / 2.0, 0,
0, pow(m_deltaTime, 4.0) / 4.0, 0, pow(m_deltaTime, 3.0) / 2.0,
pow(m_deltaTime, 3.0) / 2.0, 0, pow(m_deltaTime, 2.0), 0,
0, pow(m_deltaTime, 3.0) / 2.0, 0, pow(m_deltaTime, 2.0));
m_kalman.processNoiseCov *= m_accelNoiseMag;
cv::setIdentity(m_kalman.measurementNoiseCov, cv::Scalar::all(0.1));
cv::setIdentity(m_kalman.errorCovPost, cv::Scalar::all(.1));
cv::Point2f Predict()
cv::Mat prediction = m_kalman.predict();
m_point.x = static_cast<float>(prediction.at<double>(0));
m_point.y = static_cast<float>(prediction.at<double>(1));
return m_point;
/// Updates every tracked landmark from the previous to the current frame.
/// Each point is propagated with pyramidal Lucas-Kanade optical flow; where the
/// flow succeeds the filter is fed the average of the flow result and the
/// detector's landmark, otherwise the detector's landmark alone.
/// @param prevFrame      previous grey-scale frame
/// @param currFrame      current grey-scale frame
/// @param currLandmarks  landmarks detected in the current frame
/// @param trackPoints    per-landmark filter states, updated in place
void TrackPoints(cv::Mat prevFrame, cv::Mat currFrame,
    const std::vector<cv::Point2f>& currLandmarks,
    std::vector<PointState>& trackPoints)
{
    // The loop below indexes both vectors with the same i, so they must line up.
    if (trackPoints.empty() || currLandmarks.size() != trackPoints.size())
        return;

    // Lucas-Kanade
    cv::TermCriteria termcrit(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 30, 0.01);
    cv::Size winSize(7, 7);

    std::vector<uchar> status(trackPoints.size(), 0);
    std::vector<float> err;
    std::vector<cv::Point2f> newLandmarks;

    std::vector<cv::Point2f> prevLandmarks;
    prevLandmarks.reserve(trackPoints.size());
    for (const PointState& pts : trackPoints)
        prevLandmarks.push_back(pts.GetPoint());

    cv::calcOpticalFlowPyrLK(prevFrame, currFrame, prevLandmarks, newLandmarks, status, err, winSize, 3, termcrit, 0, 0.001);

    for (size_t i = 0; i < status.size(); ++i)
    {
        if (status[i])
            trackPoints[i].Update((newLandmarks[i] + currLandmarks[i]) / 2);
        else
            trackPoints[i].Update(currLandmarks[i]); // flow lost the point: fall back to the detector
    }
}
int main(int argc, char** argv)
cv::CascadeClassifier faceDetector("haarcascade_frontalface_alt2.xml");
cv::Ptr<cv::face::Facemark> facemark = cv::face::FacemarkLBF::create();
cv::VideoCapture cam(0, cv::CAP_DSHOW);
cv::namedWindow("Facial Landmark Detection", cv::WINDOW_NORMAL);
cv::Mat frame;
cv::Mat currGray;
cv::Mat prevGray;
std::vector<PointState> trackPoints;
while (cam.read(frame))
std::vector<cv::Rect> faces;
cv::cvtColor(frame, currGray, cv::COLOR_BGR2GRAY);
faceDetector.detectMultiScale(currGray, faces, 1.1, 3, cv::CASCADE_FIND_BIGGEST_OBJECT);
std::vector<std::vector<cv::Point2f>> landmarks;
bool success = facemark->fit(frame, faces, landmarks);
if (success)
if (prevGray.empty())
for (cv::Point2f lp : landmarks[0])
if (trackPoints.empty())
for (cv::Point2f lp : landmarks[0])
TrackPoints(prevGray, currGray, landmarks[0], trackPoints);
for (const PointState& tp : trackPoints)
cv::circle(frame, tp.GetPoint(), 3, tp.IsPredicted() ? cv::Scalar(0, 0, 255) : cv::Scalar(0, 255, 0), cv::FILLED);
for (cv::Point2f lp : landmarks[0])
cv::circle(frame, lp, 2, cv::Scalar(255, 0, 255), cv::FILLED);
cv::imshow("Facial Landmark Detection", frame);
if (cv::waitKey(1) == 27)
prevGray = currGray;
return 0;
So, the magenta points are the raw landmarks and the green points are the landmarks corrected by LK + Kalman: result video.
You can change Kalman options with 2 constants:
// Kalman filter tuning constants referred to by the surrounding text.
// NOTE(review): their units and exact use are not shown in this excerpt — confirm in the filter class.
double m_deltaTime = 0.2;
double m_accelNoiseMag = 0.3;
They control latency and noise, respectively.
I keep getting a segmentation fault while trying to run this code. I tried running it after changing the data types. Is the problem that the data types do not match, i.e. that uchar and float have different sizes in bytes? If so, what should I do?
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
// Question exhibit: loads an image, reads one pixel as three floats, computes a
// weighted luminance value and writes a Mat to disk.
// NOTE(review): braces are not visible in this excerpt and statements may be
// missing; the notes below describe only what is shown.
int main( int argc, const char** argv )
Mat img = imread("/home/sahiti/Downloads/images3.png", CV_LOAD_IMAGE_UNCHANGED); //read the image at the given path and store it in 'img'
if (img.empty()) //check whether the image is loaded or not
cout << "Error : Image cannot be loaded..!!" << endl;
//system("pause"); //wait for a key press
return -1;
// NOTE(review): 'image' is default-constructed and nothing visible assigns to it.
Mat image;
// NOTE(review): w and h are used as indices below without any visible initialization.
int w,h;
// Reads one element of 'img' as three floats.
// NOTE(review): Mat::at<T> does not convert; Vec3f only matches a 32-bit float
// 3-channel matrix, while a PNG loaded unchanged is typically 8- or 16-bit.
// Together with the uninitialized indices this is a likely source of the
// reported segmentation fault — confirm.
Vec3f intensity = img.at<Vec3f>(w, h);
// Channels are taken in blue, green, red order.
float blue = intensity.val[0];
float green = intensity.val[1];
float red = intensity.val[2];
// Weighted sum of R, G and B (the weights match the Rec. 709 luma coefficients).
float lumin=(0.2126 * red + 0.7152 *green + 0.0722 *blue);
// NOTE(review): 'im' is still empty when it is written below; 'lumin' is never
// stored anywhere in the visible code.
Mat im;
imwrite( "/home/sahiti/gray_Image.png", im );
return 0;
This line
Vec3f intensity = img.at<Vec3f>(w, h);
should be
Vec3f intensity = image.at<Vec3f>(w, h);
Also this does not make sense:
That is trying to write a single result into a three channel image.
This might make more sense:
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
int main( int argc, const char** argv )
Mat img = imread("/home/sahiti/Downloads/images3.png", CV_LOAD_IMAGE_UNCHANGED); //read the image data in the file "MyPic.JPG" and store it in 'img'
if (img.empty()) //check whether the image is loaded or not
cout << "Error : Image cannot be loaded..!!" << endl;
//system("pause"); //wait for a key press
return -1;
Mat image;
Mat im(img.size(), CV_8UC1);
int w,h;
//Vec3f intensity = img.at<Vec3f>(w, h);
Vec3f intensity = image.at<Vec3f>(w, h);
float blue = intensity.val[0];
float green = intensity.val[1];
float red = intensity.val[2];
float lumin=(0.2126 * red + 0.7152 *green + 0.0722 *blue);
//Mat im;
imwrite( "/home/sahiti/gray_Image.png", im );
return 0;
I'm attempting to integrate SDL_ttf into a particle generator program that I wrote recently, but I keep getting a runtime error: "Access violation reading location 0x00000044". This occurs when I attempt to blit a text surface to the game screen. I've scanned through my code several times, and I've had similar issues in the past, but I can't figure out the cause. What's most likely the problem?
The error occurs in Text.h:
#pragma once
#include "System.h"
#include <iostream>
#include <sstream>
// Holds a TTF font, a draw position, a text colour and the surface rendered
// from the current text.
// NOTE(review): access specifiers, closing braces and the destructor body are
// not visible in this excerpt.
// NOTE(review): the class stores raw TTF_Font* / SDL_Surface* and declares a
// destructor, but no copy constructor or copy assignment is visible. Bar holds
// a Text by value and ColorTable assigns temporary Bar objects into its array,
// so those copies would share 'text'. If the destructor frees it, the array
// elements are left pointing at a freed surface — a plausible cause of the
// access violation reported in DrawText. Confirm.
class Text {
// Opens the font, stores position and colour, and renders the initial text.
// Prints "damn" when rendering yields no surface.
// NOTE(review): the result of TTF_OpenFont is passed to the renderer without a
// visible null check.
Text(const char *fontaddress = "times.ttf", int size = 30, const char
*begintext = "0", int x = 0, int y = 0, SDL_Color color = { 255, 255, 255 }) {
font = TTF_OpenFont(fontaddress, size);
drawpos.x = x;
drawpos.y = y;
textcolor = color;
text = TTF_RenderText_Solid(font, begintext, textcolor);
if (!text)
std::cout << "damn" << std::endl;
// NOTE(review): the statement guarded by 'if (text)' is not visible
// (presumably it releases the surface — confirm).
~Text() {
if (text)
// Replace the rendered text from a C string or from an integer.
void SetText(const char *txt);
void SetText(int txt);
// Blits the rendered text to the screen at drawpos.
void DrawText();
TTF_Font *font;        // font handle returned by TTF_OpenFont
SDL_Rect drawpos;      // only x and y are assigned by the constructor
SDL_Color textcolor;   // colour passed to the renderer
SDL_Surface *text;     // surface returned by TTF_RenderText_Solid
// Re-renders the text surface from 'txt' using the stored font and colour.
// NOTE(review): the statement originally guarded by 'if (text)' is not visible
// in this excerpt (presumably it releases the previous surface — confirm).
inline void Text::SetText(const char *txt) {
if (text)
text = TTF_RenderText_Solid(font, txt, textcolor);
// Re-renders the text surface from the decimal representation of 'txt'.
// NOTE(review): the statement originally guarded by 'if (text)' is not visible
// in this excerpt (presumably it releases the previous surface — confirm).
inline void Text::SetText(int txt) {
if (text)
// Format the integer through a string stream, then render the resulting string.
std::stringstream s;
s << txt;
text = TTF_RenderText_Solid(font, s.str().c_str(), textcolor);
// Blits the rendered text surface onto the screen surface at drawpos.
// The screen pointer is a function-local static, so it is fetched from System
// on the first call only and reused afterwards.
// NOTE(review): neither 'text' nor 'screen' is checked before the blit; a
// surface that was already freed elsewhere would fault here — confirm.
inline void Text::DrawText() {
static SDL_Surface *const screen = System::GetInstance().GetScreen();
SDL_BlitSurface(text, NULL, screen, &drawpos);
// ^ This line throws the exception
#pragma once
#include <SDL.h>
#include <iostream>
#include "System.h"
#include "Text.h"
// A filled rectangle whose width maps to an 8-bit colour value, plus a Text
// label positioned 50 pixels to the left of the bar.
// NOTE(review): access specifiers and closing braces are not visible in this excerpt.
class Bar {
// Stores the rectangle, derives modval and the initial colorval from the width,
// and constructs the label.
// NOTE(review): the default argument for 'c' calls System::GetInstance() and
// contains '- >' (with a space), which is not the '->' operator and would not
// compile as written — looks like a paste artefact; confirm.
Bar(const int xpos = 0, const int ypos = 0, unsigned width = 0, unsigned height = 0, const Uint32 c = SDL_MapRGB(System::GetInstance().GetScreen()- >format, 0, 0, 0), const char *address = "times.ttf",
int sz = 30, const char *bgtext = "0", SDL_Color co = { 255, 255, 255 }) : max_w(width), color(c), colortext(address, sz, bgtext, xpos - 50, ypos, co) {
bar.x = xpos;
bar.y = ypos;
bar.w = width;
bar.h = height;
// Integer division; a zero result is replaced by 1 so later divisions by modval are defined.
modval = max_w / 255;
if (!modval)
modval = 1;
// Widths below 255 are reported on stderr; construction continues.
if (max_w < 255) {
std::cerr << "invalid width; below 255" << std::endl;
colorval = width / modval;
// Changes the bar width by the given delta.
void Modify(int width_modifier);
// Fills the bar rectangle on the screen.
void Draw();
// Returns the current 8-bit colour value.
Uint8 GetColorVal();
SDL_Rect bar;
unsigned max_w;   // initialized from the constructor's width argument
Uint32 color;     // fill colour passed to SDL_FillRect
Uint8 modval; // number of pixels (in width) required to advance or decrement the color value by 1
Uint8 colorval; // the color value modified when width is changed. Determined by
// modval. This value will be used by RGBTable class for text (on-screen value display) and particle color creation
// Label; note that Text owns raw SDL pointers (see the Text class).
Text colortext;
//inlined methods
//modifies the width of a bar by width_modifier (may be negative)
// NOTE(review): the statement controlled by the range check (presumably an
// early return when the new width would leave [0, max_w]) is not visible in
// this excerpt — confirm.
inline void Bar::Modify(int width_modifier) {
// The '< 0' test is evaluated first, so the comparison against the unsigned
// max_w is only reached for non-negative sums.
if (bar.w + width_modifier < 0 || bar.w + width_modifier > max_w)
bar.w += width_modifier;
// colorval is refreshed only when the new width is an exact multiple of modval.
if (bar.w % modval == 0) {
colorval = bar.w / modval;
//draws bar to system screen
// Fills the 'bar' rectangle with 'color'. The screen pointer is a function-local
// static, so it is fetched from System on the first call only.
// NOTE(review): the label (colortext) is not drawn by any statement visible here.
inline void Bar::Draw() {
static SDL_Surface *screen = System::GetInstance().GetScreen();
SDL_FillRect(screen, &bar, color);
//returns the 8bit color value represented by the width of a bar
// (the stored colorval member; nothing is recomputed here)
inline Uint8 Bar::GetColorVal() {
return colorval;
#pragma once
#include "System.h"
#include "Bar.h"
#include <array>
// Holds three Bar objects (red, green, blue) and builds a colour from their values.
// Accessed through GetInstance().
// NOTE(review): access specifiers and closing braces are not visible in this excerpt.
class ColorTable {
static bool needupdate;
// Returns the shared ColorTable instance.
static ColorTable &GetInstance();
void Input();
void Draw();
// Maps the three bars' colour values to a pixel value in the screen format.
Uint32 MakeColor();
// Builds three bars stacked vertically: each is 'height' tall and separated from
// the previous one by 'sepval', coloured pure red, green and blue respectively.
// NOTE(review): 'bars' is default-constructed first and each element is then
// overwritten by assignment from a temporary Bar, which is destroyed at the end
// of its statement. Bar contains a Text that holds raw SDL pointers with no
// visible copy control, so the assigned elements may keep pointers that the
// temporary's destructor has already released — confirm.
ColorTable(int top_left_x, int top_left_y, unsigned width, int height, int sepval) {
static SDL_Surface *screen = System::GetInstance().GetScreen();
bars[0] = Bar(top_left_x, top_left_y, width, height, SDL_MapRGB(screen->format, 255, 0, 0));
bars[1] = Bar(top_left_x, top_left_y + height + sepval, width, height, SDL_MapRGB(screen->format, 0, 255, 0));
bars[2] = Bar(top_left_x, top_left_y + (sepval *2) + (height * 2), width, height, SDL_MapRGB(screen->format, 0, 0, 255));
// The first (red) bar starts as the active one.
activebar = bars.begin();
std::array<Bar, 3> bars;
std::array<Bar, 3>::iterator activebar;
// Keyboard state array obtained from SDL via a default member initializer.
const Uint8 *key = SDL_GetKeyboardState(NULL);
// Returns the single ColorTable instance, a function-local static constructed
// on the first call with position (750, 620), bar width 510, bar height 50 and
// separation 10.
inline ColorTable &ColorTable::GetInstance() {
static ColorTable table(750, 620, 510, 50, 10);
return table;
// NOTE(review): the body of this method is not visible in this excerpt — confirm what it draws.
inline void ColorTable::Draw() {
// Combines the colour values of the three bars (index 0 = red, 1 = green,
// 2 = blue) into a pixel value in the screen's pixel format.
// The screen pointer is a function-local static, fetched on the first call only.
inline Uint32 ColorTable::MakeColor() {
static SDL_Surface *screen = System::GetInstance().GetScreen();
Uint8 r = bars[0].GetColorVal();
Uint8 g = bars[1].GetColorVal();
Uint8 b = bars[2].GetColorVal();
return SDL_MapRGB(screen->format, r, g, b);
Run the application in the debugger and break at the exception. Inspect the call stack. If you're lucky, it will trap at the point in your own code where the exception occurred. If the exception is not in your code module at the trap, click your way up the call stack (down the list presented there) until you reach the portion of your code where you can inspect the arguments of the caller; it's usually easy to spot where it went wrong at that point. Note that 0x44 is ASCII 'D' ('$' would be 0x24), so if you suspect the address is really character data, check whether your txt contains those characters.