I'm trying to get to grips with Bag Of Words in c++ and I have some sample code, but this Error keeps on throwing it and I don't know why.
I'm completely new to this and am very much lost.
Here's the entirety of the code:
#include "stdafx.h"
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/nonfree/features2d.hpp>
using namespace cv;
using namespace std;
#define DICTIONARY_BUILD 1 // set DICTIONARY_BUILD 1 to do Step 1, otherwise it goes to step 2
int _tmain(int argc, _TCHAR* argv[])
{
#if DICTIONARY_BUILD == 1
//Step 1 - Obtain the set of bags of features.
//to store the input file names
char * filename = new char[100];
//to store the current input image
Mat input;
//To store the keypoints that will be extracted by SIFT
vector<KeyPoint> keypoints;
//To store the SIFT descriptor of current image
Mat descriptor;
//To store all the descriptors that are extracted from all the images.
Mat featuresUnclustered;
//The SIFT feature extractor and descriptor
SiftDescriptorExtractor detector;
//I select 20 (1000/50) images from 1000 images to extract feature descriptors and build the vocabulary
for(int f=0;f<999;f+=50){
//create the file name of an image
sprintf(filename,"G:\\testimages\\image\\%i.jpg",f);
//open the file
input = imread(filename, CV_LOAD_IMAGE_GRAYSCALE); // -- Forgot to add in
//detect feature points
detector.detect(input, keypoints);
//compute the descriptors for each keypoint
detector.compute(input, keypoints,descriptor);
//put the all feature descriptors in a single Mat object
featuresUnclustered.push_back(descriptor);
//print the percentage
printf("%i percent done\n",f/10);
}
//Construct BOWKMeansTrainer
//the number of bags
int dictionarySize=200;
//define Term Criteria
TermCriteria tc(CV_TERMCRIT_ITER,100,0.001);
//retries number
int retries=1;
//necessary flags
int flags=KMEANS_PP_CENTERS;
//Create the BoW (or BoF) trainer
BOWKMeansTrainer bowTrainer(dictionarySize,tc,retries,flags);
//cluster the feature vectors
Mat dictionary;
dictionary=bowTrainer.cluster(featuresUnclustered); // -- BREAKS
//store the vocabulary
FileStorage fs("dictionary.yml", FileStorage::WRITE);
fs << "vocabulary" << dictionary;
fs.release();
#else
//Step 2 - Obtain the BoF descriptor for given image/video frame.
//prepare BOW descriptor extractor from the dictionary
Mat dictionary;
FileStorage fs("dictionary.yml", FileStorage::READ);
fs["vocabulary"] >> dictionary;
fs.release();
//create a nearest neighbor matcher
Ptr<DescriptorMatcher> matcher(new FlannBasedMatcher);
//create Sift feature point extracter
Ptr<FeatureDetector> detector(new SiftFeatureDetector());
//create Sift descriptor extractor
Ptr<DescriptorExtractor> extractor(new SiftDescriptorExtractor);
//create BoF (or BoW) descriptor extractor
BOWImgDescriptorExtractor bowDE(extractor,matcher);
//Set the dictionary with the vocabulary we created in the first step
bowDE.setVocabulary(dictionary);
//To store the image file name
char * filename = new char[100];
//To store the image tag name - only for save the descriptor in a file
char * imageTag = new char[10];
//open the file to write the resultant descriptor
FileStorage fs1("descriptor.yml", FileStorage::WRITE);
//the image file with the location. change it according to your image file location
sprintf(filename,"G:\\testimages\\image\\1.jpg");
//read the image
Mat img=imread(filename,CV_LOAD_IMAGE_GRAYSCALE);
//To store the keypoints that will be extracted by SIFT
vector<KeyPoint> keypoints;
//Detect SIFT keypoints (or feature points)
detector->detect(img,keypoints);
//To store the BoW (or BoF) representation of the image
Mat bowDescriptor;
//extract BoW (or BoF) descriptor from given image
bowDE.compute(img,keypoints,bowDescriptor);
//prepare the yml (some what similar to xml) file
sprintf(imageTag,"img1");
//write the new BoF descriptor to the file
fs1 << imageTag << bowDescriptor;
//You may use this descriptor for classifying the image.
//release the file storage
fs1.release();
#endif
printf("\ndone\n");
return 0;
}
But then it throws this up:
OpenCV Error: Assertion failed (data.dims <= 2 && type == CV_32F && K > 0) in cv::kmeans, file C:\buildslave64\win64_amdoc1\2_4_PackSlave-win32-vc11-shared\opencv\modules\core\src\matrix.cpp, line 2701
Help, please.
EDIT
Line that it breaks on:
dictionary = bowTrainer.cluster(featuresUnclustered); // -- Breaks
EDIT 2
Ive come across this, but i am unsure how to translate it to help with my cause.
I'm not 100% sure of what the code is doing since I'm not an OpenCV expert. However I can see that you are not initializing input in any way. This probably results in you not getting the descriptors you want, and thus not really doing anything. The code then probably breaks since it expects actual data in, but there is none.
In general, when dealing with OpenCV or other big "kind of messy" libraries I would advise you to proceed step by step, and checking that results are what you expect every step of the way. Copy-pasting a big blob of code and expecting it to work is never the best course of action.
if (allDescriptors.type() != CV_32F)
{
allDescriptors.convertTo(allDescriptors, CV_32F);
}
Make sure that your image directory in 1st step is correct. It should exist training images as 0.jpg, 50.jpg, ... etc. Cause in a lot of situations, this error occurs when image is not loaded. You can add following codes after imread to check. Hope it can work.
if(input.empty())
{
cout << "Error: Image cannot be loaded !" << endl;
system("Pause");
return -1;
}
Related
void SceneRecognition::BowRepresentation()
{
Mat dstGray2;
//Folder path is written and file names are taken according to that.
vector <String> fileNames;
String folder("airCond/Train/*.jpg");
glob(folder, fileNames,false);
//File names are checked.
for (auto t : fileNames)
{
cout << t << endl;
}
//Object is opened.
Ptr<SiftFeatureDetector> detector;
//Gray image holder is opened.
Mat dst, dstGray;
//Detector is created.
detector = SiftFeatureDetector::create();
//Keypoint vector is created.
vector<KeyPoint> keypoints;
//Object is opened.
Mat Desp;
Ptr<SiftDescriptorExtractor> extractor;
//Extractor is created.
extractor = SiftDescriptorExtractor::create();
Mat training_descriptors(1, extractor->descriptorSize(), extractor->descriptorType());
// Image matrices are read in a loop.
for (size_t i = 0; i < fileNames.size(); i++)
{
Mat im = imread(fileNames[i]);
//Image is converted to gray.
cvtColor(im, dstGray, COLOR_BGR2GRAY);
detector->detect(dstGray, keypoints);
//Descriptors are extracted.
extractor->compute(dstGray, keypoints, Desp);
training_descriptors.push_back(Desp);
}
cout << training_descriptors.size << endl;
/*Number of clusters are chosen as 1000.*/
//TermCriteria tc(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 0.001);
//int retries = 1;
//int flags = KMEANS_PP_CENTERS;
BOWKMeansTrainer bowTrainer(100);
bowTrainer.add(training_descriptors);
//Created descriptors are added.
cout << "a" << endl;
//Vocabulary is created by k-means clustering.
Mat vocabulary = bowTrainer.cluster();
}
When I run my code I get an error as followed: OpenCV(4.5.5) Error: Iterations do not converge (kmeans: can't update cluster center (check input for huge or NaN values)) in cv::generateCentersPP, file C:\Users\LENOVO\Desktop\openncvv\opencv-4.5.5\modules\core\src\kmeans.cpp, line 147
I tried different approaches but couldn't come up with an answer. Any suggestions?
One row of an input image
One of the image size is 75*144. Should I change something when I taking the image as input ?
Im writing program to classify objects using SVM and BoW. I am getting the following error when I try use the TrainData::create() member function to create the data necessary to train SVM classifier.
OpenCV Error: Assertion failed (responses.type() == CV_32F || responses.type() == CV_32S) in setData
This is my function to read the train data from a director, compute BoW histogram for each train image, create a matrix of all descriptors of all train images in a matrix and the create the train data, labels and then train the SVM
void trainClassifier(string dictionaryPath, string trainDataPath, string saveClassifierPath, int samples){
//Write file
FileStorage readFile(dictionaryPath, FileStorage::READ);
//Load into Dictionary matrix
readFile["Data"] >> dictionary;
if(dictionary.empty() == false)
{
cout << "Error loading visual vocalbulary" << endl;
}
//Set the Bow descripter with the dictionary
testBOW.setVocabulary(dictionary);
//Inititate variables
vector<KeyPoint> keypointTrain;
vector<DMatch> matchTrain;
Mat descriptorTrain;
//inputTrain -> input images, inputFeatures -> BoW descriptor output
Mat inputTrain;
Mat inputFeatures;
//Label array
vector<string> label;
//Create a string to read files from directory
string updatedDataPath;
for(int i = 1; i <= samples; i++)
{
//Update the string updateDataPath to correspond the image FILENAME with each iteration
updatedDataPath.append(trainDataPath);
updatedDataPath += to_string(i);
updatedDataPath.append(".JPEG");
//Read FILE from the updated datapath
inputTrain = imread(updatedDataPath);
//Convert to single channel, since classifier takes only single channel data
cvtColor(inputTrain, inputTrain, CV_BGR2GRAY);
//Generate BoW features/histogram for the train image
testBOW.compute(inputTrain, keypointTrain, inputFeatures);
//Load the data in the descriptor Matrix
descriptorTrain.push_back(inputFeatures);
//Generate label according to the sample
if(samples > 1 && samples <= 10)
{
label.push_back("OBJ1 POSSITIVE");
}
else if (samples > 11 && samples <= 20)
{
label.push_back("OBJ1 NEGATIVE");
}
//Reset data path
updatedDataPath.clear();
}
//Convert the descriptor matrix into 32-pt float to make it compatible with classifier
if(descriptorTrain.type() != CV_32F)
{
descriptorTrain.convertTo(descriptorTrain, CV_32F);
}
//Create train data using TrainData::create()
Ptr<TrainData> trainData = TrainData::create(descriptorTrain, ROW_SAMPLE, label);
//Iniitialize Support vector based classifier (SVM) to classify and detect object
Ptr<SVM>SVM = SVM::create();
SVM->setType(SVM::C_SVC);
SVM->setKernel(SVM::LINEAR);
SVM->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
//Now train the SVM
SVM->trainAuto(trainData);
SVM->save(saveClassifierPath);
cout << "Classifier training status: SUCCESSFUL" << endl;}
Any help is appreciated. Thanks and cheers :)
You are using a vector<string> as the TrainData responses.
//Label array
vector<string> label;
// [long code]
//Create train data using TrainData::create()
Ptr<TrainData> trainData = TrainData::create(descriptorTrain, ROW_SAMPLE, label);
And it shoud be a Mat CV_32F or CV_32S, as the error says.
You can confirm that at:
documentation: TrainData::create(...)
source-code: TrainData::create(...) that calls setData(...)
This is my code for face recognition in videos. It runs without any error but it's prediction
is wrong most of the time.I am using LBPH face recognizer to recognize the faces.
I tried using haar cascades but it does not load. so i switched to LBHP.please help me to improve the prediction.
I am using gray scale cropped images of size 500 x 500 (pixels) for training the cascade classifier.
#include <opencv2/core/core.hpp>
#include <opencv2/contrib/contrib.hpp
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace cv;
using namespace std;
static void read_csv(const string& filename, vector<Mat>& images, vector<int>& labels, char separator = ';') {
std::ifstream file(filename.c_str(), ifstream::in);
if (!file) {
string error_message = "No valid input file was given, please check the given filename.";
CV_Error(CV_StsBadArg, error_message);
}
string line, path, classlabel;
while (getline(file, line)) {
stringstream liness(line);
getline(liness, path, separator);
getline(liness, classlabel);
if(!path.empty() && !classlabel.empty()) {
images.push_back(imread(path, 0));
labels.push_back(atoi(classlabel.c_str()));
}
}
}
string g_listname_t[]=
{
"ajay","Aasai","famiz"
};
int main(int argc, const char *argv[]) {
// Check for valid command line arguments, print usage
// if no arguments were given.
//if (argc != 4) {
// cout << "usage: " << argv[0] << " </path/to/haar_cascade> </path/to/csv.ext> </path/to/device id>"<<endl;
// cout << "\t </path/to/haar_cascade> -- Path to the Haar Cascade for face detection." << endl;
// cout << "\t </path/to/csv.ext> -- Path to the CSV file with the face database." << endl;
// cout << "\t <device id> -- The webcam device id to grab frames from." << endl;
// exit(1);
//}
//// Get the path to your CSV:
//string fn_haar = string(argv[1]);
//string fn_csv = string(argv[2]);
//int deviceId = atoi(argv[3]);
//// Get the path to your CSV:
// please set the correct path based on your folder
string fn_haar = "lbpcascade_frontalface.xml";
string fn_csv = "reader.ext ";
int deviceId = 0; // here is my webcam Id.
// These vectors hold the images and corresponding labels:
vector<Mat> images;
vector<int> labels;
// Read in the data (fails if no valid input filename is given, but you'll get an error message):
try {
read_csv(fn_csv, images, labels);
} catch (cv::Exception& e) {
cerr << "Error opening file \"" << fn_csv << "\". Reason: " << e.msg << endl;
// nothing more we can do
exit(1);
}
// Get the height from the first image. We'll need this
// later in code to reshape the images to their original
// size AND we need to reshape incoming faces to this size:
int im_width = images[0].cols;
int im_height = images[0].rows;
// Create a FaceRecognizer and train it on the given images:
Ptr<FaceRecognizer> model = createLBPHFaceRecognizer();
model->train(images, labels);
cout<<("Facerecognizer created");
// That's it for learning the Face Recognition model. You now
// need to create the classifier for the task of Face Detection.
// We are going to use the haar cascade you have specified in the
// command line arguments:
CascadeClassifier lbp_cascade;
if ( ! lbp_cascade.load(fn_haar) )
{
cout<<("\nlbp cascade not loaded");
}
else
{
cout<<("\nlbp cascade loaded");
}
// Get a handle to the Video device:
VideoCapture cap(deviceId);
cout<<("\nvideo device is opened");
// Check if we can use this device at all:
if(!cap.isOpened()) {
cerr << "Capture Device ID " << deviceId << "cannot be opened." << endl;
return -1;
}
// Holds the current frame from the Video device:
Mat frame;
for(;;) {
cap >> frame;
// Clone the current frame:
Mat original = frame.clone();
cout<<("\nframe is cloned");
// Convert the current frame to grayscale:
Mat gray;
//gray = imread("G:\Picture\003.jpg",0);
cvtColor(original, gray, CV_BGR2GRAY);
imshow("gray image", gray);
// And display it:
char key1 = (char) waitKey(50);
// Find the faces in the frame:
cout<<("\ncolor converted");
vector< Rect_<int> > faces;
cout<<("\ndetecting faces");
lbp_cascade.detectMultiScale(gray, faces);
// At this point you have the position of the faces in
// faces. Now we'll get the faces, make a prediction and
// annotate it in the video. Cool or what?
cout<<("\nfaces detected\n");
cout<<faces.size();
for(int i = 0; i < faces.size(); i++)
{
// Process face by face:
cout<<("\nprocessing faces");
Rect face_i = faces[i];
// Crop the face from the image. So simple with OpenCV C++:
Mat face = gray(face_i);
// Resizing the face is necessary for Eigenfaces and Fisherfaces. You can easily
// verify this, by reading through the face recognition tutorial coming with OpenCV.
// Resizing IS NOT NEEDED for Local Binary Patterns Histograms, so preparing the
// input data really depends on the algorithm used.
//
// I strongly encourage you to play around with the algorithms. See which work best
// in your scenario, LBPH should always be a contender for robust face recognition.
//
// Since I am showing the Fisherfaces algorithm here, I also show how to resize the
// face you have just found:
/*Mat face_resized;
cv::resize(face, face_resized, Size(im_width, im_height), 1.0, 1.0, INTER_CUBIC);
// Now perform the prediction, see how easy that is:
cout<<("\nface resized");
imshow("resized face image", face_resized);*/
int prediction = model->predict(face);
cout<<("\nface predicted");
// And finally write all we've found out to the original image!
// First of all draw a green rectangle around the detected face:
cout<<("\nnow writing to original");
rectangle(original, face_i, CV_RGB(0, 255,0), 1);
// Create the text we will annotate the box with:
string box_text;
box_text = format( "Prediction =",prediction);
// Get stringname
if ( prediction >= 0 && prediction <=1 )
{
box_text.append( g_listname_t[prediction] );
}
else box_text.append( "Unknown" );
// Calculate the position for annotated text (make sure we don't
// put illegal values in there):
int pos_x = std::max(face_i.tl().x - 10, 0);
int pos_y = std::max(face_i.tl().y - 10, 0);
// And now put it into the image:
putText(original, box_text, Point(pos_x, pos_y), FONT_HERSHEY_PLAIN, 1.0, CV_RGB(0,255,0), 2.0);
}
// Show the result:
imshow("face_recognizer", original);
// And display it:
char key = (char) waitKey(50);
// Exit this loop on escape:
if(key == 27)
break;
}
return 0;
}
That is an expected result if you ask me, the code which you showed is the basic one to do recognition, there are some backdrops which we need to take care of before implementing.
1) the quality of training images, how did you crop them ?
do they contain any extra information apart from face, if you used haar classifier in our opencv data to crop faces, then, the images tend to contain extra information than the face, as the rectangles are a bit large in size when compared to face.
2) there might be a chance that, even the rotated faces might be trained, so, its tough to classify with the features of rotated faces.
3) how many images, you trained the recognizer with ?, it playes a crucial role.
Answer for the first question, is most likely to be out of opencv, we cant do much about it, as there is very less probability that, we ll find a face detector which is as good and as simple as haar detector, so, we could make this as an exemption, if we can adjust with an accuracy around 70 %.
the second problem could be solved with some preprocessing techniques on training and testing dataset.
Like., aligning faces which are being rotated
follow this link, very good suggestions for face alignment are being suggested.
How to align face images c++ opencv
the third problem is solved with good number of samples which is not a hard task to achieve, take care of alignment before training, so that correct features could be extracted to classify.
there might be other factors that can improve the accuracy which I might have missed.
I have this problem,
when i run with vs2010 (debug) (open cv 2.4.0) facerec_demo.cpp gaves me the program this error
OpenCV Error: Image step is wrong (The matrix is not continuous, thus its number of rows can not be changed) in unknown function, file ......\src\opencv\modul es\core>\src\matrix.cpp, line 801
This error derives me to this line in facerec.cpp
(Fisherfaces::train(InputArray src, InputArray _lbls)
Mat data = asRowMatrix(src, CV_64FC1); <-- this gets a exeption, not handled.
and a i use at pgm img database and this is my original *facerec_demo.cpp* file
#include "stdafx.h"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream>
using namespace cv;
using namespace std;
vector<string> split_at_commas(const string& row)
{
vector<string> res;
istringstream buf(row);
string s;
while (getline(buf, s, ';'))
res.push_back(s);
return res;
}
Mat toGrayscale(InputArray _src) {
Mat src = _src.getMat();
// only allow one channel
if(src.channels() != 1)
CV_Error(CV_StsBadArg, "Only Matrices with one channel are supported");
// create and return normalized image
Mat dst;
cv::normalize(_src, dst, 0, 255, NORM_MINMAX, CV_8UC1);
return dst;
}
void read_csv(const string& filename, vector<Mat>& images, vector<int>& labels, char separator = ';') {
//std::ifstream file(filename.c_str(), ifstream::in);
std::ifstream file(_T("D:\\Users\\PC ACER\\Documents\\mycsv4.csv"));
if (!file)
throw std::exception();
string line="", path="", classlabel="";
while (getline(file, line)) {
//vector<string> values = split_at_commas(line);
stringstream liness(line);
getline(liness, path, ';');
getline(liness, classlabel);
images.push_back(imread(path, 0));
labels.push_back(atoi(classlabel.c_str()));
}
}
int main(int argc, const char *argv[]) {
// check for command line arguments
if (argc != 2) {
cout << "usage: " << argv[0] << " <csv.ext>" << endl;
exit(1);
}
// path to your CSV
string fn_csv = string(argv[1]);
// images and corresponding labels
vector<Mat> images;
vector<int> labels;
// read in the data
try {
read_csv(fn_csv, images, labels);
} catch (exception&) {
cerr << "Error opening file \"" << fn_csv << "\"." << endl;
exit(1);
}
// get width and height
//int width = images[0].cols;
int height = images[0].rows;
// get test instances
Mat testSample = images[images.size() - 1];
int testLabel = labels[labels.size() - 1];
// ... and delete last element
images.pop_back();
labels.pop_back();
// build the Fisherfaces model
Ptr<FaceRecognizer> model = createFisherFaceRecognizer();
model->train(images, labels);
// test model
int predicted = model->predict(testSample);
cout << "predicted class = " << predicted << endl;
cout << "actual class = " << testLabel << endl;
// get the eigenvectors
Mat W = model->eigenvectors();
// show first 10 fisherfaces
for (int i = 0; i < min(10, W.cols); i++) {
// get eigenvector #i
Mat ev = W.col(i).clone();
// reshape to original size AND normalize between [0...255]
Mat grayscale = toGrayscale(ev.reshape(1, height));
// show image (with Jet colormap)
Mat cgrayscale;
applyColorMap(grayscale, cgrayscale, COLORMAP_JET);
imshow(format("%d", i), cgrayscale);
}
waitKey(0);
return 0;
}
I see you are using OpenCV 2.4.0. As the developer I admit the confusion is my fault: I didn't thoroughly check the input data passed to the training method back then, so people passing wrongly aligned data got error messages like yours. Most likely the error you see happens, because your training images don't have equal size. This is necessary for the Eigenfaces and Fisherfaces algorithm (not for the Local Binary Patterns Histograms). OpenCV 2.4.0 just tries to reshape the data to a matrix and blows up with the error message you see; OpenCV 2.4.2 instead checks (before training) if the input data is correctly aligned and throws a meaningful exception... with a very clear message.
This post assumes it could also be due to linking the OpenCV libraries:
Getting OpenCV Error "Image step is wrong" in Fisherfaces.train() method
If it's not linking the libraries it might be due to the image size. Resizing your training images, can easily be done OpenCV with cv::resize:
http://docs.opencv.org/modules/imgproc/doc/geometric_transformations.html#resize
But you probably should consider to switch to OpenCV 2.4.2, where all this is added:
http://opencv.org/opencv-v2-4-2-released.html
This version also comes with an extensive documentation at:
http://docs.opencv.org/trunk/modules/contrib/doc/facerec/index.html
However if you can't change to OpenCV 2.4.2 and you'll need to stay with OpenCV 2.4.0, then you could also use libfacerec:
https://github.com/bytefish/libfacerec
This is the project, that got merged into OpenCV. I made sure it works with OpenCV 2.4.0 and it'll leave you with exactely the same interface as the OpenCV 2.4.2 version. So once you feel like updating to OpenCV 2.4.2, you'll only switch the includes.
I got the same OpenCv error, I try all help that I find here, and it still gives me an exception (exception happend on .Predict() statement).
Problem was in the size of images. Size of an Images must be less then 100px (<100px) (not sure if exactly less then 100,maybe 100 would still work).
I change my pictures size of 150:150 to 80:80 and its working!
Hope I help someone, because this was annoying error.
I answered this question on another post but I want to make sure people searching for help with this error are sure to find the answer.
when you make the model
Ptr<FaceRecognizer> model = createFisherFaceRecognizer();
You need to pass two params
createFisherFaceRecognizer(int num_components=0, double threshold=DBL_MAX);
This page has more information on how createFisherFaceRecognizer works
I have a project, which I want to detect objects in the images; my aim is to use HOG features. By using OpenCV SVM implementation , I could find the code for detecting people, and I read some papers about tuning the parameters in order to detect object instead of people. Unfortunately, I couldn't do that for a few reasons; first of all, I am probably tuning the parameters incorrectly, second of all, I am not a good programmer in C++ but I have to do it with C++/OpenCV... here you can find the code for detecting HOG features for people by using C++/OpenCV.
Let's say that I want to detect the object in this image. Now, I will show you what I have tried to change in the code but it didn't work out with me.
The code that I tried to change:
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
I tried to change getDefaultPeopleDetector() with the following parameters, but it didn't work:
(Size(64, 128), Size(16, 16), Size(8, 8), Size(8, 8), 9, 0,-1, 0, 0.2, true, cv::HOGDescriptor::DEFAULT_NLEVELS)
I then tried to make a vector, but when I wanted to print the results, it seems to be empty.
vector<float> detector;
HOGDescriptor hog(Size(64, 128), Size(16, 16), Size(8, 8), Size(8, 8), 9, 0,-1, 0, 0.2, true, cv::HOGDescriptor::DEFAULT_NLEVELS);
hog.setSVMDetector(detector);
Please, I need help solving this problem.
In order to detect arbitrary objects with using opencv HOG descriptors and SVM classifier, you need to first train the classifier. Playing with the parameters will not help here, sorry :( .
In broad terms, you will need to complete the following steps:
Step 1) Prepare some training images of the objects you want to detect (positive samples). Also you will need to prepare some images with no objects of interest (negative samples).
Step 2) Detect HOG features of the training sample and use this features to train an SVM classifier (also provided in OpenCV).
Step 3) Use the coefficients of the trained SVM classifier in HOGDescriptor::setSVMDetector() method.
Only then, you can use the peopledetector.cpp sample code, to detect the objects you want to detect.
I've been dealing with the same problem and surprised with the lack of some clean C++ solutions I have create ~> this wrapper of SVMLight <~, which is a static library that provides classes SVMTrainer and SVMClassifier that simplify the training to something like:
// we are going to use HOG to obtain feature vectors:
HOGDescriptor hog;
hog.winSize = Size(32,48);
// and feed SVM with them:
SVMLight::SVMTrainer svm("features.dat");
then for each training sample:
// obtain feature vector describing sample image:
vector<float> featureVector;
hog.compute(img, featureVector, Size(8, 8), Size(0, 0));
// and write feature vector to the file:
svm.writeFeatureVectorToFile(featureVector, true); // true = positive sample
till the features.dat file contains feature vectors for all samples and at the end you just call:
std::string modelName("classifier.dat");
svm.trainAndSaveModel(modelName);
Once you have a file with model (or features.dat that you can just train the classifier with):
SVMLight::SVMClassifier c(classifierModelName);
vector<float> descriptorVector = c.getDescriptorVector();
hog.setSVMDetector(descriptorVector);
...
vector<Rect> found;
Size padding(Size(0, 0));
Size winStride(Size(8, 8));
hog.detectMultiScale(segment, found, 0.0, winStride, padding, 1.01, 0.1);
just check the documentation of HOGDescriptor for more info :)
I have done similar things as you did: collect samples of positive and negative images using HOG to extract features of car, train the feature set using linear SVM (I use SVM light), then use the model to detect car using HOG multidetect function.
I get lot of false positives, then I retrain the data using positive samples and false positive+negative samples. The resulting model is then tested again. The resulting detection improves (less false positives) but the result is not satisfying (average 50% hit rate and 50% false positives). Tuning up multidetect parameters improve the result but not much (10% less false positives and increase in hit rate).
Edit
I can share you the source code if you'd like, and I am very open for discussion as I have not get satisfactory results using HOG. Anyway, I think the code can be good starting point on using HOG for training and detection
Edit: adding code
static void calculateFeaturesFromInput(const string& imageFilename, vector<float>& featureVector, HOGDescriptor& hog)
{
Mat imageData = imread(imageFilename, 1);
if (imageData.empty()) {
featureVector.clear();
printf("Error: HOG image '%s' is empty, features calculation skipped!\n", imageFilename.c_str());
return;
}
// Check for mismatching dimensions
if (imageData.cols != hog.winSize.width || imageData.rows != hog.winSize.height) {
featureVector.clear();
printf("Error: Image '%s' dimensions (%u x %u) do not match HOG window size (%u x %u)!\n", imageFilename.c_str(), imageData.cols, imageData.rows, hog.winSize.width, hog.winSize.height);
return;
}
vector<Point> locations;
hog.compute(imageData, featureVector, winStride, trainingPadding, locations);
imageData.release(); // Release the image again after features are extracted
}
...
int main(int argc, char** argv) {
// <editor-fold defaultstate="collapsed" desc="Init">
HOGDescriptor hog; // Use standard parameters here
hog.winSize.height = 128;
hog.winSize.width = 64;
// Get the files to train from somewhere
static vector<string> tesImages;
static vector<string> positiveTrainingImages;
static vector<string> negativeTrainingImages;
static vector<string> validExtensions;
validExtensions.push_back("jpg");
validExtensions.push_back("png");
validExtensions.push_back("ppm");
validExtensions.push_back("pgm");
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Read image files">
getFilesInDirectory(posSamplesDir, positiveTrainingImages, validExtensions);
getFilesInDirectory(negSamplesDir, negativeTrainingImages, validExtensions);
/// Retrieve the descriptor vectors from the samples
unsigned long overallSamples = positiveTrainingImages.size() + negativeTrainingImages.size();
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Calculate HOG features and save to file">
// Make sure there are actually samples to train
if (overallSamples == 0) {
printf("No training sample files found, nothing to do!\n");
return EXIT_SUCCESS;
}
/// #WARNING: This is really important, some libraries (e.g. ROS) seems to set the system locale which takes decimal commata instead of points which causes the file input parsing to fail
setlocale(LC_ALL, "C"); // Do not use the system locale
setlocale(LC_NUMERIC,"C");
setlocale(LC_ALL, "POSIX");
printf("Reading files, generating HOG features and save them to file '%s':\n", featuresFile.c_str());
float percent;
/**
* Save the calculated descriptor vectors to a file in a format that can be used by SVMlight for training
* #NOTE: If you split these steps into separate steps:
* 1. calculating features into memory (e.g. into a cv::Mat or vector< vector<float> >),
* 2. saving features to file / directly inject from memory to machine learning algorithm,
* the program may consume a considerable amount of main memory
*/
fstream File;
File.open(featuresFile.c_str(), ios::out);
if (File.good() && File.is_open()) {
File << "# Use this file to train, e.g. SVMlight by issuing $ svm_learn -i 1 -a weights.txt " << featuresFile.c_str() << endl; // Remove this line for libsvm which does not support comments
// Iterate over sample images
for (unsigned long currentFile = 0; currentFile < overallSamples; ++currentFile) {
storeCursor();
vector<float> featureVector;
// Get positive or negative sample image file path
const string currentImageFile = (currentFile < positiveTrainingImages.size() ? positiveTrainingImages.at(currentFile) : negativeTrainingImages.at(currentFile - positiveTrainingImages.size()));
// Output progress
if ( (currentFile+1) % 10 == 0 || (currentFile+1) == overallSamples ) {
percent = ((currentFile+1) * 100 / overallSamples);
printf("%5lu (%3.0f%%):\tFile '%s'", (currentFile+1), percent, currentImageFile.c_str());
fflush(stdout);
resetCursor();
}
// Calculate feature vector from current image file
calculateFeaturesFromInput(currentImageFile, featureVector, hog);
if (!featureVector.empty()) {
/* Put positive or negative sample class to file,
* true=positive, false=negative,
* and convert positive class to +1 and negative class to -1 for SVMlight
*/
File << ((currentFile < positiveTrainingImages.size()) ? "+1" : "-1");
// Save feature vector components
for (unsigned int feature = 0; feature < featureVector.size(); ++feature) {
File << " " << (feature + 1) << ":" << featureVector.at(feature);
}
File << endl;
}
}
printf("\n");
File.flush();
File.close();
} else {
printf("Error opening file '%s'!\n", featuresFile.c_str());
return EXIT_FAILURE;
}
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Pass features to machine learning algorithm">
/// Read in and train the calculated feature vectors
printf("Calling SVMlight\n");
SVMlight::getInstance()->read_problem(const_cast<char*> (featuresFile.c_str()));
SVMlight::getInstance()->train(); // Call the core libsvm training procedure
printf("Training done, saving model file!\n");
SVMlight::getInstance()->saveModelToFile(svmModelFile);
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Generate single detecting feature vector from calculated SVM support vectors and SVM model">
printf("Generating representative single HOG feature vector using svmlight!\n");
vector<float> descriptorVector;
vector<unsigned int> descriptorVectorIndices;
// Generate a single detecting feature vector (v1 | b) from the trained support vectors, for use e.g. with the HOG algorithm
SVMlight::getInstance()->getSingleDetectingVector(descriptorVector, descriptorVectorIndices);
// And save the precious to file system
saveDescriptorVectorToFile(descriptorVector, descriptorVectorIndices, descriptorVectorFile);
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Test detecting vector">
cout << "Test Detecting Vector" << endl;
hog.setSVMDetector(descriptorVector); // Set our custom detecting vector
cout << "descriptorVector size: " << sizeof(descriptorVector) << endl;
getFilesInDirectory(tesSamplesDir, tesImages, validExtensions);
namedWindow("Test Detector", 1);
for( size_t it = 0; it < tesImages.size(); it++ )
{
cout << "Process image " << tesImages[it] << endl;
Mat image = imread( tesImages[it], 1 );
detectAndDrawObjects(image, hog);
for(;;)
{
int c = waitKey();
if( (char)c == 'n')
break;
else if( (char)c == '\x1b' )
exit(0);
}
}
// </editor-fold>
return EXIT_SUCCESS;
}