I would like to cluster a BW image using the k-means clustering algorithm I found in the dlib library, and later the spectral clustering algorithm.
The result I have at the moment is very strange (at least to me), and I would be grateful for any help.
Original image:
The current result is:
cv::Mat inputImage = cv::imread("lixo.png");
cv::cvtColor(inputImage, inputImage, CV_RGB2GRAY);
cv::imshow("Display Image", inputImage);
cv::waitKey(0);
// from cv::mat to dlib vector of points
dlib::matrix<double,2,1> sample_point;
std::vector<dlib::matrix<double,2,1>> samples;
for (long i = 0; i < inputImage.rows; ++i) {
    for (long j = 0; j < inputImage.cols; ++j) {
        if (inputImage.at<uchar>(i,j) == (uchar)255) {
            sample_point(0) = i;
            sample_point(1) = j;
            samples.push_back(sample_point);
        }
    }
}
// typedef for the kind of kernel we want to use
typedef dlib::radial_basis_kernel<dlib::matrix<double,2,1>> kernel_type;
// the kcentroid object
dlib::kcentroid<kernel_type> kc(kernel_type(0.1),0.01, 8);
// kkmeans object and tell it to use kcentroid objects
dlib::kkmeans<kernel_type> test(kc);
// tell the kkmeans we want 3 clusters
int nclus = 3;
test.set_number_of_centers(nclus);
// pick some initial centers for the k-means algorithm
std::vector<dlib::matrix<double,2,1>> initial_centers;
pick_initial_centers(nclus,initial_centers, samples,test.get_kernel());
// now run the k-means algorithm on our set of samples
test.train(samples, initial_centers);
// show result
int r = inputImage.rows;
int c = inputImage.cols;
cv::Mat result1 = cv::Mat::zeros(r, c, CV_8UC1);
cv::Mat result2 = cv::Mat::zeros(r, c, CV_8UC1);
cv::Mat result3 = cv::Mat::zeros(r, c, CV_8UC1);
int n1 = 0;
int n2 = 0;
int n3 = 0;
std::cout << " Result" << std::endl;
for (long i = 0; i < samples.size(); ++i) {
    sample_point = samples[i];
    int result = test(sample_point);
    if (result == 0) {
        n1++;
        result1.at<uchar>(sample_point(0), sample_point(1)) = (uchar)255;
    } else if (result == 1) {
        n2++;
        result2.at<uchar>(sample_point(0), sample_point(1)) = (uchar)255;
    } else if (result == 2) {
        n3++;
        result3.at<uchar>(sample_point(0), sample_point(1)) = (uchar)255;
    }
}
cv::imshow("result1", result1);
cv::imshow("result2", result2);
cv::imshow("result3", result3);
cv::waitKey(0);
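One thing I wondered about: the radial_basis_kernel here compares raw pixel coordinates, so with gamma = 0.1 and distances of hundreds of pixels the kernel values collapse towards zero. As an experiment (this is just my guess, not part of the code above), clustering on rescaled copies of the samples would look roughly like this:

// Hypothetical experiment: cluster on coordinates rescaled to [0,1] so that
// gamma = 0.1 operates on a comparable scale, while keeping the original
// pixel coordinates in 'samples' for drawing the result images.
std::vector<dlib::matrix<double,2,1>> scaled = samples;
for (auto& s : scaled) {
    s(0) /= static_cast<double>(inputImage.rows);
    s(1) /= static_cast<double>(inputImage.cols);
}
// then pick initial centers from 'scaled', call test.train(scaled, initial_centers),
// and use test(scaled[i]) when assigning samples[i] to result1/result2/result3.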
I have been able to train my SVM, and the program runs until it reaches prediction. There I get an error when calling SVM prediction on the testing images.
What have I missed in the code? Can anybody help me?
OpenCV Error: Assertion failed (samples.cols == var_count && samples.type() == CV_32F) in cv::ml::SVMImpl::predict, file C:\buildslave64\win64_amdocl\master_PackSlave-win64-vc14-shared\opencv\modules\ml\src\svm.cpp, line 1930
My prediction code is found below:
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include "opencv2/imgcodecs.hpp"
#include <opencv2/highgui.hpp>
#include <opencv2/ml.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
using namespace std;
using namespace cv;
using namespace cv::ml;
int main(int, char**)
{
HOGDescriptor hog(cv::Size(64, 128), cv::Size(16, 16), cv::Size(8, 8), cv::Size(8, 8), 9, 1, -1, 0, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS);
vector<cv::Point> locations;
std::vector<float> extractedFeature;
vector<vector< float>> features;
vector<Mat> testingImages;
vector<int> testingLabels;
int numFiles = 11; //no. of rows in matrix
int img_area = 320 * 240; //no. of columns - area of image 76800
FileStorage myfile("features.xml", FileStorage::READ);
const char* path = "C:/Testing Set/Extracted_Frames/image";
//set up labels for each training image
float label = 1.0; //positive image +1
Mat testingMat(img_area, numFiles, CV_32FC1);// 1D training matrix
cout << testingMat.rows << endl;
cout << testingMat.cols << endl;
Mat res; // output
//set up labels for each training image
Mat labels(testingMat.rows, 1, CV_32SC1, label); //flatten 1D label matrix
Ptr<ml::SVM> svm = Algorithm::load<ml::SVM>("test.xml");
std::cout << "Model Loaded" << std::endl;
for (int i = 0; i < labels.rows; i++) {
    labels.at<int>(i, 0) = labels.at<int>(i, 0);
}
for (int file_num = 0; file_num < numFiles; file_num++)
{
    stringstream ss(stringstream::in | stringstream::out);
    ss << path << file_num << ".jpg";
    cout << "read path = " << ss.str() << endl;
    myfile["Descriptors" + ss.str()] >> extractedFeature;
    Mat img = imread(ss.str());
    int ii = 0; // Current column in training_mat
    for (int i = 0; i < img.rows; i++) {
        for (int j = 0; j < img.cols; j++) {
            testingMat.at<float>(ii++, file_num) = img.at<uchar>(i, j);
            Mat sampleMat = (Mat_<float>(1, 2) << i, j);
            float response = svm->predict(sampleMat); // error here
        }
    }
    features.push_back(extractedFeature);
    testingImages.push_back(img);
    testingLabels.push_back(1);
    testingLabels.push_back(file_num);
    myfile.release();
}
labels.at<int>(1, 0) = -1;
}
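For what it's worth, my reading of the assertion is that predict wants a single CV_32F row whose length equals the number of features the SVM was trained on, not a 1x2 (i, j) point. A minimal sketch of what I think it expects (assuming the model in test.xml was trained on the HOG descriptors stored in features.xml, which I'm not certain about) would be:

// Sketch under that assumption: turn the loaded descriptor vector into a
// single 1 x N CV_32F row and check it matches the trained feature count.
Mat sample = Mat(extractedFeature).clone().reshape(1, 1);
CV_Assert(sample.type() == CV_32F && sample.cols == svm->getVarCount());
float response = svm->predict(sample);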
I'm working on a detector for half bodies, in order to improve the performance of a normal people detector. I know there are more ways to deal with occlusion, but this is what I was asked to do in my final degree project. My problem is that I'm not getting good performance; moreover, I'm getting a kind of pattern in which four rectangles representing the detections appear in almost the same position, not even covering a half body.
I have a set of 414 images of top-half bodies that I cropped myself, used as positive samples, and 8520 negative images, all of them sized 64x64. I extracted the HOG descriptors as follows:
int i;
string imgname, index;
HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);
vector<float> pos_rec_descript;
vector<Point> locations;
size_t SizeDesc;
SizeDesc = hog.getDescriptorSize();
FileStorage fpd ("Pos_Descriptors.yml", FileStorage::WRITE);
for (i = 1; i < 415; i++) { // 414 images in ./pos_rec3
    stringstream a;
    a << i;
    imgname = "./pos_rec3/img" + a.str();
    imgname += ".png";
    Mat img = imread(imgname, CV_LOAD_IMAGE_COLOR);
    hog.compute(img, pos_rec_descript, Size (16,16), Size (0,0), locations);
    fpd << "Descriptores" + a.str() << pos_rec_descript;
}
fpd.release();
I did the same with the negative samples.
Then I trained an SVM as follows.
#define POS 414
#define NEG 8520
#define TOTAL 8934
#define DESCRIPT 1764
float trainingData[TOTAL][DESCRIPT];
int labels[TOTAL];
fstream doc;
void set_labels() {
    int i;
    for (i = 0; i < TOTAL; i++) {
        if (i < POS) {
            labels[i] = 1;
        } else {
            labels[i] = -1;
        }
    }
    return;
}
int main(int, char**)
{
FileStorage fsv ("supvec.yml", FileStorage::WRITE);
FileStorage ftd ("TrainData.yml", FileStorage::WRITE);
//FileStorage flm ("Labels.yml", FileStorage::WRITE);
FileStorage fpd ("../HOG_descriptors/Pos_Descriptors.yml", FileStorage::READ);
FileStorage fnd ("../HOG_descriptors_neg/Neg_Descriptors.yml", FileStorage::READ);
set_labels();
// Set up training data
vector <float> pos_D, neg_D, train_D ;
int k = 0;
for (int i = 1; i < POS+1; i++) {
    stringstream a;
    a << i;
    fpd["Descriptores" + a.str()] >> pos_D;
    for (int j = 0; j < pos_D.size(); j++) {
        train_D.push_back(pos_D[j]);
    }
}
fpd.release();
for (int i = 1; i < NEG+1; i++) {
    stringstream a;
    a << i;
    fnd["Descriptores" + a.str()] >> neg_D;
    for (int j = 0; j < neg_D.size(); j++) {
        train_D.push_back(neg_D[j]);
    }
}
fnd.release();
for (int i = 0; i < TOTAL; i++) {
    for (int j = 0; j < DESCRIPT; j++) {
        trainingData[i][j] = train_D[k];
        k++;
    }
}
Mat trainingDataMat(TOTAL, DESCRIPT, CV_32FC1, trainingData);
//memcpy(trainingDataMat.data, train_D.data(), train_D.size()*sizeof(float));
Mat labelsMat(TOTAL, 1, CV_32SC1, labels);
//ftd << "trainingDataMat" << trainingDataMat;
//flm << "labelsMat" << labelsMat;
// Train the SVM
Ptr<SVM> svm = SVM::create();
svm->setType(SVM::C_SVC);
svm->setKernel(SVM::LINEAR);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
/*Ptr<TrainData> autoTrainData = TrainData::create(trainingDataMat, ROW_SAMPLE, labelsMat);
ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C);
ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA);
ParamGrid pGrid = SVM::getDefaultGrid(SVM::P);
pGrid.logStep = 1;
ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU);
nuGrid.logStep = 1;
ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF);
coeffGrid.logStep = 1;
ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE);
degreeGrid.logStep = 1; */
cout << "Está entrenando..." << endl;
//svm->trainAuto(autoTrainData, 10, Cgrid, gammaGrid, pGrid, nuGrid, coeffGrid, degreeGrid, false);
svm->train(trainingDataMat, ROW_SAMPLE, labelsMat);
svm->save("SVM3_WS16_P0_LINEAR.yml");
I've tried both LINEAR and RBF kernels (that's why you can see a commented-out autotrain part of the code that I used to switch between SVM types), but neither of them seems to work. In fact, they give nearly the same responses, which makes me think that maybe the training phase or the detection phase (code below) is ruining the whole project.
This is how I load the SVM into the HOG detector and try it over images.
using namespace cv;
using namespace std;
using namespace cv::ml;
// static void help()
// {
// printf(
// "\nDemonstrate the use of the HoG descriptor using\n"
// " HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
// "Usage:\n"
// "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
{
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);
CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
(alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();
hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;
}
int main(int argc, char** argv)
{
Mat img;
FILE* f = 0;
char _filename[1024];
if( argc == 1 )
{
printf("Usage: peopledetect (People_imgs | People_imgs.txt)\n");
return 0;
}
img = imread(argv[1]);
if( img.data )
{
strcpy(_filename, argv[1]);
}
else
{
f = fopen(argv[1], "rt");
if(!f)
{
fprintf( stderr, "ERROR: the specified file could not be loaded\n");
return -1;
}
}
// Load SVM
Ptr<SVM> svm = SVM::create();
svm = cv::Algorithm::load<ml::SVM>("../SVM_Train/SVM3_WS16_P0_LINEAR.yml");
HOGDescriptor hog (Size(64,64), Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2,false, HOGDescriptor::DEFAULT_NLEVELS, false);
vector <float> hog_detector;
get_svm_detector (svm, hog_detector);
hog.setSVMDetector(hog_detector);
namedWindow("people detector", 1);
for(;;)
{
char* filename = _filename;
if(f)
{
if(!fgets(filename, (int)sizeof(_filename)-2, f))
break;
//while(*filename && isspace(*filename))
// ++filename;
if(filename[0] == '#')
continue;
int l = (int)strlen(filename);
while(l > 0 && isspace(filename[l-1]))
--l;
filename[l] = '\0';
img = imread(filename);
}
printf("%s:\n", filename);
if(!img.data)
continue;
fflush(stdout);
vector<Rect> found, found_filtered, searchLocations;
vector<double> found_weights;
double t = (double)getTickCount();
// run the detector with default parameters. to get a higher hit-rate
// (and more false alarms, respectively), decrease the hitThreshold and
// groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
hog.detectMultiScale(img, found, found_weights, 0, Size(16,16), Size(0,0), 1.01, 2);
//hog.detect(img, found, 0, Size(16,16), Size(0,0), searchLocations);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
    Rect r = found[i];
    for( j = 0; j < found.size(); j++ )
        if( j != i && (r & found[j]) == r)
            break;
    if( j == found.size() )
        found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
    Rect r = found_filtered[i];
    // the HOG detector returns slightly larger rectangles than the real objects.
    // so we slightly shrink the rectangles to get a nicer output.
    r.x += cvRound(r.width*0.1);
    r.width = cvRound(r.width*0.7);
    r.y += cvRound(r.height*0.07);
    r.height = cvRound(r.height*0.7);
    rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 2);
    imshow("people detector", img);
    waitKey(0);
}
//imshow("people detector", img);
//string imgname = "./Responses/Win_Stride16_4.png";
//imwrite(imgname, img);
int c = waitKey(0) & 255;
if( c == 'q' || c == 'Q' || !f)
break;
}
if(f)
fclose(f);
return 0;
}
I have checked all the dimensions of the descriptors, and every Mat seems to be OK. But when I use detectMultiScale, it shows things like this:
Image 1: It's strange because it is missing lots of detections
Image 2: Here I realized there was a kind of pattern with these 4 rectangles
My problem is that no matter what I change (descriptors, kernel, winStride and padding in detectMultiScale), the responses are always very similar, and nothing indicates that there is a correct detection there.
I'm not very sure about how I'm passing the support vectors to HOG, but it's the only way I found to do it (I found it in a post on Stack Overflow).
If any of you has any idea what is going on here, and why the responses don't change from one configuration to another, I would be really thankful. This code has been giving me headaches for weeks now. I've been changing parameters on functions and on HOG, changing kernels, and trying different sets of images, but nothing seems to make a big difference in the final result.
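Regarding how the support vectors are handed to HOG: my understanding (an assumption on my part, not something I verified in the docs) is that setSVMDetector expects one weight per element of the HOG descriptor plus the bias appended at the end, so the only sanity check I could come up with is something like:

// Sketch of a sanity check before installing the detector (assumption:
// the detector vector is the primal weight vector plus the bias term).
CV_Assert(hog_detector.size() == hog.getDescriptorSize() + 1);
hog.setSVMDetector(hog_detector);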
Excuse the hasty code dump. I'm working with OpenCV at the moment and have been stuck on an error for two hours.
- (IBAction)faceRecognition:(id)sender {
// load images
vector<Mat> images;
vector<int> labels;
int numberOfSubjects = 4;
int numberPhotosPerSubject = 3;
for (int i=1; i<=numberOfSubjects; i++) {
    for (int j=1; j<=numberPhotosPerSubject; j++) {
        // create grayscale images
        Mat src = [self CreateIplImageFromUIImage:[UIImage imageNamed:[NSString stringWithFormat:@"%d_%d.jpg", i, j]]];
        Mat dst;
        cv::cvtColor(src, dst, CV_BGR2GRAY);
        images.push_back(dst);
        labels.push_back(i);
    }
}
// get test instances
Mat testSample = images[images.size() - 1];
int testLabel = labels[labels.size() - 1];
// ... and delete last element
images.pop_back();
labels.pop_back();
// build the Fisherfaces model
Fisherfaces model(images, labels);
// test model
int predicted = model.predict(testSample);
cout << "predicted class = " << predicted << endl;
cout << "actual class = " << testLabel << endl;
}
I can't figure out how to fix this error:
Variable type 'cv::Fisherfaces' is an abstract class
It appears under "// build the Fisherfaces model" near the bottom.
Any assistance greatly appreciated.
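For what it's worth, my reading of the error is that cv::Fisherfaces can't be constructed directly because it is abstract. Assuming the OpenCV 2.4 contrib face module, the pattern I have seen elsewhere goes through the factory function and a FaceRecognizer pointer, roughly like this (a sketch, not my working code):

// Sketch (assumes OpenCV 2.4 contrib): build the model via the factory
// function instead of instantiating the abstract Fisherfaces class.
cv::Ptr<cv::FaceRecognizer> model = cv::createFisherFaceRecognizer();
model->train(images, labels);
int predicted = model->predict(testSample);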
I have a class with a member vector<CvRTrees*>. I generate many CvRTrees objects and push them onto the vector. I use this function for training:
Mat trainingDataMat(trainSize, featureSize, CV_32FC1);
// fill trainingDataMat
for (int i = 0; i < LOOP; i++) {
    Mat labelMat(trainSize, 1, CV_32FC1);
    // fill labelMat
    // learn classifier
    CvRTrees *rtrees = new CvRTrees();
    rtrees->train(trainingDataMat, CV_ROW_SAMPLE, labelMat, Mat(), Mat(), Mat(), Mat(), CvRTParams());
    this->rtreesVector.push_back(rtrees);
}
And I use a function for prediction. When I run the code below, I get an error with no source information:
Mat testSample(1, featureSize, CV_32FC1);
for (int k = 0; k < featureSize; k++) {
    testSample.at<float>(k) = (float)this->trainInvoiceVector[i]->at(j,k);
}
for (int i = 0; i < this->rtreesVector.size(); i++) {
    int response = (int)((*(this->rtreesVector[i])).predict(testSample)); // !!!! THIS LINE IS THE PROBLEM
    cout << "response" << response << endl;
}
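In case it helps narrow things down, the only checks I could think of adding before the predict call (just guesses on my side, using names from my own code) are that each stored pointer is valid and that the test row is a CV_32F row of width featureSize:

// Defensive sketch (assumptions only): verify each tree pointer and the
// sample layout before predicting, since predict reads one CV_32F row.
for (size_t i = 0; i < this->rtreesVector.size(); i++) {
    CV_Assert(this->rtreesVector[i] != NULL);
    CV_Assert(testSample.type() == CV_32FC1 && testSample.cols == featureSize);
    int response = (int)this->rtreesVector[i]->predict(testSample);
    cout << "response " << response << endl;
}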