I'd like to work through a simple DNN example to get familiar with mlpack.
The example is to train a DNN on the MNIST digits - the classic
ML "hello world" case :-)
I managed to prepare all images using OpenCV filters - the result is a
single-channel grayscale cv::Mat matrix.
I also managed to convert the pixel values into an Armadillo matrix arma::mat
and to label these "pictures".
But somewhere in those last two operations, I made a mistake.
I have N examples and M input neurons.
IN means "Input Neuron"
OL means "OutputLabel"
Ex means "Example"
The structure of my training data looks like this:
arma::mat TrainSet = {{IN_1/Ex_1, IN_1/Ex_2, IN_1/Ex_3,...,IN_1/Ex_N},
{IN_2/Ex_1, IN_2/Ex_2, IN_2/Ex_3,...,IN_2/Ex_N},
{...},
{IN_M/Ex_1, IN_M/Ex_2, IN_M/Ex_3,...,IN_M/Ex_N}}
arma::mat LabelSet = {OL_Ex_1, OL_Ex_2, ..., OL_Ex_N}
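For reference, in mlpack this layout means one column per example (Armadillo matrices are column-major, and FFN::Train treats each column as one observation). A minimal sketch of building matrices with these shapes; the dimensions are illustrative only:

#include <mlpack/core.hpp>

int main() {
  const size_t M = 784;   // input neurons (28x28 pixels)
  const size_t N = 1010;  // number of examples (illustrative count)

  // One column per example: TrainSet(in, ex) holds the value of
  // input neuron 'in' for example 'ex'.
  arma::mat TrainSet(M, N, arma::fill::zeros);

  // One label per example, stored in a single row.
  arma::mat LabelSet(1, N, arma::fill::zeros);
  return 0;
}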
Training my network leads to an error:
Error training artificial neural network!
Error details: Mat::operator(): index out of bounds
I'm quite sure that the structure of my training and label matrices is not correct.
Does anybody know what I did wrong?
I tried to follow this example and to translate it to my case:
http://www.mlpack.org/docs/mlpack-git/doxygen/cnetutorial.html
Here is a picture of Visual Studio showing the structure of my trainset:
Here is a picture of Visual Studio showing the structure of my labels:
Thank you for any help.
Yours,
Jan
This is my code:
#pragma region Includings
#include <iostream>
#include <stdlib.h>
#include <exception>
#include <string>
#include "opencv2/opencv.hpp"
#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/core/optimizers/cne/cne.hpp>
#pragma endregion
#pragma region Globals
std::string TrainFolder = "C:\\HomeC\\MNist\\MNist\\train-labels\\";
#pragma endregion
#pragma region Structs
typedef struct TInputPair {
double Value;
int Index;
};
typedef struct TDigitPairExample {
TInputPair* InputPairArray;
int nNonZero;
char OutputValue;
};
#pragma endregion
#pragma region Identifier
void DisplayImage(cv::Mat* Img, std::string Title = "CV::DefaultForm");
std::vector<TDigitPairExample> GenerateTrainingSet(std::string TrainFolder, int nExamplesPerClass, bool DisplayAtWindow = false);
void DisplayImage(cv::Mat* Img, std::string Title, int Delay = 0);
TInputPair* MatToArray(cv::Mat* img, int* nEntries);
int CharToOutputInt(char c);
void TransferDataToMLPack(std::vector<TDigitPairExample>* ExStack, arma::mat* DataStack, arma::mat* LabelStack, int nInput);
typedef uchar Pixel;
#pragma endregion
int main() {
#pragma region Get training examples from images
std::vector<TDigitPairExample> TrainExamples = GenerateTrainingSet(TrainFolder, 101);
#pragma endregion
#pragma region Convert training vector to armadillo matrix
arma::mat trainset, labels;
TransferDataToMLPack(&TrainExamples, &trainset, &labels, 784);
#pragma endregion
#pragma region Define network
mlpack::ann::FFN<mlpack::ann::NegativeLogLikelihood<> > network;
network.Add<mlpack::ann::Linear<> >(784, 784);
network.Add<mlpack::ann::SigmoidLayer<> >();
network.Add<mlpack::ann::Linear<> >(784, 10);
network.Add<mlpack::ann::LogSoftMax<> >();
#pragma endregion
#pragma region Train network
try {
network.Train(trainset, labels);
}catch (const std::exception& e) {
std::cout << "Error training artificial neural network!" << std::endl << "Error details: " << e.what() << std::endl;
}
#pragma endregion
std::cout << "Application finished. Press ENTER to exit..." << std::endl;
std::cin.get();
}
#pragma region Private_regions
void DisplayImage(cv::Mat* Img, std::string Title, int Delay) {
/***************/
/*Define window*/
/***************/
cv::namedWindow(Title.c_str(), cv::WINDOW_AUTOSIZE);
cv::imshow(Title.c_str(), *Img);
cv::waitKey(Delay);
//cv::destroyWindow(Title.c_str());
return;
}
TInputPair* MatToArray(cv::Mat* img, int* nEntries) {
uchar* ptr = nullptr, *dptr = nullptr;
TInputPair* InPairArr = nullptr;
int j = 0;
if (img->isContinuous()) {
ptr = img->ptr<uchar>();
}else { return nullptr; }
InPairArr = (TInputPair*)malloc((img->cols) * (img->rows) * sizeof(TInputPair));
if (InPairArr == nullptr) { return nullptr; }
for (int i = 0; i < (img->rows)*(img->cols); i++) {
//std::cout << "Index_" + std::to_string(i) + "; " + std::to_string(ptr[i]) << std::endl;
if (ptr[i] != 255) { InPairArr[j].Index = i; InPairArr[j].Value = (double)(255 - ptr[i]) / 255.0; j++; }
}
InPairArr = (TInputPair*)realloc(InPairArr, j * sizeof(TInputPair));
*nEntries = j;
return InPairArr;
}
std::vector<TDigitPairExample> GenerateTrainingSet(std::string TrainFolder, int nExamplesPerClass, bool DisplayAtWindow) {
/********/
/*Locals*/
/********/
int nEntries = 0;
cv::Mat imgMod, imgGrad, imgInv, ptHull, imgHull, imgResize;
std::vector<std::vector<cv::Point>> contours;
std::vector<TDigitPairExample> TrainExamples;
TDigitPairExample TDPE;
for (int i = 1, j = 0;; i++) {
/**************/
/*Reading file*/
/**************/
cv::Mat imgOrig = cv::imread(TrainFolder + std::to_string(j) + "_" + std::to_string(i) + ".bmp", cv::IMREAD_GRAYSCALE);
if (imgOrig.empty() || i > 100) { j++; i = 1; if (j > 9) { break; } continue; }
/****************/
/*Build negative*/
/****************/
cv::subtract(cv::Scalar::all(255.0), imgOrig, imgMod);
/*****************/
/*Cut by threshold*/
/*****************/
cv::threshold(imgMod, imgMod, 230.0, 255.0, cv::THRESH_BINARY);
/**************/
/*Get contours*/
/**************/
//cv::findContours(imgMod, contours, cv::CHAIN_APPROX_NONE, 1);
//cv::Scalar color = cv::Scalar(255, 0, 0);
//cv::drawContours(imgMod, contours, -1, color, 1, 8);
//cv::Laplacian(imgOrig, imgGrad, 16, 1, 1.0, 0.0, cv::BORDER_REFLECT);
/********************/
/*Resize and display*/
/********************/
cv::resize(imgMod, imgResize, cv::Size(300, 300), .0, .0, cv::INTER_LINEAR);
TDPE.InputPairArray = MatToArray(&imgMod, &nEntries);
TDPE.nNonZero = nEntries;
TDPE.OutputValue = std::to_string(j).c_str()[0];
TrainExamples.push_back(TDPE);
if (DisplayAtWindow) { DisplayImage(&imgResize, std::string("After inversion"), 5); }
}
return TrainExamples;
}
int CharToOutputInt(char c) {
switch (c) {
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
default: throw std::exception();
}
}
void TransferDataToMLPack(std::vector<TDigitPairExample>* ExStack, arma::mat* DataStack, arma::mat* LabelStack, int nInput) {
*DataStack = arma::zeros(nInput, ExStack->size());
*LabelStack = arma::zeros(1, ExStack->size()); /*...edit...*/
TDigitPairExample DPE;
TInputPair TIP;
/*Looping all digit examples*/
for (int i = 0; i < ExStack->size(); i++) {
DPE = (*ExStack)[i];
/*Looping over all non-zero pixels*/
for (int j = 0; j < DPE.nNonZero; j++) {
TIP = DPE.InputPairArray[j];
try {
(*DataStack)(TIP.Index, i) = TIP.Value;
}catch (std::exception& ex) {
std::cout << "Error adding example[" << std::to_string(j) << "] to training stack!" << std::endl <<
"Error details: " << ex.what() << std::endl;
}
}
/*Adding label*/
try {
(*LabelStack)(0, i) = CharToOutputInt(DPE.OutputValue); /*...edit...*/
}catch (std::exception& ex) {
std::cout << "Error adding example[" << std::to_string(i) << "] to label stack!" << std::endl <<
"Error details: " << ex.what() << std::endl;
}
}
return;
}
#pragma endregion
The NegativeLogLikelihood loss function expects the target to be in the range [1, N], so you would have to increase the return value of CharToOutputInt. In case you haven't seen it already, there is an interesting example, mlpack - DigitRecognizerCNN, which could be helpful as well.
Ok, thanks to Marcus, I found the error. A label "0" is not allowed. I simply changed the labels to:
1 ==> Zero
2 ==> One
3 ==> Two
...
10 ==> Nine
That worked.
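For completeness, a minimal sketch of the conversion with that shift applied (digit d maps to label d + 1, so the labels span [1, 10] as NegativeLogLikelihood expects):

#include <stdexcept>

// Map digit characters '0'..'9' to labels 1..10, because the
// NegativeLogLikelihood loss expects targets in the range [1, N].
int CharToOutputInt(char c) {
    if (c >= '0' && c <= '9') { return (c - '0') + 1; }
    throw std::invalid_argument("not a digit character");
}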
Related
I have just started using C++ for some image processing tasks. I want to integrate my RGB (OpenCV Mat) and depth (PCL) data, which I get from ros::Subscriber, into colored point cloud data.
I use the cv::Mat acquiredImage to hold the transmitted image from ros::Subscriber, and then the Mat acquiredImage is used for other processing in other threads, but I am facing a segmentation fault, or the error is shown like this:
[xcb] Unknown sequence number while processing queue
[xcb] Most likely this is a multi-threaded client and XInitThreads has not been called
[xcb] Aborting, sorry about that. Viewtest: ../../src/xcb_io.c:260:
poll_for_event: Assertion `!xcb_xlib_threads_sequence_lost' failed.
Aborted (core dumped)
I have tried using std::mutex but it still doesn't work. Could anyone tell me how to properly manage the cv::Mat in two different threads?
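Not an authoritative fix, but a common pattern is to take the lock on every access to the shared Mat and to deep-copy (clone) it, so the consumer never touches a pixel buffer the producer may reassign. A minimal sketch with illustrative names (the original code follows below):

#include <mutex>
#include <opencv2/opencv.hpp>

cv::Mat sharedImage;    // written by the subscriber callback
std::mutex imageMutex;  // guards every access to sharedImage

// Producer side (e.g. inside the ROS image callback):
void storeImage(const cv::Mat& incoming) {
    std::lock_guard<std::mutex> lock(imageMutex);
    sharedImage = incoming.clone();  // deep copy, no shared pixel buffer
}

// Consumer side (e.g. the processing thread):
cv::Mat takeImageCopy() {
    std::lock_guard<std::mutex> lock(imageMutex);
    return sharedImage.clone();      // work on a private copy
}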
typedef pcl::PointXYZRGB XYZRGB;
typedef pcl::PointCloud<XYZRGB> pclXYZRGB;
typedef pcl::PointCloud<XYZRGB>::Ptr pclXYZRGBptr;
typedef pcl::PointCloud<XYZRGB>::ConstPtr pclXYZRGBcptr;
pclXYZRGBptr acquiredCloud (new pclXYZRGB());
pclXYZRGBptr acquiredCloud2 (new pclXYZRGB());
cv::Mat acquiredImage, acquiredImageRotated;
std::thread thread_RunThread;
std::mutex mutexMutex;
bool stopThread, has_data1, has_data2;
inline float PackRGB(uint8_t r, uint8_t g, uint8_t b) {
uint32_t color_uint = ((uint32_t)r << 16 | (uint32_t) g << 8 | (uint32_t)b);
return *reinterpret_cast<float *> (&color_uint);
}
void RunThread(){
while(ros::ok()){
ros::spinOnce();
}
}
void imageReceive(const sensor_msgs::ImageConstPtr& msg){
mutexMutex.lock();
acquiredImage = cv::Mat(cv_bridge::toCvShare(msg, "bgr8")->image);
mutexMutex.unlock();
has_data1 = true;
}
void cloudReceive(const sensor_msgs::PointCloud2ConstPtr& cloudInMsg){
//mutexMutex.lock();
pcl::fromROSMsg(*cloudInMsg, *acquiredCloud);
has_data2 = true;
//mutexMutex.unlock();
}
void StartThread(){
stopThread = false;
has_data1 = has_data2 = false;
thread_RunThread = std::thread(RunThread);
while(!has_data1 && !has_data2){
std::this_thread::sleep_for(std::chrono::milliseconds(1));
std::cout << has_data1 << "-" << has_data2 << std::endl;
}
}
void CloseThread(){
stopThread = true;
thread_RunThread.join();
}
int main(int argc, char **argv){
ros::init(argc, argv, "Viewtest");
ros::NodeHandle nh;
image_transport::ImageTransport it(nh);
cv::startWindowThread();
image_transport::Subscriber sub = it.subscribe("/rsCamera/image", 1, imageReceive);
ros::Subscriber pclsubAcquirer = nh.subscribe("/rsCamera/cloud", 1, cloudReceive);
StartThread();
while (ros::ok()){
if(!has_data1 && !has_data2){
std::this_thread::sleep_for(std::chrono::milliseconds(1));
std::cout << has_data1 << "-" << has_data2 << std::endl;
}
else {
mutexMutex.lock();
cv::rotate(acquiredImage, acquiredImageRotated, cv::ROTATE_180);
mutexMutex.unlock();
copyPointCloud(*acquiredCloud, *acquiredCloud2);
int i = 640, j = 480, k;
for (auto& it : acquiredCloud2->points){
it.x = it.x; it.y = it.y; it.z = it.z;
it.rgb = PackRGB(
acquiredImageRotated.at<cv::Vec3b>(j,i)[2], // r
acquiredImageRotated.at<cv::Vec3b>(j,i)[1], // g
acquiredImageRotated.at<cv::Vec3b>(j,i)[0] // b
);
i--;
if(i <= 0) { i = 640; j--; }
if(j < 0) { break; }
}
}
}
CloseThread();
return 0;
}
I wanted to try whether this:
std::vector<Vec3b> colorsense = {
map.at<Vec3b>(trace.move_up(1)),
map.at<Vec3b>(trace.move_down(1)),
map.at<Vec3b>(trace.move_left(1)),
map.at<Vec3b>(trace.move_right(1)),
map.at<Vec3b>(trace.move_upright(1)),
map.at<Vec3b>(trace.move_upleft(1)),
map.at<Vec3b>(trace.move_downright(1)),
map.at<Vec3b>(trace.move_downleft(1))};
can be used, but after testing it, I don't think it works. Oh, and the trace.move_* functions all return points in the matrix map.
This is my entire main code:
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "Trace.h"
// Macros
#define log(x) std::cout << x << std::endl;
#define error(x) std::cerr << x << std::endl;
// using namespaces:
using namespace cv;
//using namespace std;
// enum;
enum {up = 0, down = 1, left = 2, right = 3, upright = 4, upleft = 5, downright = 6, downleft = 7};
// Main function;
int main() {
try {
int x = 0,movement;
Trace trace(0, 0);
Mat map = imread("tmp.bmp"), edges;
resize(map, map, {1028, 672}, 0, 0, INTER_NEAREST_EXACT);
std::vector<Vec3b> colorsense = {
map.at<Vec3b>(trace.move_up(1)),
map.at<Vec3b>(trace.move_down(1)),
map.at<Vec3b>(trace.move_left(1)),
map.at<Vec3b>(trace.move_right(1)),
map.at<Vec3b>(trace.move_upright(1)),
map.at<Vec3b>(trace.move_upleft(1)),
map.at<Vec3b>(trace.move_downright(1)),
map.at<Vec3b>(trace.move_downleft(1))};
switch (movement) {
case up:
colorsense[up];
break;
case down:
colorsense[down];
break;
case left:
colorsense[left];
break;
case right:
colorsense[right];
break;
case upright:
colorsense[upright];
break;
case upleft:
colorsense[upleft];
break;
case downright:
colorsense[downright];
break;
case downleft:
colorsense[downleft];
break;
default:
movement = right;
break;
}
for (int i = 0; i < 1000; i++){
trace.draw(map, 255, 0, 0, 1);
if(colorsense[right][0]== 255 || colorsense[right][1]== 255 || colorsense[right][2]== 255){
trace.move_downright(0);
} else {
trace.move_right(0);
}
namedWindow("map");
imshow("map", map);
waitKey(10);
}
// Canny(map,edges,100,100,3,false);
// Laplacian(map,map, CV_64F);
log(map.rows);
log(map.cols);
if (map.empty()) {
error("Could not load image");
return EXIT_FAILURE;
}
namedWindow("map");
imshow("map", map);
waitKey();
log("Hello World");
}
catch(const char* msg){
error(msg);
}
return 0;
}
The for loop you see here is the loop that draws the line. But with the function I wrote, it isn't able to change direction when a black object is close.
But when I write the if statement like this instead:
map.at<Vec3b>(trace.move_up(1))[0]
map.at<Vec3b>(trace.move_up(1))[1]
map.at<Vec3b>(trace.move_up(1))[2]
Then it works... why?
MXNet C++ inference with MXPredSetInput segmentation fault
1. Background
I have successfully tried https://github.com/apache/incubator-mxnet/tree/master/example/image-classification/predict-cpp.
But when I try to deploy MXNet in C++ with my own model, I get a segmentation fault:
[17:33:07] src/nnvm/legacy_json_util.cc:209: Loading symbol saved by previous version v1.2.1. Attempting to upgrade...
Signal: SIGSEGV (Segmentation fault)
2. Code with the error:
MXPredSetInput(pred_hnd, "data", image_data.data(), static_cast<mx_uint>(image_size));
3. Tips
First I thought it was because the input data shape is not compatible with the model's input layer. But I asked the model designer, and it's a resnet model with conv layers only, so any input shape should be OK.
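One hedged sanity check (an editorial addition, not from the original post): verify that the buffer handed to MXPredSetInput holds exactly batch * channels * height * width floats for the declared NCHW input shape, since a size mismatch there is a common cause of crashes in the C predict API. A minimal sketch:

#include <cassert>
#include <cstddef>

// Sketch: the buffer passed to MXPredSetInput must contain exactly
// batch * channels * height * width floats (NCHW layout, batch = 1 here).
void CheckInputSize(std::size_t buffer_len, std::size_t channels,
                    std::size_t height, std::size_t width) {
    const std::size_t expected = 1 * channels * height * width;
    assert(buffer_len == expected &&
           "image buffer size does not match the declared input shape");
}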
4. Download model:
Download them, and put them into model dir.
https://drive.google.com/drive/folders/16MEKNOz_iwquVxHMk9c7igmBNuT6w7wz?usp=sharing
5. Code: https://github.com/jaysimon/mxnet_cpp_infere
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <vector>
#include <memory>
#include <thread>
#include <iomanip>
#include <algorithm> // std::find_if (used by trim below)
#include <cctype>    // std::isspace
#include <opencv2/opencv.hpp>
// Path for c_predict_api
#include <mxnet/c_predict_api.h>
const mx_float DEFAULT_MEAN = 117.0;
static std::string trim(const std::string& input) {
auto not_space = [](int ch) {
return !std::isspace(ch);
};
auto output = input;
output.erase(output.begin(), std::find_if(output.begin(), output.end(), not_space));
output.erase(std::find_if(output.rbegin(), output.rend(), not_space).base(), output.end());
return output;
}
// Read file to buffer
class BufferFile {
public :
std::string file_path_;
std::size_t length_ = 0;
std::unique_ptr<char[]> buffer_;
explicit BufferFile(const std::string& file_path)
: file_path_(file_path) {
std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
if (!ifs) {
std::cerr << "Can't open the file. Please check " << file_path << ". \n";
return;
}
ifs.seekg(0, std::ios::end);
length_ = static_cast<std::size_t>(ifs.tellg());
ifs.seekg(0, std::ios::beg);
std::cout << file_path.c_str() << " ... " << length_ << " bytes\n";
// Buffer as null terminated to be converted to string
buffer_.reset(new char[length_ + 1]);
buffer_[length_] = 0;
ifs.read(buffer_.get(), length_);
ifs.close();
}
std::size_t GetLength() {
return length_;
}
char* GetBuffer() {
return buffer_.get();
}
};
void GetImageFile(const std::string& image_file,
mx_float* image_data, int channels,
cv::Size resize_size, const mx_float* mean_data = nullptr) {
// Read all kinds of file into a BGR color 3 channels image
cv::Mat im_ori = cv::imread(image_file, cv::IMREAD_COLOR);
if (im_ori.empty()) {
std::cerr << "Can't open the image. Please check " << image_file << ". \n";
assert(false);
}
cv::Mat im;
resize(im_ori, im, resize_size);
int size = im.rows * im.cols * channels;
mx_float* ptr_image_r = image_data;
mx_float* ptr_image_g = image_data + size / 3;
mx_float* ptr_image_b = image_data + size / 3 * 2;
float mean_b, mean_g, mean_r;
mean_b = mean_g = mean_r = DEFAULT_MEAN;
mean_b = 103.06;
mean_g = 115.9;
mean_r = 123.15;
for (int i = 0; i < im.rows; i++) {
auto data = im.ptr<uchar>(i);
for (int j = 0; j < im.cols; j++) {
if (channels > 1) {
*ptr_image_b++ = static_cast<mx_float>(*data++) - mean_b;
*ptr_image_g++ = static_cast<mx_float>(*data++) - mean_g;
}
*ptr_image_r++ = static_cast<mx_float>(*data++) - mean_r;
}
}
}
// LoadSynsets
// Code from : https://github.com/pertusa/mxnet_predict_cc/blob/master/mxnet_predict.cc
std::vector<std::string> LoadSynset(const std::string& synset_file) {
std::ifstream fi(synset_file.c_str());
if (!fi.is_open()) {
std::cerr << "Error opening synset file " << synset_file << std::endl;
assert(false);
}
std::vector<std::string> output;
std::string synset, lemma;
while (fi >> synset) {
getline(fi, lemma);
output.push_back(lemma);
}
fi.close();
return output;
}
void PrintOutputResult(const std::vector<float>& data, const std::vector<std::string>& synset) {
if (data.size() != synset.size()) {
std::cerr << "Result data and synset size do not match!" << std::endl;
}
float best_accuracy = 0.0;
std::size_t best_idx = 0;
for (std::size_t i = 0; i < data.size(); ++i) {
std::cout << "Accuracy[" << i << "] = " << std::setprecision(8) << data[i] << std::endl;
if (data[i] > best_accuracy) {
best_accuracy = data[i];
best_idx = i;
}
}
std::cout << "Best Result: " << trim(synset[best_idx]) << " (id=" << best_idx << ", " <<
"accuracy=" << std::setprecision(8) << best_accuracy << ")" << std::endl;
}
void predict(PredictorHandle pred_hnd, const std::vector<mx_float> &image_data,
NDListHandle nd_hnd, const std::string &synset_file, int i) {
auto image_size = image_data.size();
// Set Input
//>>>>>>>>>>>>>>>>>>>> Problem code <<<<<<<<<<<<<<<<<<<<<<<
MXPredSetInput(pred_hnd, "data", image_data.data(), static_cast<mx_uint>(image_size));
// >>>>>>>>>>>>>>>>>>>> Problem code <<<<<<<<<<<<<<<<<<<<<<<
// Do Predict Forward
MXPredForward(pred_hnd);
mx_uint output_index = 0;
mx_uint* shape = nullptr;
mx_uint shape_len;
// Get Output Result
MXPredGetOutputShape(pred_hnd, output_index, &shape, &shape_len);
std::size_t size = 1;
for (mx_uint i = 0; i < shape_len; ++i) { size *= shape[i]; }
std::vector<float> data(size);
MXPredGetOutput(pred_hnd, output_index, &(data[0]), static_cast<mx_uint>(size));
// Release NDList
if (nd_hnd) {
MXNDListFree(nd_hnd);
}
// Release Predictor
MXPredFree(pred_hnd);
// Synset path for your model, you have to modify it
auto synset = LoadSynset(synset_file);
// Print Output Data
PrintOutputResult(data, synset);
}
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "No test image here." << std::endl
<< "Usage: ./image-classification-predict apple.jpg [num_threads]" << std::endl;
return EXIT_FAILURE;
}
std::string test_file(argv[1]);
int num_threads = 1;
if (argc == 3)
num_threads = std::atoi(argv[2]);
// Models path for your model, you have to modify it
std::string json_file = "../model/rfcn_dcn_chicken-0000.json";
std::string param_file = "../model/rfcn_dcn_chicken-0000.params";
std::string synset_file = "../model/synset.txt";
std::string nd_file = "../model/mean_224.nd";
BufferFile json_data(json_file);
BufferFile param_data(param_file);
// Parameters
int dev_type = 1; // 1: cpu, 2: gpu
int dev_id = 0; // arbitrary.
mx_uint num_input_nodes = 1; // 1 for feedforward
const char* input_key[1] = { "data" };
const char** input_keys = input_key;
// Image size and channels
int width = 1000;
int height = 562;
int channels = 3;
const mx_uint input_shape_indptr[2] = { 0, 4 };
const mx_uint input_shape_data[4] = { 1,
static_cast<mx_uint>(channels),
static_cast<mx_uint>(height),
static_cast<mx_uint>(width) };
if (json_data.GetLength() == 0 || param_data.GetLength() == 0) {
return EXIT_FAILURE;
}
auto image_size = static_cast<std::size_t>(width * height * channels);
// Read Mean Data
const mx_float* nd_data = nullptr;
NDListHandle nd_hnd = nullptr;
BufferFile nd_buf(nd_file);
if (nd_buf.GetLength() > 0) {
mx_uint nd_index = 0;
mx_uint nd_len;
const mx_uint* nd_shape = nullptr;
const char* nd_key = nullptr;
mx_uint nd_ndim = 0;
MXNDListCreate(static_cast<const char*>(nd_buf.GetBuffer()),
static_cast<int>(nd_buf.GetLength()),
&nd_hnd, &nd_len);
MXNDListGet(nd_hnd, nd_index, &nd_key, &nd_data, &nd_shape, &nd_ndim);
}
// Read Image Data
std::vector<mx_float> image_data(image_size);
GetImageFile(test_file, image_data.data(), channels, cv::Size(width, height), nd_data);
if (num_threads == 1) {
// Create Predictor
PredictorHandle pred_hnd;
MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
static_cast<const char*>(param_data.GetBuffer()),
static_cast<int>(param_data.GetLength()),
dev_type,
dev_id,
num_input_nodes,
input_keys,
input_shape_indptr,
input_shape_data,
&pred_hnd);
assert(pred_hnd);
predict(pred_hnd, image_data, nd_hnd, synset_file, 0);
} else {
// Create Predictor
std::vector<PredictorHandle> pred_hnds(num_threads, nullptr);
MXPredCreateMultiThread(static_cast<const char*>(json_data.GetBuffer()),
static_cast<const char*>(param_data.GetBuffer()),
static_cast<int>(param_data.GetLength()),
dev_type,
dev_id,
num_input_nodes,
input_keys,
input_shape_indptr,
input_shape_data,
pred_hnds.size(),
pred_hnds.data());
for (auto hnd : pred_hnds)
assert(hnd);
std::vector<std::thread> threads;
for (int i = 0; i < num_threads; i++)
threads.emplace_back(predict, pred_hnds[i], image_data, nd_hnd, synset_file, i);
for (int i = 0; i < num_threads; i++)
threads[i].join();
}
printf("run successfully\n");
return EXIT_SUCCESS;
}
I am new to the OpenCV world and neural networks, but I have some coding experience in C++/Java.
I created my first ANN MLP and taught it XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>
using namespace cv;
using namespace ml;
using namespace std;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << "]" << endl;
}
}
}
int main()
{
const int hiddenLayerSize = 4;
float inputTrainingDataArray[4][2] = {
{ 0.0, 0.0 },
{ 0.0, 1.0 },
{ 1.0, 0.0 },
{ 1.0, 1.0 }
};
Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
float outputTrainingDataArray[4][1] = {
{ 0.0 },
{ 1.0 },
{ 1.0 },
{ 0.0 }
};
Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
mlp->train(trainingData
/*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
);
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
Mat result;
mlp->predict(sample, result);
cout << sample << " -> ";// << result << endl;
print(result, 0);
cout << endl;
}
return 0;
}
It works very well for this simple problem; I also taught this network the 1-10 to binary conversion.
But I need to use the MLP for simple image classification - road signs. I wrote the code for loading training images and preparing the matrix for learning, but I'm not able to train the network - it "learns" in one second even with 1,000,000 iterations! And it produces garbage results, the same for all inputs!
Here are my test images and the source code:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>
using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[ ";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << " ]" << endl;
}
}
}
bool loadImage(string imagePath, Mat& outputImage)
{
// load image in grayscale
Mat image = imread(imagePath, IMREAD_GRAYSCALE);
Mat temp;
// check for invalid input
if (image.empty()) {
cout << "Could not open or find the image" << std::endl;
return false;
}
// resize the image
Size size(WIDTH_SIZE, HEIGHT_SIZE);
resize(image, temp, size, 0, 0, CV_INTER_AREA);
// convert to float 1-channel
temp.convertTo(outputImage, CV_32FC1, 1.0/255.0);
return true;
}
vector<string> getFilesNamesInFolder(string folder)
{
vector<string> names;
char search_path[200];
sprintf(search_path, "%s/*.*", folder.c_str());
WIN32_FIND_DATA fd;
HANDLE hFind = ::FindFirstFile(search_path, &fd);
if (hFind != INVALID_HANDLE_VALUE) {
do {
// read all (real) files in current folder
// , delete '!' read other 2 default folder . and ..
if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
names.push_back(fd.cFileName);
}
} while (::FindNextFile(hFind, &fd));
::FindClose(hFind);
}
return names;
}
class Sign {
public:
enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };
Mat image;
Category category;
int number;
Sign(Mat& image, string name) :image(image) {
category = static_cast<Category>(name.at(0));
number = stoi(name.substr(2, name.length()));
};
};
vector<Sign> loadSignsFromFolder(String folderName) {
vector<Sign> roadSigns;
for (string fileName : getFilesNamesInFolder(folderName)) {
Mat image;
loadImage(folderName + fileName, image);
roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png
}
return roadSigns;
}
void showSignsInWindows(vector<Sign> roadSigns) {
for (Sign sign : roadSigns) {
String windowName = "Sign " + to_string(sign.number);
namedWindow(windowName, WINDOW_AUTOSIZE);
imshow(windowName, sign.image);
}
waitKey(0);
}
Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
Mat roadSignsImageData;
for (Sign sign : roadSigns) {
Mat signImageDataInOneRow = sign.image.reshape(0, 1);
roadSignsImageData.push_back(signImageDataInOneRow);
}
return roadSignsImageData;
}
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
int signsCount = (int) roadSigns.size();
int signsVectorSize = signsCount + 1;
Mat roadSignsData(0, signsVectorSize, CV_32FC1);
int i = 1;
for (Sign sign : roadSigns) {
vector<float> outputTraningVector(signsVectorSize);
fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0);
outputTraningVector[i++] = 1.0;
Mat tempMatrix(outputTraningVector, false);
roadSignsData.push_back(tempMatrix.reshape(0, 1));
}
return roadSignsData;
}
int main(int argc, char* argv[])
{
if (argc != 2) {
cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
return -1;
}
const int hiddenLayerSize = 500;
vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
//mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
,100 //(int) INT_MAX
,0.000001
);
mlp->setTermCriteria(termCrit);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
auto start = system_clock::now();
mlp->train(trainingData
//, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
, ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
);
auto duration = duration_cast<milliseconds> (system_clock::now() - start);
cout << "Training time: " << duration.count() << "ms" << endl;
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat result;
//mlp->predict(inputTrainingData.row(i), result);
mlp->predict(roadSigns[i].image.reshape(0, 1), result);
//cout << result << endl;
print(result, 2);
}
//showSignsInWindows(roadSigns);
return 0;
}
What is wrong in this code, such that XOR works but images do not? I checked the input and output matrices and they're correct... Could somebody also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and what values of the setActivationFunction and setTrainMethod parameters I should use?
Thanks!
There was a problem with the backprop weight scale parameter - it was too big, and the ANN couldn't learn more difficult things.
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up the learning) - now it's working!
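Putting the fix together, a minimal sketch of the adjusted setup (hidden layer size 100 and weight scale 0.0001 as described above; everything else mirrors the original code):

#include <opencv2/ml/ml.hpp>
using namespace cv;
using namespace cv::ml;

Ptr<ANN_MLP> buildMlp(int inputSize, int outputSize) {
    const int hiddenLayerSize = 100;  // reduced from 500 to speed up learning
    Mat layersSize = (Mat_<int>(3, 1) << inputSize, hiddenLayerSize, outputSize);
    Ptr<ANN_MLP> mlp = ANN_MLP::create();
    mlp->setLayerSizes(layersSize);
    mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM, 1.0, 1.0);
    // A much smaller weight scale lets backprop learn harder problems.
    mlp->setTrainMethod(ANN_MLP::BACKPROP, 0.0001);
    return mlp;
}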
I have downloaded the libsvm code for object detection. I am having problems using the SVM training code: I can't input the sample files properly. Could anyone please help me with how to input positive and negative images? Here is the training code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "svm.h"
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
void print_null(const char *s) {}
void exit_with_help()
{
printf(
"Usage: svm-train [options] training_set_file [model_file]\n"
"options:\n"
"-s svm_type : set type of SVM (default 0)\n"
" 0 -- C-SVC (multi-class classification)\n"
" 1 -- nu-SVC (multi-class classification)\n"
" 2 -- one-class SVM\n"
" 3 -- epsilon-SVR (regression)\n"
" 4 -- nu-SVR (regression)\n"
"-t kernel_type : set type of kernel function (default 2)\n"
" 0 -- linear: u'*v\n"
" 1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
" 2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
" 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
" 4 -- precomputed kernel (kernel values in training_set_file)\n"
"-d degree : set degree in kernel function (default 3)\n"
"-g gamma : set gamma in kernel function (default 1/num_features)\n"
"-r coef0 : set coef0 in kernel function (default 0)\n"
"-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
"-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
"-m cachesize : set cache memory size in MB (default 100)\n"
"-e epsilon : set tolerance of termination criterion (default 0.001)\n"
"-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
"-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
"-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
"-v n: n-fold cross validation mode\n"
"-q : quiet mode (no outputs)\n"
);
exit(1);
}
void exit_input_error(int line_num)
{
fprintf(stderr,"Wrong input format at line %d\n", line_num);
exit(1);
}
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
void read_problem(const char *filename);
void do_cross_validation();
struct svm_parameter param; // set by parse_command_line
struct svm_problem prob; // set by read_problem
struct svm_model *model;
struct svm_node *x_space;
int cross_validation;
int nr_fold;
static char *line = NULL;
static int max_line_len;
static char* readline(FILE *input)
{
int len;
if(fgets(line,max_line_len,input) == NULL)
return NULL;
while(strrchr(line,'\n') == NULL)
{
max_line_len *= 2;
line = (char *) realloc(line,max_line_len);
len = (int) strlen(line);
if(fgets(line+len,max_line_len-len,input) == NULL)
break;
}
return line;
}
int main(int argc, char **argv)
{
char input_file_name[1024];
char model_file_name[1024];
const char *error_msg;
parse_command_line(argc, argv, input_file_name, model_file_name);
read_problem(input_file_name);
error_msg = svm_check_parameter(&prob,&param);
if(error_msg)
{
fprintf(stderr,"ERROR: %s\n",error_msg);
exit(1);
}
if(cross_validation)
{
do_cross_validation();
}
else
{
model = svm_train(&prob,&param);
if(svm_save_model(model_file_name,model))
{
fprintf(stderr, "can't save model to file %s\n", model_file_name);
exit(1);
}
svm_free_and_destroy_model(&model);
}
svm_destroy_param(&param);
free(prob.y);
free(prob.x);
free(x_space);
free(line);
return 0;
}
void do_cross_validation()
{
int i;
int total_correct = 0;
double total_error = 0;
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
double *target = Malloc(double,prob.l);
svm_cross_validation(&prob,&param,nr_fold,target);
if(param.svm_type == EPSILON_SVR ||
param.svm_type == NU_SVR)
{
for(i=0;i<prob.l;i++)
{
double y = prob.y[i];
double v = target[i];
total_error += (v-y)*(v-y);
sumv += v;
sumy += y;
sumvv += v*v;
sumyy += y*y;
sumvy += v*y;
}
printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
printf("Cross Validation Squared correlation coefficient = %g\n",
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
);
}
else
{
for(i=0;i<prob.l;i++)
if(target[i] == prob.y[i])
++total_correct;
printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
}
free(target);
}
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
{
int i;
void (*print_func)(const char*) = NULL; // default printing to stdout
// default values
param.svm_type = C_SVC;
param.kernel_type = RBF;
param.degree = 3;
param.gamma = 0; // 1/num_features
param.coef0 = 0;
param.nu = 0.5;
param.cache_size = 100;
param.C = 1;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
cross_validation = 0;
// parse options
for(i=1;i<argc;i++)
{
if(argv[i][0] != '-') break;
if(++i>=argc)
exit_with_help();
switch(argv[i-1][1])
{
case 's':
param.svm_type = atoi(argv[i]);
break;
case 't':
param.kernel_type = atoi(argv[i]);
break;
case 'd':
param.degree = atoi(argv[i]);
break;
case 'g':
param.gamma = atof(argv[i]);
break;
case 'r':
param.coef0 = atof(argv[i]);
break;
case 'n':
param.nu = atof(argv[i]);
break;
case 'm':
param.cache_size = atof(argv[i]);
break;
case 'c':
param.C = atof(argv[i]);
break;
case 'e':
param.eps = atof(argv[i]);
break;
case 'p':
param.p = atof(argv[i]);
break;
case 'h':
param.shrinking = atoi(argv[i]);
break;
case 'b':
param.probability = atoi(argv[i]);
break;
case 'q':
print_func = &print_null;
i--;
break;
case 'v':
cross_validation = 1;
nr_fold = atoi(argv[i]);
if(nr_fold < 2)
{
fprintf(stderr,"n-fold cross validation: n must >= 2\n");
exit_with_help();
}
break;
case 'w':
++param.nr_weight;
param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight);
param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight);
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
param.weight[param.nr_weight-1] = atof(argv[i]);
break;
default:
fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
exit_with_help();
}
}
svm_set_print_string_function(print_func);
// determine filenames
if(i>=argc)
exit_with_help();
strcpy(input_file_name, argv[i]);
if(i<argc-1)
strcpy(model_file_name,argv[i+1]);
else
{
char *p = strrchr(argv[i],'/');
if(p==NULL)
p = argv[i];
else
++p;
sprintf(model_file_name,"%s.model",p);
}
}
// read in a problem (in svmlight format)
void read_problem(const char *filename)
{
int max_index, inst_max_index, i;
size_t elements, j;
FILE *fp = fopen(filename,"r");
char *endptr;
char *idx, *val, *label;
if(fp == NULL)
{
fprintf(stderr,"can't open input file %s\n",filename);
exit(1);
}
prob.l = 0;
elements = 0;
max_line_len = 1024;
line = Malloc(char,max_line_len);
while(readline(fp)!=NULL)
{
char *p = strtok(line," \t"); // label
// features
while(1)
{
p = strtok(NULL," \t");
if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
break;
++elements;
}
++elements;
++prob.l;
}
rewind(fp);
prob.y = Malloc(double,prob.l);
prob.x = Malloc(struct svm_node *,prob.l);
x_space = Malloc(struct svm_node,elements);
max_index = 0;
j=0;
for(i=0;i<prob.l;i++)
{
inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
readline(fp);
prob.x[i] = &x_space[j];
label = strtok(line," \t\n");
if(label == NULL) // empty line
exit_input_error(i+1);
prob.y[i] = strtod(label,&endptr);
if(endptr == label || *endptr != '\0')
exit_input_error(i+1);
while(1)
{
idx = strtok(NULL,":");
val = strtok(NULL," \t");
if(val == NULL)
break;
errno = 0;
x_space[j].index = (int) strtol(idx,&endptr,10);
if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index)
exit_input_error(i+1);
else
inst_max_index = x_space[j].index;
errno = 0;
x_space[j].value = strtod(val,&endptr);
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
exit_input_error(i+1);
++j;
}
if(inst_max_index > max_index)
max_index = inst_max_index;
x_space[j++].index = -1;
}
if(param.gamma == 0 && max_index > 0)
param.gamma = 1.0/max_index;
if(param.kernel_type == PRECOMPUTED)
for(i=0;i<prob.l;i++)
{
if (prob.x[i][0].index != 0)
{
fprintf(stderr,"Wrong input format: first column must be 0:sample_serial_number\n");
exit(1);
}
if ((int)prob.x[i][0].value <= 0 || (int)prob.x[i][0].value > max_index)
{
fprintf(stderr,"Wrong input format: sample_serial_number out of range\n");
exit(1);
}
}
fclose(fp);
}
UPDATE
Can I convert the images to a numerical representation using this code?
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cv.h>
#include <highgui.h>
#include <cvaux.h>
#include <iostream>
#include <vector>
#include <string.h>
using namespace std;
using namespace cv;
int main ( int argc, char** argv )
{
cout << "OpenCV Training SVM Automatic Number Plate Recognition\n";
cout << "\n";
char* path_Plates;
char* path_NoPlates;
int numPlates;
int numNoPlates;
int imageWidth=150;
int imageHeight=150;
//Check if user specify image to process
if(1)
{
numPlates= 12;
numNoPlates= 67 ;
path_Plates= "/home/kaushik/opencv_work/Manas6/Pics/Positive_Images/";
path_NoPlates= "/home/kaushik/opencv_work/Manas6/Pics/Negative_Images/i";
}else{
cout << "Usage:\n" << argv[0] << " <num Plate Files> <num Non Plate Files> <path to plate folder files> <path to non plate files> \n";
return 0;
}
Mat classes;//(numPlates+numNoPlates, 1, CV_32FC1);
Mat trainingData;//(numPlates+numNoPlates, imageWidth*imageHeight, CV_32FC1 );
Mat trainingImages;
vector<int> trainingLabels;
for(int i=1; i<= numPlates; i++)
{
stringstream ss(stringstream::in | stringstream::out);
ss<<path_Plates<<i<<".jpg";
try{
const std::string a = ss.str(); // keep the string alive while printing
printf("\n%s\n", a.c_str());
Mat img = imread(ss.str(), CV_LOAD_IMAGE_UNCHANGED);
img= img.clone().reshape(1, 1);
//imshow("Window",img);
//cout<<ss.str();
trainingImages.push_back(img);
trainingLabels.push_back(1);
}
catch(Exception e){;}
}
for(int i=0; i< numNoPlates; i++)
{
stringstream ss(stringstream::in | stringstream::out);
ss << path_NoPlates<<i << ".jpg";
try
{
const std::string a = ss.str(); // keep the string alive while printing
printf("\n%s\n", a.c_str());
Mat img=imread(ss.str(),CV_LOAD_IMAGE_UNCHANGED);
//imshow("Win",img);
img= img.clone().reshape(1, 1);
trainingImages.push_back(img);
trainingLabels.push_back(0);
//cout<<ss.str();
}
catch(Exception e){;}
}
Mat(trainingImages).copyTo(trainingData);
//trainingData = trainingData.reshape(1,trainingData.rows);
trainingData.convertTo(trainingData, CV_32FC1);
Mat(trainingLabels).copyTo(classes);
FileStorage fs("SVM.xml", FileStorage::WRITE);
fs << "TrainingData" << trainingData;
fs << "classes" << classes;
fs.release();
return 0;
}
What I can see from your code is that you are mixing OpenCV and LIBSVM.
Basically you can follow one of the following ways. Personally, I would suggest using OpenCV only.
OpenCV
OpenCV is a very powerful library for working with images. Hence it implements its own machine learning algorithms, including SVMs.
As described in a very good way here, it is very easy to perform classification on images via OpenCV, since the algorithms use a common interface for this purpose.
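For illustration only (the linked tutorial is the authoritative reference), a minimal sketch of training an SVM through OpenCV's own ml module, assuming trainingData holds one CV_32F row per image and labels holds one CV_32S class id per row:

#include <opencv2/ml/ml.hpp>

// trainingData: one CV_32F row per image; labels: CV_32S, one class id per row.
cv::Ptr<cv::ml::SVM> trainSvm(const cv::Mat& trainingData, const cv::Mat& labels) {
    cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create();
    svm->setType(cv::ml::SVM::C_SVC);     // multi-class classification
    svm->setKernel(cv::ml::SVM::LINEAR);  // simple starting point
    svm->setTermCriteria(cv::TermCriteria(cv::TermCriteria::MAX_ITER, 1000, 1e-6));
    svm->train(trainingData, cv::ml::ROW_SAMPLE, labels);
    return svm;
}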
LIBSVM
LIBSVM is a standalone library for SVM classification in various forms (e.g. multiclass, two-class, with probability estimates, etc.). If you go this way, you have to perform the following steps in order to do successful classification:
Think about how many different classes you want to differentiate (e.g. + / -)
Maybe preprocess your images (filters, ...)
Extract so-called "features" from your images using a feature selection method (for example: mutual information). Those methods will tell you which points are significant for your given classes, since we follow the basic assumption that not every single pixel in an image is important.
According to your extracted features, transform your images into a vectorial representation.
Write it into a file according to the LIBSVM data format:
label feature_id1:feature_value1 feature_id2:feature_value2
+1 1:0.53265 2:0.5232
-1 1:0.78543 2:0.64326
Proceed with "svm_train" according to its description. Classification would be a combination of 2.), 4.), 5.) and a run of "svm_predict". A minimal sketch of steps 4.) and 5.) follows below.
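As a hedged sketch of steps 4.) and 5.) (the choice of raw scaled pixels as features is illustrative only; a real feature selection method would come first), this writes one image's feature vector as a LIBSVM-format line:

#include <fstream>
#include <opencv2/opencv.hpp>

// Append one training example in LIBSVM format:
// <label> <index1>:<value1> <index2>:<value2> ...  (indices start at 1)
void writeLibsvmLine(std::ofstream& out, int label, const cv::Mat& image) {
    cv::Mat feat;
    image.convertTo(feat, CV_32F, 1.0 / 255.0);  // crude features: scaled pixels
    feat = feat.reshape(1, 1);                   // flatten to a single row
    out << label;
    for (int i = 0; i < feat.cols; ++i) {
        float v = feat.at<float>(0, i);
        if (v != 0.0f)                           // LIBSVM rows may be sparse
            out << ' ' << (i + 1) << ':' << v;
    }
    out << '\n';
}

Positive images would then get label +1 and negative images -1, matching the format example above.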