DCGAN for RGB dataset strange results - c++

I am implementing a DCGAN network in LibTorch/Pytorch. I am following the official example in https://github.com/pytorch/examples/blob/master/cpp/dcgan/dcgan.cpp .
The only differences between my problem and the example are:
My dataset is composed by RGB pictures (CelebA dataset) while the one from the example is black and white (MNIST)
The dimensions of my pictures are 64x64 while MNIST pictures are 28x28
That said here is my code:
#include <torch/torch.h>
#include <cmath>
#include <cstdio>
#include <iostream>
#include "CustomDataset.h"
#include "parameters.h"
// The size of the noise vector fed to the generator.
const int64_t kNoiseSize = 100;
// The batch size for training.
const int64_t kBatchSize = 64;
// The number of epochs to train.
const int64_t kNumberOfEpochs = 30;
// Where to find the MNIST dataset.
const char* kDataFolder = "./data";
// After how many batches to create a new checkpoint periodically.
const int64_t kCheckpointEvery = 20;
// How many images to sample at every checkpoint.
const int64_t kNumberOfSamplesPerCheckpoint = 10;
// After how many batches to log a new update with the loss value.
const int64_t kLogInterval = 10;
using namespace torch;
struct DCGANGeneratorImpl : nn::Module {
DCGANGeneratorImpl(int kNoiseSize)
: conv1(nn::ConvTranspose2dOptions(kNoiseSize, 256, 4)
.bias(false)),
batch_norm1(256),
conv2(nn::ConvTranspose2dOptions(256, 128, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm2(128),
conv3(nn::ConvTranspose2dOptions(128, 64, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm3(64),
conv4(nn::ConvTranspose2dOptions(64, 32, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm4(32),
conv5(nn::ConvTranspose2dOptions(32, 3, 4)
.stride(2)
.padding(1)
.bias(false))
{
register_module("conv1", conv1);
register_module("conv2", conv2);
register_module("conv3", conv3);
register_module("conv4", conv4);
register_module("conv5", conv5);
register_module("batch_norm1", batch_norm1);
register_module("batch_norm2", batch_norm2);
register_module("batch_norm3", batch_norm3);
register_module("batch_norm4", batch_norm4);
}
torch::Tensor forward(torch::Tensor x)
{
x = torch::relu(batch_norm1(conv1(x)));
x = torch::relu(batch_norm2(conv2(x)));
x = torch::relu(batch_norm3(conv3(x)));
x = torch::relu(batch_norm4(conv4(x)));
x = torch::tanh(conv5(x));
return x;
}
nn::ConvTranspose2d conv1, conv2, conv3, conv4, conv5;
nn::BatchNorm2d batch_norm1, batch_norm2, batch_norm3, batch_norm4;
};
TORCH_MODULE(DCGANGenerator);
int main(int argc, const char* argv[]) {
torch::manual_seed(1);
// Create the device we pass around based on whether CUDA is available.
torch::Device device(torch::kCPU);
if (torch::cuda::is_available()) {
std::cout << "CUDA is available! Training on GPU." << std::endl;
device = torch::Device(torch::kCUDA);
}
DCGANGenerator generator(kNoiseSize);
generator->to(device);
nn::Sequential discriminator(
// Layer 1
nn::Conv2d(
nn::Conv2dOptions(3, 64, 4).stride(2).padding(1).bias(false)),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 32x32
// Layer 2
nn::Conv2d(
nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
nn::BatchNorm2d(128),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 16x16
// Layer 3
nn::Conv2d(
nn::Conv2dOptions(128, 64, 4).stride(2).padding(1).bias(false)),
nn::BatchNorm2d(64),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 8x8
// Layer 4
nn::Conv2d(
nn::Conv2dOptions(64, 32, 5).stride(1).padding(0).bias(false)),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
// output is 4x4
// Layer 5
nn::Conv2d(
nn::Conv2dOptions(32, 1, 4).stride(1).padding(0).bias(false)),
nn::Sigmoid());
discriminator->to(device);
// Where all my pictures are;
std::string file_location{"dataset/img_align_celeba/*.jpg"};
auto dataset = CustomDataset(file_location).map(data::transforms::Stack<>());
const int64_t batches_per_epoch =
std::ceil(dataset.size().value() / static_cast<double>(kBatchSize));
auto data_loader = torch::data::make_data_loader(
std::move(dataset),
torch::data::DataLoaderOptions().batch_size(kBatchSize).workers(2));
torch::optim::Adam generator_optimizer(
generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
torch::optim::Adam discriminator_optimizer(
discriminator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
int64_t checkpoint_counter = 1;
for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
int64_t batch_index = 0;
for (torch::data::Example<>& batch : *data_loader) {
// Train discriminator with real images.
discriminator->zero_grad();
torch::Tensor real_images = batch.data.to(device);
torch::Tensor real_labels =
torch::empty(batch.data.size(0), device).uniform_(0.8, 1.0);
torch::Tensor real_output = discriminator->forward(real_images);
torch::Tensor d_loss_real =
torch::binary_cross_entropy(real_output, real_labels);
d_loss_real.backward();
// Train discriminator with fake images.
torch::Tensor noise =
torch::randn({batch.data.size(0), kNoiseSize, 1, 1}, device);
torch::Tensor fake_images = generator->forward(noise);
torch::Tensor fake_labels = torch::zeros(batch.data.size(0), device);
torch::Tensor fake_output = discriminator->forward(fake_images.detach());
torch::Tensor d_loss_fake =
torch::binary_cross_entropy(fake_output, fake_labels);
d_loss_fake.backward();
torch::Tensor d_loss = d_loss_real + d_loss_fake;
discriminator_optimizer.step();
// Train generator.
generator->zero_grad();
fake_labels.fill_(1);
fake_output = discriminator->forward(fake_images);
torch::Tensor g_loss =
torch::binary_cross_entropy(fake_output, fake_labels);
g_loss.backward();
generator_optimizer.step();
batch_index++;
if (batch_index % kCheckpointEvery == 0) {
// Checkpoint the model and optimizer state.
torch::save(generator, "generator-checkpoint.pt");
torch::save(generator_optimizer, "generator-optimizer-checkpoint.pt");
torch::save(discriminator, "discriminator-checkpoint.pt");
torch::save(
discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
// Sample the generator and save the images.
torch::Tensor samples = generator->forward(torch::randn(
{kNumberOfSamplesPerCheckpoint, kNoiseSize, 1, 1}, device));
torch::save(
samples,
torch::str("dcgan-sample-", checkpoint_counter, ".pt"));
std::cout << "\n-> checkpoint " << ++checkpoint_counter << '\n';
}
}
}
std::cout << "Training complete!" << std::endl;
}
I save the minibatches from time to time and plot the result of inputing noise over the Generator. The problem is that in the MNIST example results are correct but in my case for each output picture I see like 9 smaller pictures with faces instead of one (see the picture attached).
How is it possible that the generator is outputting a correct shape but with 9 almost identical faces instead of one?

Related

ScalableTSDFVolume Integrate from TUM-RGBD Dataset

I am using Open3D 0.15 and C++11 on Ubuntu 18.04.
The main function I'm interested in is the ScalabeTSDFVolume Integrate() function, using the TUM RGBD dataset (the xyz set to be exact), based off of the IntegrateRGBD example from the Open3D repo.
Since the TUM-RGBD dataset does not provide an association file that matches the RGBD images and the trajectory info, I've created my own small code that matches the timestamp on the TUM dataset's image data and the trajectory information, and converting the 7-dimension [x y z rx ry rz rw] trajectory information into Eigen::Matrix4d, using the same equation that Open3D's FileTUM.cpp uses:
do
{
// Read the timestamp first
gt >> p_gt.timestamp;
double poseArr[7];
// push the remaining 7 numbers to the poseArr
for (int i = 0; i < 7; i++)
gt >> poseArr[i];
// copy paste of the tum trajectory reader
Eigen::Matrix4d transform;
transform.setIdentity();
transform.topLeftCorner<3, 3>() =
Eigen::Quaterniond(poseArr[6], poseArr[3], poseArr[4], poseArr[5]).toRotationMatrix();
transform.topRightCorner<3, 1>() = Eigen::Vector3d(poseArr[0], poseArr[1], poseArr[2]);
p_gt.pose = transform.inverse();
gtF.push_back(p_gt);
} while (std::getline(gt, line));
The code runs fine, but the issue is when I try to integrate multiple frames into the same volume and extract its pointcloud or mesh.
I can tell that the RGBD information is being fed into the program correctly, by extracting the mesh at the very first frame:
first frame mesh extraction
But there is a significant artifact when I try to extract the mesh when more frames are integrated, like this:
30 frames mesh extraction
From my previous experience, this probably has to do with the fact that the transformation matrices are not in the correct axis. If anyone has tried to use the TUM dataset with Open3D and encountered the same problem, I would greatly appreciate any info on this.
Edit:
For reference, this is the modified code I'm using for the reconstruction.
int main(int argc, char *argv[]) {
using namespace open3d;
std::string filebase("/home/geometry/Documents/rgbd_dataset_freiburg1_xyz");
VirtualSensor::CameraParameters kinect{ 525.0,525.0,319.5,239.5,5000};
VirtualSensor::CameraParameters camPar = kinect;
VirtualSensor v1(filebase,camPar);
bool save_pointcloud = true;
bool save_mesh = true;
bool save_voxel = false;
int every_k_frames = 50;
double length = 4.0;
double uLength = 6.0;
int resolution = 512;
double sdf_trunc_percentage = 0.01;
int verbose = 2;
utility::SetVerbosityLevel((utility::VerbosityLevel)verbose);
auto camera_intrinsic = camera::PinholeCameraIntrinsic(640, 480, 525.0, 525.0, 319.5, 239.5);
int index = 0;
int save_index = 0;
int pairSize = 30;
// initialise TSDF
pipelines::integration::ScalableTSDFVolume volume(
length / (double)resolution, length * sdf_trunc_percentage,
pipelines::integration::TSDFVolumeColorType::RGB8);
//pipelines::integration::UniformTSDFVolume uVolume(uLength, resolution, uLength*sdf_trunc_percentage, pipelines::integration::TSDFVolumeColorType::RGB8);
utility::FPSTimer timer("Process RGBD stream",
pairSize);
geometry::Image depth, color;
// start loop
for(int i = 0; i < pairSize; i++){
utility::LogInfo("Processing frame {:d} ...", index);
io::ReadImage(v1.GetDepthPath(i), depth);
io::ReadImage(v1.GetColorPath(i), color);
auto rgbd = geometry::RGBDImage::CreateFromColorAndDepth(
color, depth, 5000.0, 6.0, false);
if (index == 0 ||
(every_k_frames > 0 && index % every_k_frames == 0))
volume.Reset();
}
volume.Integrate(*rgbd,
camera_intrinsic, // intrinsic never changes
v1.GetCounterGT(i)); // get the groundtruth pose from my class
index++;
// saving mesh/pc logic
if (index == pairSize ||
(every_k_frames > 0 && index % every_k_frames == 0)) {
utility::LogInfo("Saving fragment {:d} ...", save_index);
std::string save_index_str = std::to_string(save_index);
if (save_pointcloud) {
utility::LogInfo("Saving pointcloud {:d} ...", save_index);
auto pcd = volume.ExtractPointCloud();
io::WritePointCloud("pointcloud_" + save_index_str + ".ply",
*pcd);
}
if (save_mesh) {
utility::LogInfo("Saving mesh {:d} ...", save_index);
auto mesh = volume.ExtractTriangleMesh();
io::WriteTriangleMesh("mesh_" + save_index_str + ".ply",
*mesh);
}
if (save_voxel) {
utility::LogInfo("Saving voxel {:d} ...", save_index);
auto voxel = volume.ExtractVoxelPointCloud();
io::WritePointCloud("voxel_" + save_index_str + ".ply",
*voxel);
}
save_index++;
}
timer.Signal();
}
return 0;
}

What should you store or append a batch of tensors to in C++ when using LibTorch?

In C++, when using LibTorch (The C++ version of PyTorch), what should you store a batch of tensors in? I'm running into the problem of not being able to reset the batch on the next step because C++ doesn't allow storing a new variable over an existing variable.
In my attempt my batch of tensors is one single 385x385 tensor. The batch size is 385. In a for loop I use torch::cat to concatenate 385 smaller 1D tensors, which are 385 numbers long. (Maybe 'stack' or 'append' are better terms for what I'm doing since the are stacked together picket fence style more than 'concatenated', but that's what I'm using.) Anyways, there is not problem with this shape. It seems to work fine for one forward and backward pass but then the tensor becomes 770x385 on the next pass instead of a 385x385 tensor of the next 385, 385 long arrays. I hope I am painting a picture and not being too verbose.
The code.
Near the bottom I have the line all_step_obs = torch::tensor({}); to try to wipe out the contents of the tensor, AKA, the batch, but this gives me a Segmentation fault (core dumped). I guess for trying to access the tensor outside of the loop(?)
If I don't have this line I get a 770x385 tensor after the next step.
The model
#include "mujoco/mujoco.h"
struct Net : torch::nn::Module {
torch::Tensor action_high, action_low;
public:
Net(torch::Tensor action_high, torch::Tensor action_low) : action_high(action_high), action_low(action_low){
// Construct and register two Linear submodules.
fc1 = torch::nn::Linear(385, 385);
fc2 = torch::nn::Linear(385, 385);
fc3 = torch::nn::Linear(385, 42);
// cholesky_layer = torch::nn::Linear(385, (42 * (42 + 1)) / 2);
cholesky_layer = torch::nn::Linear(385, 385);
}
// Implement the Net's algorithm.
torch::Tensor forward(torch::Tensor x) {
// Use one of many tensor manipulation functions.
x = torch::relu(fc1->forward(x));
x = torch::dropout(x, /*p=*/0.2, /*train=*/is_training());
x = torch::relu(fc2->forward(x));
auto mean_layer = fc3->forward(x);
auto mean = action_low + (action_high - action_low) * mean_layer;
auto chol_l = cholesky_layer->forward(x);
// auto chol = torch::rand({385, 385});
auto chol = torch::matmul(chol_l, chol_l.transpose(0, 1));
chol = torch::nan_to_num(chol, 0, 2.0);
chol = chol.add(torch::eye(385));
auto cholesky = torch::linalg::cholesky(chol);
// return torch::cat({mean, cholesky}, 0);
return mean_layer;
}
// Use one of many "standard library" modules.
torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}, cholesky_layer{nullptr};
};
The training
auto high = torch::ones({385, 42}) * 0.4;
auto low = torch::ones({385, 42}) * -0.4;
auto actor = Net(low, high);
int max_steps = 385;
int steps = 2000;
auto l1_loss = torch::smooth_l1_loss;
auto optimizer = torch::optim::Adam(actor.parameters(), 3e-4);
torch::Tensor train() {
torch::Tensor all_step_obs;
for (int i = 0; i<steps; ++i)
{
for (int i = 0; i<max_steps; ++i)
{
all_step_obs = torch::cat({torch::rand({385}).unsqueeze(0), all_step_obs});
}
auto mean = actor.forward(all_step_obs);
auto loss = l1_loss(mean, torch::rand({385, 42}), 1, 0);
optimizer.zero_grad();
loss.backward();
optimizer.step();
all_step_obs = torch::tensor({});
if (steps == 1999) {
return loss;
}
}
};
int main (int argc, const char** argv) {
std::cout << train();
}

Use multiple images for batch inference cppflow C++

I'm trying to use cppflow library in windows 10 x64 machine in VS2019 C++. I want to inference my model for batch of images (vector <cv::Mat> ). I write a simple code as below for single image and it works correctly:
string im_path{ "..." };
string model_path{ "...\\ocr_model" };
cv::Mat tmp, im;
cv::resize(cv::imread(im_path, cv::IMREAD_GRAYSCALE), tmp, cv::Size(127, 25), 0, 0, cv::INTER_CUBIC);
cv::transpose(tmp, im);
int rows = im.rows; int cols = im.cols; int channels = im.channels();
// Put image in tensor
std::vector<uint8_t> img_data;
auto e = std::end(img_data);
img_data.insert(e, im.data, im.data + im.total() * channels);
auto input = cppflow::tensor(img_data, {rows, cols, channels});
input = cppflow::cast(input, TF_UINT8, TF_FLOAT);
auto t = input.get_data<float>();
input = input / 255.f;
input = cppflow::expand_dims(input, 0);
cppflow::model model{ model_path };
auto output = model({ {"serving_default_input:0", input}}, { "StatefulPartitionedCall:0"});
I want to load multiple images (in code below I use a cloned image as second image). here is what I really want to do:
string im_path{ "..." };
string model_path{ "...\\ocr_model" };
cv::Mat tmp, im;
cv::resize(cv::imread(im_path, cv::IMREAD_GRAYSCALE), tmp, cv::Size(127, 25), 0, 0, cv::INTER_CUBIC);
cv::transpose(tmp, im);
int rows = im.rows; int cols = im.cols; int channels = im.channels();
// Put image in tensor
std::vector<uint8_t> img_data;
auto im_clone = im.clone();
auto e = std::end(img_data);
img_data.insert(e, im.data, im.data + im.total() * channels);
e = std::end(img_data);
img_data.insert(e, im_clone.data, im_clone.data + im_clone.total() * channels);
auto input = cppflow::tensor(img_data, {2, rows, cols, channels});
input = cppflow::cast(input, TF_UINT8, TF_FLOAT);
input = input / 255.f;
input = cppflow::expand_dims(input, 0);
cppflow::model model{ model_path };
auto output = model({ {"serving_default_input:0", input}}, { "StatefulPartitionedCall:0"});
As you see the difference between the codes are img_data preparation and tensor definition but unfortunately, I get this error:
Unhandled exception at 0x00007FFFF4514ED9 in cppflow_Test.exe:
Microsoft C++ exception: std::runtime_error at memory location
0x00000031F72FDBD8.
How can I load multiple images (vector< cv::Mat >) to a tensor and use its corresponding outputs? in other words I need a example for batch inference using cppflow library.
Try using std::copy to insert multiple copies of the image (or multiple images) into the img_data vector.
int data_size = rows * cols * channels;
std::vector<uint8> img_data;
for (size_t i = 0; i < batch_size; i++)
{
std::copy(im.data, im.data + data_size, std::begin(img_data) + i * data_size);
}
auto input = cppflow::tensor(img_data, {batch_size, rows, cols, channels});
Use cppflow::concat to concatenate the n tensors you want to send to the model:
auto input1 = cppflow::decode_jpeg(file1);
input1 = cppflow::expand_dims(input1, 0);
auto input2 = cppflow::decode_jpeg(file2);
input2 = cppflow::expand_dims(input2, 0);
cppflow::tensor dim({0});
std::vector<cppflow::tensor> values;
values.push_back(input1);
values.push_back(input2);
auto inputs = cppflow::concat(dim, values);
auto output = model({ {"serving_default_input:0", inputs}}, { "StatefulPartitionedCall:0"});

Dimension mismatch CNN LibTorch/PyTorch

I have a CNN structure in LibTorch but the dimensions are not ok. My objective is to input a 3 channel 64x64 image and output a logistic regression float for a DGAN. Last layer I set as input channels 36 because if I remove that layer the output neuron had 6x6 dimension so I guesses that was the required dimension for the input of the fully connected. I would like to know:
What do you normally do to check dimensions in LibTorch or Pytorch (i.e. check the required size for the last module, check how many trainable parameters has each layer ...)
What is the error in this case
#include <torch/torch.h>
#include "parameters.h"
using namespace torch;
class DCGANDiscriminatorImpl: public nn::Module {
private:
nn::Conv2d conv1, conv2, conv3, conv4;
nn::BatchNorm2d batch_norm1, batch_norm2;
nn::Linear fc1;
public:
DCGANDiscriminatorImpl()
:conv1(nn::Conv2dOptions(3, 64, 4).stride(2).padding(1).bias(false)),
conv2(nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
batch_norm1(128),
conv3(nn::Conv2dOptions(128, 256, 4).stride(2).padding(1).bias(false)),
batch_norm2(256),
conv4(nn::Conv2dOptions(256, 1, 3).stride(1).padding(0).bias(false)),
fc1(6*6, 1)
{
register_module("conv1", conv1);
register_module("conv2", conv2);
register_module("conv3", conv3);
register_module("conv4", conv4);
register_module("batch_norm1", batch_norm1);
register_module("batch_norm2", batch_norm2);
register_module("fc1", fc1);
}
Tensor forward(torch::Tensor x)
{
x = leaky_relu(conv1(x), cte::NEGATIVE_SLOPE);
x = leaky_relu(batch_norm1(conv2(x)), cte::NEGATIVE_SLOPE);
x = leaky_relu(batch_norm2(conv3(x)), cte::NEGATIVE_SLOPE);
x = sigmoid(fc1(x));
return x;
}
};
TORCH_MODULE(DCGANDiscriminator);
The error I get is:
libc++abi.dylib: terminating with uncaught exception of type std::runtime_error: size mismatch, m1: [131072 x 8], m2: [36 x 1] at ../aten/src/TH/generic/THTensorMath.cpp:136
I had several issues but at the end this architecture worked.
using namespace torch;
class DCGANDiscriminatorImpl: public nn::Module {
private:
nn::Conv2d conv1, conv2, conv3, conv4;
nn::BatchNorm2d batch_norm1, batch_norm2;
nn::Linear fc1;
public:
DCGANDiscriminatorImpl()
:conv1(nn::Conv2dOptions(3, 64, 4).stride(2).padding(1).bias(false)),
conv2(nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
batch_norm1(128),
conv3(nn::Conv2dOptions(128, 256, 4).stride(2).padding(1).bias(false)),
batch_norm2(256),
conv4(nn::Conv2dOptions(256, 64, 3).stride(1).padding(0).bias(false)),
fc1(6*6*64, 1)
{
register_module("conv1", conv1);
register_module("conv2", conv2);
register_module("conv3", conv3);
register_module("conv4", conv4);
register_module("batch_norm1", batch_norm1);
register_module("batch_norm2", batch_norm2);
register_module("fc1", fc1);
}
Tensor forward(torch::Tensor x)
{
x = leaky_relu(conv1(x), cte::NEGATIVE_SLOPE);
x = leaky_relu(batch_norm1(conv2(x)), cte::NEGATIVE_SLOPE);
x = leaky_relu(batch_norm2(conv3(x)), cte::NEGATIVE_SLOPE);
x = leaky_relu(conv4(x), cte::NEGATIVE_SLOPE);
x = x.view({x.size(0), -1});
x = sigmoid(fc1(x));
return x;
}
};
TORCH_MODULE(DCGANDiscriminator);

Does Opencv 3 SVM trainAuto scale labels too?

I'm using OpenCV 3.0.0. When running OpenCV's SVM example 1 I noticed that when auto trained instead of trained, the predicted values are between 0 and 1. I don't see the same behavior when I run the same example directly with libsvm.
Is this a bug or an intentional scaling of the labels along with the other features? The behavior also seems to be undocumented.
Here is the code I'm running:
// Set up training data
size_t numberOfSamples = 4;
cv::Mat1i labelsMat(numberOfSamples, 1);
labelsMat(0, 0) = 1;
labelsMat(1, 0) = -1;
labelsMat(2, 0) = -1;
labelsMat(3, 0) = -1;
cv::Mat1f trainingDataMat(numberOfSamples, 2);
// Sample 0
trainingDataMat(0, 0) = 501;
trainingDataMat(0, 1) = 10;
// Sample 1
trainingDataMat(1, 0) = 255;
trainingDataMat(1, 1) = 10;
// Sample 2
trainingDataMat(2, 0) = 501;
trainingDataMat(2, 1) = 255;
// Sample 3
trainingDataMat(3, 0) = 10;
trainingDataMat(3, 1) = 501;
// Set up SVM's parameters
cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create();
svm->setType(cv::ml::SVM::C_SVC);
svm->setKernel(cv::ml::SVM::LINEAR);
svm->setTermCriteria(cv::TermCriteria(cv::TermCriteria::MAX_ITER, 100, 1e-6));
// Train the SVM with given parameters
cv::Ptr<cv::ml::TrainData> td =
cv::ml::TrainData::create(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat);
// train the SVM
// svm->train(td);
// or auto train
svm->trainAuto(td);
// predict
// first point used for training
cv::Mat point1 = (cv::Mat_<float>(1, 2) << 501, 10);
float response1 = svm->predict(point1);
// second point used for training
cv::Mat point2 = (cv::Mat_<float>(1, 2) << 255, 10);
float response2 = svm->predict(point2);
std::cout << "first point: " << response1 << "\n" <<
<< "second point: " << response2 << std::endl;
If ran with trainAuto it will output 0 and 1 instead of -1 and 1.
Apparently it was a bug that is fixed now:
http://code.opencv.org/issues/4464