I am implementing a DCGAN network in LibTorch/PyTorch. I am following the official example at https://github.com/pytorch/examples/blob/master/cpp/dcgan/dcgan.cpp.
The only differences between my problem and the example are:
My dataset is composed of RGB pictures (the CelebA dataset), while the one from the example is black and white (MNIST).
My pictures are 64x64, while the MNIST ones are 28x28.
That said, here is my code:
#include <torch/torch.h>
#include <cmath>
#include <cstdio>
#include <iostream>
#include "CustomDataset.h"
#include "parameters.h"
// The size of the noise vector fed to the generator.
const int64_t kNoiseSize = 100;
// The batch size for training.
const int64_t kBatchSize = 64;
// The number of epochs to train.
const int64_t kNumberOfEpochs = 30;
// Where to find the dataset (left over from the MNIST example).
const char* kDataFolder = "./data";
// After how many batches to create a new checkpoint periodically.
const int64_t kCheckpointEvery = 20;
// How many images to sample at every checkpoint.
const int64_t kNumberOfSamplesPerCheckpoint = 10;
// After how many batches to log a new update with the loss value.
const int64_t kLogInterval = 10;
using namespace torch;
struct DCGANGeneratorImpl : nn::Module {
DCGANGeneratorImpl(int kNoiseSize)
: conv1(nn::ConvTranspose2dOptions(kNoiseSize, 256, 4)
.bias(false)),
batch_norm1(256),
conv2(nn::ConvTranspose2dOptions(256, 128, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm2(128),
conv3(nn::ConvTranspose2dOptions(128, 64, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm3(64),
conv4(nn::ConvTranspose2dOptions(64, 32, 4)
.stride(2)
.padding(1)
.bias(false)),
batch_norm4(32),
conv5(nn::ConvTranspose2dOptions(32, 3, 4)
.stride(2)
.padding(1)
.bias(false))
{
register_module("conv1", conv1);
register_module("conv2", conv2);
register_module("conv3", conv3);
register_module("conv4", conv4);
register_module("conv5", conv5);
register_module("batch_norm1", batch_norm1);
register_module("batch_norm2", batch_norm2);
register_module("batch_norm3", batch_norm3);
register_module("batch_norm4", batch_norm4);
}
torch::Tensor forward(torch::Tensor x)
{
x = torch::relu(batch_norm1(conv1(x)));
x = torch::relu(batch_norm2(conv2(x)));
x = torch::relu(batch_norm3(conv3(x)));
x = torch::relu(batch_norm4(conv4(x)));
x = torch::tanh(conv5(x));
return x;
}
nn::ConvTranspose2d conv1, conv2, conv3, conv4, conv5;
nn::BatchNorm2d batch_norm1, batch_norm2, batch_norm3, batch_norm4;
};
TORCH_MODULE(DCGANGenerator);
int main(int argc, const char* argv[]) {
torch::manual_seed(1);
// Create the device we pass around based on whether CUDA is available.
torch::Device device(torch::kCPU);
if (torch::cuda::is_available()) {
std::cout << "CUDA is available! Training on GPU." << std::endl;
device = torch::Device(torch::kCUDA);
}
DCGANGenerator generator(kNoiseSize);
generator->to(device);
nn::Sequential discriminator(
// Layer 1
nn::Conv2d(
nn::Conv2dOptions(3, 64, 4).stride(2).padding(1).bias(false)),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 32x32
// Layer 2
nn::Conv2d(
nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
nn::BatchNorm2d(128),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 16x16
// Layer 3
nn::Conv2d(
nn::Conv2dOptions(128, 64, 4).stride(2).padding(1).bias(false)),
nn::BatchNorm2d(64),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
//output is 8x8
// Layer 4
nn::Conv2d(
nn::Conv2dOptions(64, 32, 5).stride(1).padding(0).bias(false)),
nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
// output is 4x4
// Layer 5
nn::Conv2d(
nn::Conv2dOptions(32, 1, 4).stride(1).padding(0).bias(false)),
nn::Sigmoid());
discriminator->to(device);
// Where all my pictures are.
std::string file_location{"dataset/img_align_celeba/*.jpg"};
auto dataset = CustomDataset(file_location).map(data::transforms::Stack<>());
const int64_t batches_per_epoch =
std::ceil(dataset.size().value() / static_cast<double>(kBatchSize));
auto data_loader = torch::data::make_data_loader(
std::move(dataset),
torch::data::DataLoaderOptions().batch_size(kBatchSize).workers(2));
torch::optim::Adam generator_optimizer(
generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
torch::optim::Adam discriminator_optimizer(
discriminator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
int64_t checkpoint_counter = 1;
for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
int64_t batch_index = 0;
for (torch::data::Example<>& batch : *data_loader) {
// Train discriminator with real images.
discriminator->zero_grad();
torch::Tensor real_images = batch.data.to(device);
torch::Tensor real_labels =
torch::empty(batch.data.size(0), device).uniform_(0.8, 1.0);
torch::Tensor real_output = discriminator->forward(real_images);
torch::Tensor d_loss_real =
torch::binary_cross_entropy(real_output, real_labels);
d_loss_real.backward();
// Train discriminator with fake images.
torch::Tensor noise =
torch::randn({batch.data.size(0), kNoiseSize, 1, 1}, device);
torch::Tensor fake_images = generator->forward(noise);
torch::Tensor fake_labels = torch::zeros(batch.data.size(0), device);
torch::Tensor fake_output = discriminator->forward(fake_images.detach());
torch::Tensor d_loss_fake =
torch::binary_cross_entropy(fake_output, fake_labels);
d_loss_fake.backward();
torch::Tensor d_loss = d_loss_real + d_loss_fake;
discriminator_optimizer.step();
// Train generator.
generator->zero_grad();
fake_labels.fill_(1);
fake_output = discriminator->forward(fake_images);
torch::Tensor g_loss =
torch::binary_cross_entropy(fake_output, fake_labels);
g_loss.backward();
generator_optimizer.step();
batch_index++;
if (batch_index % kCheckpointEvery == 0) {
// Checkpoint the model and optimizer state.
torch::save(generator, "generator-checkpoint.pt");
torch::save(generator_optimizer, "generator-optimizer-checkpoint.pt");
torch::save(discriminator, "discriminator-checkpoint.pt");
torch::save(
discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
// Sample the generator and save the images.
torch::Tensor samples = generator->forward(torch::randn(
{kNumberOfSamplesPerCheckpoint, kNoiseSize, 1, 1}, device));
torch::save(
samples,
torch::str("dcgan-sample-", checkpoint_counter, ".pt"));
std::cout << "\n-> checkpoint " << ++checkpoint_counter << '\n';
}
}
}
std::cout << "Training complete!" << std::endl;
}
I save samples from time to time and plot the result of feeding noise through the generator. The problem is that in the MNIST example the results are correct, but in my case each output picture contains something like 9 smaller faces instead of one (see the attached picture).
How is it possible that the generator outputs the correct shape, but with 9 almost identical faces instead of one?
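For debugging, note that the layer arithmetic above does produce 64x64 outputs, so the image-to-tensor conversion inside CustomDataset (not shown) is worth checking. A sketch of a sanity check; the from_blob/permute lines are an assumption about how the images are loaded:
// One noise vector should yield exactly one 3x64x64 image.
auto z = torch::randn({1, kNoiseSize, 1, 1}, device);
auto sample = generator->forward(z);
std::cout << sample.sizes() << std::endl; // expect [1, 3, 64, 64]
// If CustomDataset reads images with OpenCV, the Mat is HWC and must be
// permuted to CHW; a missing permute is a classic cause of tiled outputs:
// auto t = torch::from_blob(mat.data, {64, 64, 3}, torch::kUInt8)
//              .permute({2, 0, 1}).to(torch::kFloat32).div(255);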
I am trying to apply a Kalman filter in C++ with OpenCV in order to filter some tracks. The first step to make it work for me is to predict the points with the filter from a vector of Point2f.
My code is the following:
cv::KalmanFilter kalmanFilter(4,2,0, CV_32F);
kalmanFilter.transitionMatrix = transitionMat;
for(int i = 0 ; i < oldTrackeables.size() ; i++)
for(int j = 0 ; j < oldTrackeables[i].getTrack().size() ; j++)
{
cv::Size msmtSize(1, 2); // Size is (width, height): this gives a 2x1 measurement vector
cv::Mat measurementMat(msmtSize, CV_32F);
measurementMat.setTo(cv::Scalar(0));
measurementMat.at<float>(0) = oldTrackeables[i].getTrack()[j].x;
measurementMat.at<float>(1) = oldTrackeables[i].getTrack()[j].y;
//Initialisation of the Kalman filter
kalmanFilter.statePre.at<float>(0) = (float) oldTrackeables[i].getTrack()[j].x;
kalmanFilter.statePre.at<float>(1) = (float) oldTrackeables[i].getTrack()[j].y;
kalmanFilter.statePre.at<float>(2) = (float) 2;
kalmanFilter.statePre.at<float>(3) = (float) 3;
cv::setIdentity(kalmanFilter.measurementMatrix);
cv::setIdentity(kalmanFilter.processNoiseCov, cv::Scalar::all(1e-4));
cv::setIdentity(kalmanFilter.measurementNoiseCov, cv::Scalar::all(.1));
cv::setIdentity(kalmanFilter.errorCovPost, cv::Scalar::all(.1));
//Prediction
cv::Mat prediction = kalmanFilter.predict();
kalmanFilter.statePre.copyTo(kalmanFilter.statePost);
kalmanFilter.errorCovPre.copyTo(kalmanFilter.errorCovPost);
cv::Point predictPt(prediction.at<float>(0), prediction.at<float>(1));
cv::Point Mc = oldTrackeables[i].getMassCenter();
cv::circle(kalmat, predictPt, 16, cv::Scalar(0,255,0), 3, 2, 1);
std::cout<<"prediction : x = " << predictPt.x << " - y = " << predictPt.y <<std::endl;
std::cout<<"position captée : x = " << oldTrackeables[i].getTrack()[j].x << " - y = " << oldTrackeables[i].getTrack()[j].y << std::endl;
std::cout<<"size of frame : rows = " << frame.rows << " - width = " << frame.cols <<std::endl;
std::cout<<"size of kalmat : rows = " << kalmat.rows << " - width = " << kalmat.cols <<std::endl;
cv::imshow("kalmat", kalmat);
Where oldTrackeables[i].getTrack()[j] are just some Point2f from a vector.
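transitionMat is not shown either; for a 4-state (x, y, vx, vy), 2-measurement filter like this one it would typically be a constant-velocity model. A sketch (an assumption, since the question omits it):
// Hypothetical constant-velocity transition matrix, dt = 1.
cv::Mat transitionMat = (cv::Mat_<float>(4, 4) <<
    1, 0, 1, 0,
    0, 1, 0, 1,
    0, 0, 1, 0,
    0, 0, 0, 1);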
The tracking is correct, but the Kalman filter does not give "correct" values for the prediction. For example, the program displays:
prediction : x = 0 - y = 0
measured position : x = 138.29 - y = 161.078 (position of the original point).
I have really been looking a lot for answers and trying many different ways to do it, but I can't find anything that really helps me. The closest one I found was this: http://answers.opencv.org/question/24865/why-kalman-filter-keeps-returning-the-same-prediction/ but it did not help me solve my problem.
If any of you has an element of an answer or could help me understand the problem, I'd be very grateful.
Thank you.
First of all, I would move all the init stuff outside the loop, otherwise you will override the internal states of the filter. Also, change statePre to statePost:
//Initialisation of the Kalman filter
kalmanFilter.statePost.at<float>(0) = (float) 0;
kalmanFilter.statePost.at<float>(1) = (float) 0;
kalmanFilter.statePost.at<float>(2) = (float) 2;
kalmanFilter.statePost.at<float>(3) = (float) 3;
cv::setIdentity(kalmanFilter.measurementMatrix);
cv::setIdentity(kalmanFilter.processNoiseCov, cv::Scalar::all(1e-4));
cv::setIdentity(kalmanFilter.measurementNoiseCov,cv::Scalar::all(.1));
cv::setIdentity(kalmanFilter.errorCovPost, cv::Scalar::all(.1));
The part:
kalmanFilter.statePre.copyTo(kalmanFilter.statePost);
kalmanFilter.errorCovPre.copyTo(kalmanFilter.errorCovPost);
should be removed since this is done internally in the predict phase.
Finally, as @Mozfox says, the correction phase is not present in the loop code you provided. Add:
kalmanFilter.correct(measurementMat);
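Putting these changes together, a minimal sketch of the revised loop; "track" stands in for oldTrackeables[i].getTrack(), and transitionMat is assumed to be defined as in the question:
cv::KalmanFilter kf(4, 2, 0, CV_32F);
kf.transitionMatrix = transitionMat;
cv::setIdentity(kf.measurementMatrix);
cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-4));
cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(.1));
cv::setIdentity(kf.errorCovPost, cv::Scalar::all(.1));
kf.statePost.at<float>(0) = track[0].x; // seed with the first point
kf.statePost.at<float>(1) = track[0].y;
kf.statePost.at<float>(2) = 0.f;        // initial velocity guess
kf.statePost.at<float>(3) = 0.f;
for (const cv::Point2f& p : track) {
    cv::Mat prediction = kf.predict();                         // time update
    cv::Mat measurement = (cv::Mat_<float>(2, 1) << p.x, p.y);
    kf.correct(measurement);                                   // measurement update
    cv::Point predictPt(prediction.at<float>(0), prediction.at<float>(1));
    // draw/log predictPt as before
}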
I think you are missing the correction phase for the measurement computation.
I'm trying to run a sample through a pre-trained model on iOS. To my understanding, session->Run() takes a tensor as input. I have initialized a tensor, but how do I set its value? I don't have much experience using C++.
I have successfully created a test model that accepts a 3-dimensional tensor of shape {1, 1, 10}.
I pulled the following line of code from TensorFlow's simple example to create the input tensor:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/ios_examples/simple/RunModelViewController.mm#L189
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1,1,10}));
From here, I cannot figure out how I would set the data of input_tensor. I would like to set the tensor to something like {{{.0, .1, .2, .3, .4, .5, .6, .7, .8, .9}}}
I had a similar problem and was trying to set the tensor input values in C++ for a model trained in Python. The model is a simple NN with one hidden layer to learn to calculate the XOR operation.
I first created an output graph file with both the graph structure and the model parameters by following steps 1-4 of this nice post: https://medium.com/@hamedmp/exporting-trained-tensorflow-models-to-c-the-right-way-cf24b609d183#.j4l51ptvb.
Then in C++ (the TensorFlow iOS simple example), I used the following code:
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({4,2}));
// input_tensor_mapped is an interface to the data of a tensor and used to copy data into the tensor
auto input_tensor_mapped = input_tensor.tensor<float, 2>();
// set the (4,2) possible input values for XOR
input_tensor_mapped(0, 0) = 0.0;
input_tensor_mapped(0, 1) = 0.0;
input_tensor_mapped(1, 0) = 0.0;
input_tensor_mapped(1, 1) = 1.0;
input_tensor_mapped(2, 0) = 1.0;
input_tensor_mapped(2, 1) = 0.0;
input_tensor_mapped(3, 0) = 1.0;
input_tensor_mapped(3, 1) = 1.0;
tensorflow::Status run_status = session->Run({{input_layer, input_tensor}},
{output_layer}, {}, &outputs);
After this, GetTopN(output->flat<float>(), kNumResults, kThreshold, &top_results); returns in top_results the same 4 values (0.94433498, 0.94425952, 0.06565627, 0.05823805) as my Python test code for XOR returns after the model is trained.
So if your tensor's shape is {1,1,10}, you can set the values as follows:
auto input_tensor_mapped = input_tensor.tensor<float, 3>();
input_tensor_mapped(0, 0, 0) = 0.0;
input_tensor_mapped(0, 0, 1) = 0.1;
....
input_tensor_mapped(0, 0, 9) = 0.9;
Credit: the answer at "How do I pass an OpenCV Mat into a C++ Tensorflow graph?" is very helpful.
If you want to set the value of a tensor directly you can use a few utility functions provided by the Tensor interface. For the most common, linear access, you can use flat<T>.
From tensor_test:
void ExpectClose(const Tensor& x, const Tensor& y, double atol, double rtol) {
auto Tx = x.flat<T>();
auto Ty = y.flat<T>();
for (int i = 0; i < Tx.size(); ++i) {
if (!IsClose(Tx(i), Ty(i), atol, rtol)) {
LOG(ERROR) << "x = " << x.DebugString();
LOG(ERROR) << "y = " << y.DebugString();
LOG(ERROR) << "atol = " << atol << " rtol = " << rtol
<< " tol = " << atol + rtol * std::fabs(Tx(i));
EXPECT_TRUE(false) << i << "-th element is not close " << Tx(i) << " vs. "
<< Ty(i);
}
}
}
To create a tensor you can use one of the constructors:
Tensor(DT_FLOAT, TensorShape(..))
If you want to set the value of a tensor or a placeholder at run time you need to pass it through the Run() interface:
Status run_status = session->Run({{input_layer, resized_tensor}},
{output_layer}, {}, &outputs);
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
If you want a tensor with a predefined value you can use the Const op:
tensorflow::ops::Const({input_height, input_width})
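Coming back to the question's {1,1,10} tensor, filling it through flat<T> would look like this (a sketch):
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, 1, 10}));
auto flat = input_tensor.flat<float>(); // linear view over all 10 elements
for (int i = 0; i < flat.size(); ++i) {
  flat(i) = i * 0.1f; // 0.0, 0.1, ..., 0.9 as in the question
}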
I'm going to implement the equalization proposed in a paper.
The method consists of substituting each value of each channel according to the formula on the 16th slide of these presentation slides.
First of all, I implemented this equalization function in Matlab in two ways. In the first, I compute the histograms (counts) of each channel in order to know the number of values less than or equal to a specific value in the range [0 255]. Alternatively, in the second way, I use some matrix operations (R<=value... G<=value... B<=value).
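In other words, each intensity i of a channel is replaced by floor((max / Npx) * P_i), where P_i is the number of pixels in that channel with value <= i, max is the channel maximum, and Npx the total number of pixels; this is what the code below computes.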
Initially I thought that the second method would be the better of the two in terms of execution time, but it seems not, which surprised me the first time.
Then I implemented this function in OpenCV, and now I am surprised again because the execution in Matlab is faster than in C++! With Matlab I got these timings:
Matlab, method 1: 1.36 seconds
Matlab, method 2: 1.74 seconds
In C++ using OpenCV I found these values:
OpenCV, method 1: 2380 milliseconds
OpenCV, method 2: 4651 milliseconds
I obtained the same results, so the function is correct, but I think there is something wrong, or something that could be enhanced in terms of computation time, due to my inexperience with OpenCV, because I expect a compiled C++ function to be faster than Matlab! So my question is about how I can optimize the C++ code. In the following I put the C++ code using both methods.
//I have an RGB image in the Mat 'image'
Mat channel[3];
// Splitting method 1
split(image, channel);
Mat Red, Green, Blue;
Blue = channel[0];
Green = channel[1];
Red = channel[2];
//Splitting method 2
// Separate the image in 3 places ( B, G and R )
// vector<Mat> bgr_planes;
// split(image, bgr_planes);
double maxB, maxG, maxR, Npx;
double min;
double coeffB, coeffG, coeffR;
Mat newB, newG, newR;
Mat mapB, mapG, mapR;
int P_Bi, P_Gi, P_Ri;
Mat rangeValues;
double intpart;
double TIME;
int histSize = 256;
// Set the ranges (for B,G,R)
float range[] = { 0, 256 };
const float* histRange = { range };
bool uniform = true; bool accumulate = false;
Mat countB, countG, countR;
//Start the timer for the method 1
TIME = (double)getTickCount();
// Compute the histograms
calcHist(&Blue, 1, 0, Mat(), countB, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Green, 1, 0, Mat(), countG, 1, &histSize, &histRange, uniform, accumulate);
calcHist(&Red, 1, 0, Mat(), countR, 1, &histSize, &histRange, uniform, accumulate);
// Get the max from each channel
minMaxLoc(Blue, &min, &maxB);
minMaxLoc(Green, &min, &maxG);
minMaxLoc(Red, &min, &maxR);
//Number of pixels
Npx = Blue.rows * Blue.cols;
// Compute the coefficient of the formula
coeffB = maxB / Npx;
coeffG = maxG / Npx;
coeffR = maxR / Npx;
//Initialize the new channels
newB = Mat(Blue.rows, Blue.cols, Blue.type(), Scalar(0));
newG = Mat(Green.rows, Green.cols, Green.type(), Scalar(0));
newR = Mat(Red.rows, Red.cols, Red.type(), Scalar(0));
//For each value of the range [0, 255]
for (int value = 0; value < 256; value++)
{
mapB = (Blue == value)/255;
mapG = (Green == value)/255;
mapR = (Red == value)/255;
//Number of pixels less or equal then 'value'
rangeValues = countB(Range(0, value+1), Range(0, 1));
P_Bi = cv::sum(rangeValues)[0];
rangeValues = countG(Range(0, value + 1), Range(0, 1));
P_Gi = cv::sum(rangeValues)[0];
rangeValues = countR(Range(0, value + 1), Range(0, 1));
P_Ri = cv::sum(rangeValues)[0];
//Substitution of the value in the new channel plane
modf((coeffB * P_Bi), &intpart);
newB = newB + mapB * intpart;
modf((coeffG * P_Gi), &intpart);
newG = newG + mapG * intpart;
modf((coeffR * P_Ri), &intpart);
newR = newR + mapR * intpart;
}
TIME = 1000 * ((double)getTickCount() - TIME) / getTickFrequency();
cout << "Method 1 - elapsed time: " << TIME << "milliseconds." << endl;
//Here it takes 2380 milliseconds
//....
//....
//....
//Start timer of method 2
TIME = 0;
TIME = (double)getTickCount();
//Get the max
minMaxLoc(Blue, &min, &maxB);
minMaxLoc(Green, &min, &maxG);
minMaxLoc(Red, &min, &maxR);
Npx = Blue.rows * Blue.cols;
coeffB = maxB / Npx;
coeffG = maxG / Npx;
coeffR = maxR / Npx;
newB = Mat(Blue.rows, Blue.cols, Blue.type(), Scalar(0));
newG = Mat(Green.rows, Green.cols, Green.type(), Scalar(0));
newR = Mat(Red.rows, Red.cols, Red.type(), Scalar(0));
Mat mask(Blue.size(), CV_8UC1); // was cvCreateImage; the old C API mixes badly with Mat
for (int value = 0; value < 255; value++)
{
mapB = (Blue == value) / 255;
mapG = (Green == value) / 255;
mapR = (Red == value) / 255;
//Instead, there i used matrices operations
mask = (Blue <= value)/255;
P_Bi = cv::sum(mask)[0];
mask = (Green <= value) / 255;
P_Gi = cv::sum(mask)[0];
mask = (Red <= value) / 255;
P_Ri = cv::sum(mask)[0];
modf((coeffB * P_Bi), &intpart);
newB = newB + mapB * intpart;
modf((coeffG * P_Gi), &intpart);
newG = newG + mapG * intpart;
modf((coeffR * P_Ri), &intpart);
newR = newR + mapR * intpart;
}
//End of the timer
TIME = 1000 * ((double)getTickCount() - TIME) / getTickFrequency();
cout << "Method 2 - elapsed time: " << TIME << "milliseconds." << endl;
//Here it takes 4651 milliseconds
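For reference, one way to avoid the 256 full-image passes per channel is to turn the cumulative histogram into a lookup table applied in a single pass. A sketch for one channel, assuming 8-bit data and the same floor(coeff * P_i) substitution as above:
Mat equalizeChannel(const Mat& ch)
{
    int histSize = 256;
    float range[] = { 0, 256 };
    const float* histRange = { range };
    Mat hist;
    calcHist(&ch, 1, 0, Mat(), hist, 1, &histSize, &histRange, true, false);
    double minVal, maxVal;
    minMaxLoc(ch, &minVal, &maxVal);
    double coeff = maxVal / (double)(ch.rows * ch.cols);
    // Cumulative count -> lookup table, computed once.
    Mat lut(1, 256, CV_8U);
    double cum = 0;
    for (int v = 0; v < 256; v++)
    {
        cum += hist.at<float>(v, 0);
        lut.at<uchar>(0, v) = (uchar)(coeff * cum); // truncation, as with modf
    }
    Mat out;
    LUT(ch, lut, out); // single pass over the image
    return out;
}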
I'm trying to write a method that will find the proper threshold values in HSV space for an object placed at the center of the screen. These values are used by an object-tracking algorithm. I've tested that piece of code with hand-coded threshold values and it works well. The idea behind the method is that it should calculate the histograms for each of the channels and then return the 5th and 95th percentiles for each, to be used as the threshold values (credit: How to find RGB/HSV color parameters for color tracking?). The image being passed is a picture of the object to be tracked, which is set by the user before the whole process begins. Here is the code:
std::vector<cv::Scalar> HSV_Threshold_Determiner::Get_Threshold_Values(const cv::Mat& image)
{
cv::Mat inputImage;
cv::cvtColor(image, inputImage, CV_BGR2HSV);
std::vector<cv::Mat> bgrPlanes;
cv::split(inputImage, bgrPlanes);
cv::Mat hHist, sHist, vHist;
int hMax = 180, svMax = 256;
float hRanges[] = { 0, (float)hMax };
const float* hRange = { hRanges };
float svRanges[] = { 0, (float)svMax };
const float* svRange = { svRanges };
//float sRanges[] = { 0, 256 };
cv::calcHist(&bgrPlanes[0], 1, 0, cv::Mat(), hHist, 1, &hMax, &hRange);
cv::calcHist(&bgrPlanes[1], 1, 0, cv::Mat(), sHist, 1, &svMax, &svRange);
cv::calcHist(&bgrPlanes[2], 1, 0, cv::Mat(), vHist, 1, &svMax, &svRange);
int totalEntries = image.cols * image.rows;
int fiveCutoff = (int)(totalEntries * .05);
int ninetyFiveCutoff = (int)(totalEntries * .95);
float hTotal = 0, sTotal = 0, vTotal = 0;
bool hMinFound = false, hMaxFound = false, sMinFound = false, sMaxFound = false,
vMinFound = false, vMaxFound = false;
cv::Scalar hThresholds;
cv::Scalar sThresholds;
cv::Scalar vThresholds;
for(int i = 0; i < vHist.rows; ++i)
{
if(i < hHist.rows)
{
hTotal += hHist.at<float>(i, 0);
if(hTotal >= fiveCutoff && !hMinFound)
{
hThresholds.val[0] = i;
hMinFound = true;
}
else if(hTotal>= ninetyFiveCutoff && !hMaxFound)
{
hThresholds.val[1] = i;
hMaxFound = true;
}
}
sTotal += sHist.at<float>(i, 0);
vTotal += vHist.at<float>(i, 0);
if(sTotal >= fiveCutoff && !sMinFound)
{
sThresholds.val[0] = i;
sMinFound = true;
}
else if(sTotal >= ninetyFiveCutoff && !sMaxFound)
{
sThresholds.val[1] = i;
sMaxFound = true;
}
if(vTotal >= fiveCutoff && !vMinFound)
{
vThresholds.val[0] = i;
vMinFound = true;
}
else if(vTotal >= ninetyFiveCutoff && !vMaxFound)
{
vThresholds.val[1] = i;
vMaxFound = true;
}
if(vMaxFound && sMaxFound && hMaxFound)
{
break;
}
}
std::vector<cv::Scalar> returnVect;
returnVect.push_back(hThresholds);
returnVect.push_back(sThresholds);
returnVect.push_back(vThresholds);
return returnVect;
}
What I am trying to do is sum up the number of entries in each bucket until I reach a number that is greater than or equal to five percent and ninety-five percent of the total. Unfortunately, the numbers I get are never close to the ones I get when I do the thresholding by hand.
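For what it's worth, the cumulative logic can be isolated in a small helper per histogram, which makes it easier to test on its own (a sketch reusing the same calcHist outputs):
// Return the first bin at which the cumulative count reaches the given fraction.
static int percentileBin(const cv::Mat& hist, double fraction, int totalEntries)
{
    double cutoff = fraction * totalEntries;
    float running = 0;
    for(int i = 0; i < hist.rows; ++i)
    {
        running += hist.at<float>(i, 0);
        if(running >= cutoff)
            return i;
    }
    return hist.rows - 1;
}
// e.g. hThresholds.val[0] = percentileBin(hHist, .05, totalEntries);
//      hThresholds.val[1] = percentileBin(hHist, .95, totalEntries);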
Mat img = ... // from camera or some other source
// STEP 1: learning phase
Mat hsv, imgThreshed, processed, denoised;
cv::GaussianBlur(img, denoised, cv::Size(5,5), 2, 2); // remove noise
cv::cvtColor(denoised, hsv, CV_BGR2HSV);
// let's say we manually picked a region of 100x100 px containing the color/object of interest using the mouse
cv::Mat roi = hsv(cv::Range(mousey-50, mousey+50), cv::Range(mousex-50, mousex+50)); // rows are y, cols are x
// must split all channels to get Hue only
std::vector<cv::Mat> hsvPlanes;
cv::split(roi, hsvPlanes);
// compute statistics for Hue value
cv::Scalar mean, stddev;
cv::meanStdDev(hsvPlanes[0], mean, stddev);
// take mean +/- 3*stddev to cover ~99.7% of the valid Hue samples (3-sigma rule)
float minHue = mean[0] - stddev[0]*3;
float maxHue = mean[0] + stddev[0]*3;
// STEP 2: detection phase
cv::inRange(hsvPlanes[0], cv::Scalar(minHue), cv::Scalar(maxHue), imgThreshed);
imshow("thresholded", imgThreshed);
cv::erode(imgThreshed, processed, cv::Mat(), cv::Point(-1,-1), 5);  // minimizes noise (cv_erode in the original looks like a custom helper)
cv::dilate(processed, processed, cv::Mat(), cv::Point(-1,-1), 20); // maximizes remaining regions
imshow("final", processed);
//STEP 3: do some blob/contour detection on processed image & find maximum blob/region, etc ...
A much simpler solution: just calculate the mean and standard deviation for a region of interest, i.e. for the Hue plane.
Since Hue is the most stable component in the image, the other components, saturation and value, should be discarded as they vary too much. However, you can still compute their means if needed.