Understanding the HOG feature layout - c++

I am doing a project that involves multi-class object detection. My aim is to detect the following objects:
1. Truck
2. Car
3. Person
Since I have three different object classes, I will have three different window sizes, but the HOG features for the blocks remain the same. I hacked into OpenCV's hog.cpp and added two new functions that compute the HOG descriptors for blocks only. Here is my code:
void cv::gpu::HOGDescriptor::getDescriptorsBlock(const GpuMat& img, Size win_stride, GpuMat& descriptors,
                                                 FileStorage fs3, string fileName, double scale,
                                                 int width, int height, size_t lev)
{
    CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);

    size_t block_hist_size = getBlockHistogramSize();
    computeBlockHistograms(img);

    Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
    // Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
    // Size wins_per_img   = numPartsWithin(img.size(), win_size, win_stride);

    // copy block_hists from GPU to CPU
    // (std::vector instead of a variable-length array, which is not standard C++,
    //  and sizeof(float) instead of sizeof(CV_32F), which is just an enum constant)
    std::vector<float> dest_ptr(block_hist_size * blocks_per_img.area());
    cudaMemcpy(dest_ptr.data(), block_hists.ptr<float>(),
               block_hist_size * blocks_per_img.area() * sizeof(float),
               cudaMemcpyDeviceToHost);

    std::cout << "( " << width << ", " << height << ")" << std::endl;
    std::cout << lev << std::endl;

    // write to yml file
    int level = static_cast<int>(lev);
    fs3 << "Scale" << scale;
    fs3 << "Level" << level;
    fs3 << "Width" << width << "Height" << height;
    fs3 << "features" << "[";
    for (unsigned int i = 0; i < block_hist_size * blocks_per_img.area(); i++)
    {
        fs3 << dest_ptr[i];
    }
    fs3 << "]";
}
Similarly, to get the block descriptors at multiple scales:
void cv::gpu::HOGDescriptor::getDescriptorsMultiScale(const GpuMat& img,
                                                      Size win_stride, double scale0, unsigned int count)
{
    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);

    vector<double> level_scale;
    double scale = 1.;
    int levels = 0;
    for (levels = 0; levels < nlevels; levels++)
    {
        level_scale.push_back(scale);
        if (cvRound(img.cols / scale) < win_size.width ||
            cvRound(img.rows / scale) < win_size.height || scale0 <= 1)
            break;
        scale *= scale0;
    }
    levels = std::max(levels, 1);
    level_scale.resize(levels);
    image_scales.resize(levels);

    // open yml file with image ID
    // (note: FileStorage infers the output format from the file extension,
    //  so the name should normally end in ".yml" or ".xml")
    FileStorage fs3;
    char fileName[20];
    GpuMat descriptors;
    sprintf(fileName, "%04d", count);
    fs3.open(fileName, FileStorage::WRITE);

    for (size_t i = 0; i < level_scale.size(); i++)
    {
        scale = level_scale[i];
        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
        GpuMat smaller_img;
        if (sz == img.size())
            smaller_img = img;
        else
        {
            image_scales[i].create(sz, img.type());
            switch (img.type())
            {
                case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break;
                case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break;
            }
            smaller_img = image_scales[i];
        }
        std::cout << "scale " << level_scale[i] << std::endl;

        // calculate descriptors for blocks
        getDescriptorsBlock(smaller_img, win_stride, descriptors, fs3, fileName, scale, smaller_img.cols, smaller_img.rows, i);
        // detect(smaller_img, locations, hit_threshold, win_stride, padding);
    }

    // close yml file
    fs3.release();
}
My question is about the layout of the HOG descriptors when they are computed per block only. Can someone share their thoughts?

Generally, an image pyramid is used to achieve scale invariance. If you want to get more sophisticated, have a look at the paper "Object Detection with Discriminatively Trained Part Based Models" [1]; the authors were very successful using HoG at different scales. Of course, the original HoG paper [2] might help you understand the structure of the feature itself, if that is more what you are after.
[1] http://vision.ics.uci.edu/papers/FelzenszwalbGMR_PAMI_2009/FelzenszwalbGMR_PAMI_2009.pdf
[2] http://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf
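Regarding the layout question itself: as far as I can tell from the GPU implementation, computeBlockHistograms() stores one histogram of getBlockHistogramSize() floats per block (cells per block × nbins, i.e. 2×2×9 = 36 with the default parameters), with the blocks laid out row by row over the image. Under that assumption, a single bin of the buffer copied back in getDescriptorsBlock above can be addressed as in this minimal sketch (bx, by and b are hypothetical block/bin indices):

    // Sketch only: assumes row-major block order and contiguous per-block histograms.
    size_t block_hist_size = getBlockHistogramSize();            // cells_per_block * nbins
    Size   blocks_per_img  = numPartsWithin(img.size(), block_size, block_stride);

    // bin b of the block at block coordinates (bx, by):
    size_t idx   = (static_cast<size_t>(by) * blocks_per_img.width + bx) * block_hist_size + b;
    float  value = dest_ptr[idx];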

Related

Compute the similarity rate between two Images with opencv/c++

I'm using OpenCV/C++ to compute the similarity rate between two images. I want to tell the user how similar, as a percentage, image A is to image B.
Let's take a look at the code below:
double getSimilarityRate(const cv::Mat A, const cv::Mat B){
    double cpt = 0.0;
    cv::Mat imgGray1, imgGray2;

    cv::cvtColor(A, imgGray1, CV_BGR2GRAY);
    cv::cvtColor(B, imgGray2, CV_BGR2GRAY);

    imgGray1 = imgGray1 > 128;
    imgGray2 = imgGray2 > 128;

    double total = imgGray1.cols * imgGray1.rows;

    if(imgGray1.rows > 0 && imgGray1.rows == B.rows && imgGray1.cols > 0 && imgGray1.cols == B.cols){
        for(int rows = 0; rows < imgGray1.rows; rows++){
            for(int cols = 0; cols < imgGray1.cols; cols++){
                if(imgGray1.at<int>(rows, cols) == imgGray2.at<int>(rows, cols)) cpt++;
            }
        }
    }else{
        std::cout << "No similarity between the two images ... [EXIT]" << std::endl;
        exit(0);
    }

    double rate = cpt / total;
    return rate * 100.0;
}

int main(void)
{
    /* ------------------ # ALGO GETSIMILARITY BETWEEN 2 IMAGES # ------------------ */
    double rate;
    string fileNameImage1("C:\\Users\\hugoo\\Documents\\Prog\\NexterMU\\Qt\\OpenCV\\DetectionShapeProgram\\mire.jpg");
    cv::Mat image1 = imread(fileNameImage1);
    string fileNameImage2("C:\\Users\\hugoo\\Documents\\Prog\\NexterMU\\Qt\\OpenCV\\DetectionShapeProgram\\mire.jpg");
    cv::Mat image2 = imread(fileNameImage2);

    if(image1.empty() || image2.empty()){
        std::cout << "Images couldn't be loaded" << std::endl;
        exit(-1);
    }

    rate = getSimilarityRate(image1, image2);
First I convert the matrices from BGR to grayscale, so only one channel remains (much easier to compare):
cv::Mat imgGray1, imgGray2;
cv::cvtColor(A, imgGray1, CV_BGR2GRAY);
cv::cvtColor(B, imgGray2, CV_BGR2GRAY);
Then I make them binary (255 or 0, i.e. each pixel is white or black):
imgGray1 = imgGray1 > 128;
imgGray2 = imgGray2 > 128;
In my for loops I go through each pixel and compare it to the corresponding pixel in the second image.
If they match, I increment a counter (cpt++).
Finally I compute the rate and turn it into a percentage with:
double rate = cpt / total;
return rate * 100.0;
The thing is, it doesn't seem to compute correctly: the rate value is never printed to the console.
I think the problem comes from the at() function; maybe I'm not using it properly.
I suspect imgGray1.at<int>(rows, cols) should be imgGray1.at<uchar>(rows, cols) instead.
Currently the .at() call has int as a template argument, but a grayscale cv::Mat typically consists of uchar elements. Are you sure your image has int elements? If it actually consists of uchar elements, then using an int template argument will access memory beyond the image (basically all pointer offsets become 4x as large as they should be).
More generally, if you use cv::Mat::at(), you need to use different template arguments depending on the output of cv::Mat::type():
8-bit 3-channel image (CV_8UC3) --> .at<cv::Vec3b>(row, column)
8-bit 1-channel image (CV_8UC1) --> .at<uchar>(row, column)
32-bit 3-channel image (CV_32FC3) --> .at<cv::Vec3f>(row, column)
32-bit 1-channel image (CV_32FC1) --> .at<float>(row, column)
For this reason, if a function should support arbitrary cv::Mat's, one either needs to write a bunch of if-else clauses, or to avoid .at() altogether. In your situation, since imgGray1 and imgGray2 are "binarized", the rate can be calculated using cv::norm, possibly like so:
// For binarized (0/255) images, every differing pixel contributes 255 to the L1 norm,
// so dividing by 255 yields the number of non-equal pixels.
int num_non_equal = static_cast<int>(cv::norm(imgGray1, imgGray2, cv::NORM_L1) / 255);
double rate = 1.0 - num_non_equal / static_cast<double>(total);
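As a usage sketch (not the only way to do it), the whole pixel loop in getSimilarityRate can then be reduced to a few lines; cv::countNonZero on an element-wise comparison gives the same count and avoids .at() entirely. The variable names below mirror the question, the rest is an assumption about how the function is meant to be used:

    double getSimilarityRate(const cv::Mat& A, const cv::Mat& B)
    {
        // binarize both images exactly as in the question
        cv::Mat g1, g2;
        cv::cvtColor(A, g1, CV_BGR2GRAY);
        cv::cvtColor(B, g2, CV_BGR2GRAY);
        g1 = g1 > 128;
        g2 = g2 > 128;

        CV_Assert(g1.size() == g2.size() && !g1.empty());

        // count pixels that differ between the two binarized images
        int num_non_equal = cv::countNonZero(g1 != g2);

        double rate = 1.0 - num_non_equal / static_cast<double>(g1.total());
        return rate * 100.0;
    }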

fftw + opencv inconsistent output

I recently tried to implement an FFT function for OpenCV's Mat.
I based my implementation mainly on FFTW's code samples and on:
FFTW-OpenCV
I paid close attention to adapting the size of the input image in order to speed up the processing.
It seems that I did something wrong because the output is always a black image.
Here is my implementation:
void fft2_32f(const cv::Mat1f& _src, cv::Mat2f& dst)
{
    cv::Mat2f src;

    const int rows = cv::getOptimalDFTSize(_src.rows);
    const int cols = cv::getOptimalDFTSize(_src.cols);
    // const int total = cv::alignSize(rows*cols, steps);

    if(_src.isContinuous() && _src.rows == rows && _src.cols == cols)
    {
        // the input already has an optimal size: work on it directly
        src = cv::Mat2f::zeros(_src.size());
        dst = cv::Mat2f::zeros(_src.size());

        // 1) copy the source into a complex matrix (the imaginary component is set to 0).
        cblas_scopy(src.total(), _src.ptr<float>(), 1, src.ptr<float>(), 2);

        // 2) prepare and apply the transform.
        fftwf_complex* ptr_in  = reinterpret_cast<fftwf_complex*>(src.ptr<float>());
        fftwf_complex* ptr_out = reinterpret_cast<fftwf_complex*>(dst.ptr<float>());

        // fftwf_plan fft = fftwf_plan_dft_1d(src.total(), ptr_in, ptr_out, FFTW_FORWARD, FFTW_ESTIMATE);
        fftwf_plan fft = fftwf_plan_dft_2d(src.rows, src.cols, ptr_in, ptr_out, FFTW_FORWARD, FFTW_ESTIMATE);

        fftwf_execute(fft);
        fftwf_destroy_plan(fft);

        // 3) normalize: scale every coefficient by 1/N in place
        cblas_sscal(dst.rows * dst.step1(), 1.f / dst.total(), dst.ptr<float>(), 1);
    }
    else
    {
        // pad to the optimal size
        src = cv::Mat2f::zeros(rows, cols);
        dst = cv::Mat2f::zeros(rows, cols);

        // 1) copy the source into a complex matrix (the imaginary component is set to 0).
        support::parallel_for(cv::Range(0, _src.rows), [&src, &_src](const cv::Range& range)->void
        {
            for(int r = range.start; r < range.end; r++)
            {
                int c = 0;
                const float* it_src = _src[r];
                float* it_dst = src.ptr<float>(r);
#if CV_ENABLE_UNROLLED
                for(; c <= _src.cols - 4; c += 4, it_src += 4, it_dst += 8)
                {
                    *it_dst       = *it_src;
                    *(it_dst + 2) = *(it_src + 1);
                    *(it_dst + 4) = *(it_src + 2);
                    *(it_dst + 6) = *(it_src + 3);
                }
#endif
                for(; c < _src.cols; c++, it_src++, it_dst += 2)
                    *it_dst = *it_src;
            }
        }, 0x80);

        // 2) prepare and apply the transform.
        fftwf_complex* ptr_in  = reinterpret_cast<fftwf_complex*>(src.ptr<float>());
        fftwf_complex* ptr_out = reinterpret_cast<fftwf_complex*>(dst.ptr<float>());

        fftwf_plan fft = fftwf_plan_dft_2d(src.rows, src.cols, ptr_in, ptr_out, FFTW_FORWARD, FFTW_ESTIMATE);

        fftwf_execute(fft);
        fftwf_destroy_plan(fft);

        // 3) normalize: scale every coefficient by 1/N in place
        cblas_sscal(dst.rows * dst.step1(), 1.f / dst.total(), dst.ptr<float>(), 1);
    }
}
Note:
The parallel_for implementation is inspired by: How to use lambda as a parameter to parallel_for_
Thanks in advance for any help.
I figured out my issue.
The function as written does work correctly (at least for the purpose I made it for).
My issue was this:
cv::Mat dst = cv::Mat::zeros(src.size(), CV_32FC2);
cv::Mat1f srcw = src;
cv::Mat1f dstw = dst;
fft2_32f(srcw, dstw); // reallocates dstw to the optimal size to receive the output, depending on the size of srcw ... so dstw is reallocated, but not dst.
dst.copyTo(_outputVariable);
In that case the correct information is stored in dstw but not in dst, because of the reallocation inside the function.
So when I tried to visualize my data I got a black image.
The proper call should be:
cv::Mat dst;
cv::Mat1f srcw = src;
cv::Mat1f dstw;
fft2_32f(srcw, dstw); // reallocates dstw to the optimal size to receive the output, depending on the size of srcw ... so dstw is reallocated, but not dst.
dst = dstw;
dst.copyTo(_outputVariable); // or dstw.copyTo(_outputVariable);
With that code I got the proper output.
Note that, depending on the application, a ROI (take a look at operator()(const cv::Rect&) of OpenCV's Mat container) corresponding to the size of the input may be useful in order to preserve the original dimensions, as sketched below.
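For illustration, here is a minimal sketch of that cropping step, under the assumption that src is the original input image and dstw the (possibly padded) spectrum returned by fft2_32f; the variable names are only for the example:

    cv::Mat1f srcw = src;
    cv::Mat2f dstw;
    fft2_32f(srcw, dstw);   // dstw may now be larger than src (optimal DFT size)

    // keep only the part of the spectrum matching the original input dimensions
    cv::Mat2f cropped = dstw(cv::Rect(0, 0, src.cols, src.rows)).clone();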
Thank you for your help :).
Can someone help me mark this topic as closed, please?

OPENCV : CUDA context initialization for different methods

I'm working on a simple C++ program to evaluate the performance of some OpenCV GPU methods (cv::cuda).
I am using OpenCV 3.1 on Ubuntu 15 (with CUDA 7.5) and a GeForce 770.
I had previously read that the CUDA environment needs to be initialized to avoid a slow first call, so I start my program with cv::cuda::getDevice() and setDevice().
Then, I test 2 methods:
cv::cuda::resize() (factor 0.5)
and cv::cuda::meanStdDev.
Initialization takes 400 ms. Then resizing takes 2 or 3 ms, which is OK.
But... meanStdDev takes 476 ms!
If I run two successive meanStdDev calls, the second one is much faster (3 ms).
I really don't understand why the initialization has an effect on resize() but not on meanStdDev().
I compiled OpenCV with -DCUDA_ARCH_BIN=3.0. I tried -DCUDA_ARCH_PTX="" as well, but the problem is still the same.
#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudaarithm.hpp"

using namespace std;

int main(int argc, char *argv[])
{
    double t_init_cuda = (double)cv::getTickCount();
    int CudaDevice;
    if(cv::cuda::getCudaEnabledDeviceCount() == 0)
    {
        cerr << endl << "ERROR: NO CudaEnabledDevice" << endl;
        exit(2);
    }
    else
    {
        CudaDevice = cv::cuda::getDevice();
        cv::cuda::setDevice(CudaDevice);
    }
    t_init_cuda = ((double)cv::getTickCount() - t_init_cuda) / cv::getTickFrequency() * 1000;
    cout << endl << "\t*T_INIT_CUDA=" << t_init_cuda << "ms\n";

    cv::Mat src = cv::imread(argv[1], 0);
    if (!src.data) exit(1);
    cv::cuda::GpuMat d_src(src);

    //CV::CUDA::RESIZE
    cv::cuda::GpuMat d_dst;
    double factor = 0.5;
    double t_gpu_resize = (double)cv::getTickCount();
    cv::cuda::resize(d_src, d_dst, cv::Size((int)((float)(d_src.cols)*factor), (int)((float)(d_src.rows)*factor)), 0, 0, CV_INTER_AREA);
    t_gpu_resize = ((double)cv::getTickCount() - t_gpu_resize) / cv::getTickFrequency() * 1000;
    cout << endl << "D_SRC=" << d_src.rows << "x" << d_src.cols << " => D_DST=" << d_dst.rows << "x" << d_dst.cols << endl;
    cout << endl << "\t*T_GPU_RESIZE=" << t_gpu_resize << "ms\n";

    //CV::CUDA::MEANSTDDEV
    double t_meanstddev = (double)cv::getTickCount();
    cv::Scalar mean, stddev;
    std::vector<cv::cuda::GpuMat> d_src_split;
    cv::cuda::split(d_src, d_src_split);
    cv::cuda::meanStdDev(d_src_split[0], mean, stddev);
    t_meanstddev = ((double)cv::getTickCount() - t_meanstddev) / cv::getTickFrequency() * 1000.0;
    cout << endl << "mean=" << mean.val[0] << " | stddev=" << stddev.val[0] << endl;
    cout << endl << "\t*T_GPU_MEANSTDDEV=" << t_meanstddev << "ms\n";

    return 0;
}
My friend, when you call the same function twice:
1- The first time, new device memory is allocated for the result (according to the OpenCV wiki).
2- The second time, the already allocated memory is reused, so it is fast.
I pulled that function from the OpenCV source for you so you can see why:
void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
{
    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");

    const GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.type() == CV_8UC1 );

    GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);

    NppiSize sz;
    sz.width  = src.cols;
    sz.height = src.rows;

    int bufSize;
#if (CUDA_VERSION <= 4020)
    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
#else
    nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
#endif

    BufferPool pool(stream);
    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1); // <--- this line creates a new GpuMat

    NppStreamHandler h(StreamAccessor::getStream(stream));

    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );

    syncOutput(dst, _dst, stream);
}
And this is the getBuffer function it calls:
GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type)
{
    GpuMat buf(allocator_);
    buf.create(rows, cols, type);
    return buf;
}
I hope this will help you.
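So, if the goal is to benchmark the steady-state cost of meanStdDev rather than its one-time setup, a simple option is an untimed warm-up call before the timed one. A minimal sketch, reusing d_src_split and the timing pattern from the question:

    // Warm-up: the first call pays for one-time allocation/initialization inside meanStdDev.
    cv::Scalar mean, stddev;
    cv::cuda::meanStdDev(d_src_split[0], mean, stddev);

    // Timed call: internal buffers now exist, so this measures the operation itself.
    double t = (double)cv::getTickCount();
    cv::cuda::meanStdDev(d_src_split[0], mean, stddev);
    t = ((double)cv::getTickCount() - t) / cv::getTickFrequency() * 1000.0;
    std::cout << "\t*T_GPU_MEANSTDDEV (warm) = " << t << " ms" << std::endl;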

Unknown pooling method when testing caffe with cuda but not cudnn

I built the Caffe deep learning library on Windows as shown in this link:
https://initialneil.wordpress.com/2015/07/15/caffe-vs2013-opencv-in-windows-tutorial-i/
I deactivated cuDNN because my NVIDIA card did not support it, and I changed the target architecture to the Fermi architecture.
I built Caffe as a static library to use it in the test project shown below:
int main(int argc, char** argv)
{
    // get a testing image and display
    Mat img = imread(CAFFE_ROOT + "/examples/images/mnist_5.png");
    cvtColor(img, img, CV_BGR2GRAY);
    imshow("img", img);
    waitKey(1);

    // Set up Caffe
    Caffe::set_mode(Caffe::GPU);
    int device_id = 0;
    Caffe::SetDevice(device_id);
    LOG(INFO) << "Using GPU";

    // Load net
    Net<float> net(CAFFE_ROOT + "/examples/mnist/lenet_test-memory-1.prototxt");
    string model_file = CAFFE_ROOT + "/examples/mnist/lenet_iter_10000.caffemodel";
    net.CopyTrainedLayersFrom(model_file);

    // set the patch for testing
    vector<Mat> patches;
    patches.push_back(img);

    // push vector<Mat> to data layer
    float loss = 0.0;
    boost::shared_ptr<MemoryDataLayer<float> > memory_data_layer;
    memory_data_layer = boost::static_pointer_cast<MemoryDataLayer<float>>(net.layer_by_name("data"));

    vector<int> labels(patches.size());
    memory_data_layer->AddMatVector(patches, labels);

    // Net forward
    // ERROR IN THE LINE BELOW
    const vector<Blob<float>*>& results = net.ForwardPrefilled(&loss); // HERE THE ERROR
    float* output = results[1]->mutable_cpu_data();

    // Display the output
    for (int i = 0; i < 10; i++) {
        printf("Probability to be Number %d is %.3f\n", i, output[i]);
    }
    waitKey(0);
}
But I get an error coming from the file pooling_layer.cu, in the function shown below:
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  int count = (*top)[0]->count();
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top->size() > 1;
  int* mask = NULL;
  Dtype* top_mask = NULL;
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    if (use_top_mask) {
      top_mask = (*top)[1]->mutable_gpu_data();
    } else {
      mask = max_idx_.mutable_gpu_data();
    }
    // NOLINT_NEXT_LINE(whitespace/operators)
    MaxPoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_data, bottom[0]->num(), channels_,
        height_, width_, pooled_height_, pooled_width_, kernel_h_,
        kernel_w_, stride_h_, stride_w_, pad_h_, pad_w_, top_data,
        mask, top_mask);
    break;
  case PoolingParameter_PoolMethod_AVE:
    // NOLINT_NEXT_LINE(whitespace/operators)
    AvePoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_data, bottom[0]->num(), channels_,
        height_, width_, pooled_height_, pooled_width_, kernel_h_,
        kernel_w_, stride_h_, stride_w_, pad_h_, pad_w_, top_data);
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    if (Caffe::phase() == Caffe::TRAIN) {
      // We need to create the random index as well.
      caffe_gpu_rng_uniform(count, Dtype(0), Dtype(1),
          rand_idx_.mutable_gpu_data());
      // NOLINT_NEXT_LINE(whitespace/operators)
      StoPoolForwardTrain<Dtype><<<CAFFE_GET_BLOCKS(count),
          CAFFE_CUDA_NUM_THREADS>>>(
          count, bottom_data, bottom[0]->num(), channels_,
          height_, width_, pooled_height_, pooled_width_, kernel_h_,
          kernel_w_, stride_h_, stride_w_,
          rand_idx_.mutable_gpu_data(), top_data);
    } else {
      // NOLINT_NEXT_LINE(whitespace/operators)
      StoPoolForwardTest<Dtype><<<CAFFE_GET_BLOCKS(count),
          CAFFE_CUDA_NUM_THREADS>>>(
          count, bottom_data, bottom[0]->num(), channels_,
          height_, width_, pooled_height_, pooled_width_, kernel_h_,
          kernel_w_, stride_h_, stride_w_, top_data);
    }
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
  CUDA_POST_KERNEL_CHECK;
}
And I get the message "Unknown pooling method." at runtime.
Could someone give me an idea about a possible solution?
The pooling layer, which by default should be max pooling, was translated into some other pooling type. You might add a breakpoint in pooling_layer.cu (line 163), or add cout << this->layer_param_.pooling_param().pool() << endl; before that line, to see which pooling method it is actually using; I guess it doesn't equal PoolingParameter_PoolMethod_MAX here.
I'm not sure why it happened; maybe there is an error in the prototxt file or in the protobuf. A brutal trick would be to overwrite line 206 with lines 165-176 in order to force max pooling, as sketched below.
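For illustration, here is a sketch of the suggested debug print placed right before the switch in PoolingLayer<Dtype>::Forward_gpu; the enum values in the comment are an assumption based on the usual caffe.proto ordering:

    // Debug sketch: print the pooling method the layer parameter actually carries.
    // In the stock caffe.proto, MAX = 0, AVE = 1, STOCHASTIC = 2 (assumed here).
    std::cout << "pooling method enum = "
              << this->layer_param_.pooling_param().pool() << std::endl;
    switch (this->layer_param_.pooling_param().pool()) {
      // ... existing cases from Forward_gpu ...
    }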

Finding HSV Thresholds Via Histograms with OpenCV

I'm trying to write a method that will find the proper threshold values in HSV space for an object placed at the center of the screen. These values are used by an object tracking algorithm; I've tested that piece of code with hand-coded threshold values and it works well. The idea behind the method is that it should calculate the histogram for each of the channels and then return the 5th and 95th percentiles of each, to be used as the threshold values (credit: How to find RGB/HSV color parameters for color tracking?). The image being passed in is a picture of the object to be tracked (which is set by the user before the whole process begins). Here is the code:
std::vector<cv::Scalar> HSV_Threshold_Determiner::Get_Threshold_Values(const cv::Mat& image)
{
    cv::Mat inputImage;
    cv::cvtColor(image, inputImage, CV_BGR2HSV);

    // note: despite the name, these planes hold H, S and V after the conversion above
    std::vector<cv::Mat> bgrPlanes;
    cv::split(inputImage, bgrPlanes);

    cv::Mat hHist, sHist, vHist;
    int hMax = 180, svMax = 256;
    float hRanges[] = { 0, (float)hMax };
    const float* hRange = { hRanges };
    float svRanges[] = { 0, (float)svMax };
    const float* svRange = { svRanges };
    //float sRanges[] = { 0, 256 };

    cv::calcHist(&bgrPlanes[0], 1, 0, cv::Mat(), hHist, 1, &hMax, &hRange);
    cv::calcHist(&bgrPlanes[1], 1, 0, cv::Mat(), sHist, 1, &svMax, &svRange);
    cv::calcHist(&bgrPlanes[2], 1, 0, cv::Mat(), vHist, 1, &svMax, &svRange);

    int totalEntries = image.cols * image.rows;
    int fiveCutoff = (int)(totalEntries * .05);
    int ninetyFiveCutoff = (int)(totalEntries * .95);

    float hTotal = 0, sTotal = 0, vTotal = 0;
    bool hMinFound = false, hMaxFound = false, sMinFound = false, sMaxFound = false,
         vMinFound = false, vMaxFound = false;

    cv::Scalar hThresholds;
    cv::Scalar sThresholds;
    cv::Scalar vThresholds;

    for(int i = 0; i < vHist.rows; ++i)
    {
        if(i < hHist.rows)
        {
            hTotal += hHist.at<float>(i, 0);
            if(hTotal >= fiveCutoff && !hMinFound)
            {
                hThresholds.val[0] = i;
                hMinFound = true;
            }
            else if(hTotal >= ninetyFiveCutoff && !hMaxFound)
            {
                hThresholds.val[1] = i;
                hMaxFound = true;
            }
        }

        sTotal += sHist.at<float>(i, 0);
        vTotal += vHist.at<float>(i, 0);

        if(sTotal >= fiveCutoff && !sMinFound)
        {
            sThresholds.val[0] = i;
            sMinFound = true;
        }
        else if(sTotal >= ninetyFiveCutoff && !sMaxFound)
        {
            sThresholds.val[1] = i;
            sMaxFound = true;
        }

        if(vTotal >= fiveCutoff && !vMinFound)
        {
            vThresholds.val[0] = i;
            vMinFound = true;
        }
        else if(vTotal >= ninetyFiveCutoff && !vMaxFound)
        {
            vThresholds.val[1] = i;
            vMaxFound = true;
        }

        if(vMaxFound && sMaxFound && hMaxFound)
        {
            break;
        }
    }

    std::vector<cv::Scalar> returnVect;
    returnVect.push_back(hThresholds);
    returnVect.push_back(sThresholds);
    returnVect.push_back(vThresholds);
    return returnVect;
}
What I am trying to do is sum up the number of entries in each bin until I reach a count greater than or equal to five percent (and then ninety-five percent) of the total. Unfortunately, the numbers I get are never close to the ones I get when I do the thresholding by hand.
Mat img = ... // from camera or some other source

// STEP 1: learning phase
Mat hsv, imgThreshed, processed, denoised;
cv::GaussianBlur(img, denoised, cv::Size(5,5), 2, 2); // remove noise
cv::cvtColor(denoised, hsv, CV_BGR2HSV);

// let's say we manually picked a region of 100x100 px containing the color/object of interest using the mouse
// (rows come from the y coordinate, columns from the x coordinate)
cv::Mat roi = hsv(cv::Range(mousey-50, mousey+50), cv::Range(mousex-50, mousex+50));

// must split all channels to get Hue only
std::vector<cv::Mat> hsvPlanes;
cv::split(roi, hsvPlanes);

// compute statistics for the Hue value
cv::Scalar mean, stddev;
cv::meanStdDev(hsvPlanes[0], mean, stddev);

// take mean +/- 3*sigma to cover nearly all (~99.7%) valid Hue samples
float minHue = mean[0] - stddev[0]*3;
float maxHue = mean[0] + stddev[0]*3;

// STEP 2: detection phase
cv::inRange(hsvPlanes[0], cv::Scalar(minHue), cv::Scalar(maxHue), imgThreshed);
imshow("thresholded", imgThreshed);

cv_erode(imgThreshed, processed, 5);   // minimizes noise (helper wrapper, not a standard OpenCV function)
cv_dilate(processed, processed, 20);   // maximizes the remaining regions (helper wrapper)
imshow("final", processed);

// STEP 3: do some blob/contour detection on the processed image & find the maximum blob/region, etc ...
In short, a much simpler solution is to calculate the mean and standard deviation for a region of interest containing the object, using the Hue channel.
Since Hue is the most stable component in the image, the other components (saturation and value) can be discarded as they vary too much; you can still compute their statistics if needed.
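For completeness, if you stay with the percentile method from the question, the three Scalars it returns can be applied with a single cv::inRange on the HSV image. A minimal usage sketch; objectImage, frame and the determiner instance are hypothetical names, and val[0]/val[1] are assumed to hold the min/max as in the question:

    // Usage sketch for the thresholds returned by Get_Threshold_Values().
    HSV_Threshold_Determiner determiner;
    std::vector<cv::Scalar> t = determiner.Get_Threshold_Values(objectImage);

    cv::Mat hsv, mask;
    cv::cvtColor(frame, hsv, CV_BGR2HSV);

    // lower/upper bounds built from the 5th/95th percentiles of each channel
    cv::inRange(hsv,
                cv::Scalar(t[0].val[0], t[1].val[0], t[2].val[0]),
                cv::Scalar(t[0].val[1], t[1].val[1], t[2].val[1]),
                mask);   // mask is 255 where the pixel falls inside all three ranges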