I wrote a program that uses the OpenCV and boost::filesystem libraries to crop images so they fit the object in the image (Photoshop has already been used to replace most of the backgrounds with white). However, I have thousands upon thousands of pictures that I need to sort through. I already know how to use the filesystem library and have no issue traversing the system's directories. What I don't know is how to detect images whose background was missed in the Photoshop process and is therefore non-white. This incorrect crop is formatted to have a margin and a 1:1 aspect ratio, but it still has the odd grayish background. The image should instead end up looking like this correct crop. So, how do I determine whether an image has a background like the incorrect crop?
Could you try the code below?
(To test the code you should create a directory c:/cropping and some subdirectories in it, and put some images in the directories you created.)
I hope it will be helpful.
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
vector<Rect> divideHW(Mat src, int dim, double threshold1, double threshold2)
{
Mat gray, reduced, canny;
if (src.channels() == 1)
{
gray = src;
}
if (src.channels() == 3)
{
Laplacian(src, gray, CV_8UC1);
cvtColor(gray, gray, COLOR_BGR2GRAY);
imshow("sobel", gray);
}
reduce(gray, reduced, dim, REDUCE_AVG);
Canny(reduced, canny, threshold1, threshold2);
vector<Point> pts;
findNonZero(canny, pts);
vector<Rect> rects;
Rect rect(0, 0, gray.cols, gray.rows);
if (!pts.size())
{
rects.push_back(rect);
}
int ref_x = 0;
int ref_y = 0;
for (size_t i = 0; i< pts.size(); i++)
{
if (dim)
{
rect.height = pts[i].y - ref_y;
rects.push_back(rect);
rect.y = pts[i].y;
ref_y = rect.y;
if (i == pts.size() - 1)
{
rect.height = gray.rows - pts[i].y;
rects.push_back(rect);
}
}
else
{
rect.width = pts[i].x - ref_x;
rects.push_back(rect);
rect.x = pts[i].x;
ref_x = rect.x;
if (i == pts.size() - 1)
{
rect.width = gray.cols - pts[i].x;
rects.push_back(rect);
}
}
}
return rects;
}
int main( int argc, char** argv )
{
int wait_time = 0; // set this value > 0 for not waiting
vector<String> filenames;
String folder = "c:/cropping/*.*"; // you can change this value or set it by argv[1]
glob(folder, filenames, true);
for (size_t i = 0; i < filenames.size(); ++i)
{
Mat src = imread(filenames[i]);
if (src.data)
{
vector<Rect> rects = divideHW(src, 0, 0, 0);
if (rects.size() < 3) continue;
Rect border;
border.x = rects[0].width;
border.width = src.cols - rects[rects.size() - 1].width - border.x;
rects = divideHW(src, 1, 0, 20);
if (rects.size() < 3) continue;
border.y = rects[0].height;
border.height = src.rows - rects[rects.size() - 1].height - border.y;
Mat cropped = src(border).clone();
src(border).setTo(Scalar(255, 255, 255));
Scalar _mean = mean(src);
int mean_total = _mean[0] + _mean[1] + _mean[2];
if (mean_total > 763)
{
imwrite(filenames[i] + ".jpg", cropped);
imshow("cropped", cropped);
waitKey(wait_time);
}
}
}
return 0;
}
I think you can compute the gradient of an ROI of your image (all rows in columns 10 to 15, for example).
Then you compute the energy of your gradient (the sum of all pixels of the gradient image).
If the energy is very low, you have a uniform background (you can't know the background color with this algorithm); otherwise you have a textured background.
This is a first approach. You can find all the functions required to do it in OpenCV.
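A minimal sketch of this first approach (the column strip and the energy threshold are illustrative assumptions, not values from the answer):

#include <opencv2/opencv.hpp>

// Returns true if the left border strip of the image looks uniform.
// The column range (10 to 15) and threshold are assumed for illustration.
bool hasUniformBorder(const cv::Mat& src, double energyThreshold = 5.0)
{
    cv::Mat gray;
    cv::cvtColor(src, gray, cv::COLOR_BGR2GRAY);

    // ROI: all rows, columns 10 to 15
    cv::Mat roi = gray(cv::Rect(10, 0, 6, gray.rows));

    // Gradient of the ROI
    cv::Mat gx, gy;
    cv::Sobel(roi, gx, CV_32F, 1, 0);
    cv::Sobel(roi, gy, CV_32F, 0, 1);

    // Energy: sum of squared gradient magnitudes, averaged per pixel
    cv::Mat mag2 = gx.mul(gx) + gy.mul(gy);
    double energy = cv::sum(mag2)[0] / mag2.total();

    return energy < energyThreshold;
}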
A second approach:
If you are sure that your background is white, you can take the ROI from the first approach, iterate over all its pixels, and check each one's color. If more than "n" pixels have a color other than (255,255,255), you can mark your image as "non-white background".
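And a minimal sketch of this second approach (the ROI, the tolerance, and the value of "n" are assumed for illustration):

#include <opencv2/opencv.hpp>

// Returns true if more than maxNonWhite pixels in the border strip
// deviate from pure white; tolerance and ROI are illustrative choices.
bool hasNonWhiteBackground(const cv::Mat& src, int maxNonWhite = 50, int tol = 10)
{
    cv::Mat roi = src(cv::Rect(10, 0, 6, src.rows)); // same strip as above
    int count = 0;
    for (int y = 0; y < roi.rows; y++) {
        const cv::Vec3b* p = roi.ptr<cv::Vec3b>(y);
        for (int x = 0; x < roi.cols; x++) {
            // a pixel counts as non-white if any channel is far from 255
            if (p[x][0] < 255 - tol || p[x][1] < 255 - tol || p[x][2] < 255 - tol)
                count++;
        }
    }
    return count > maxNonWhite;
}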
Folks,
I have a RealSense SR300, but when I display my depth image in an OpenCV window, it looks too dark. How can I fix this? When I run the RealSense examples the images look good, but the examples use OpenGL, and I need OpenCV for my projects. Here is my code:
int main(int argc, char ** argv)
{
    // RealSense camera setup
    rs::log_to_console(rs::log_severity::warn);

    // Create a context object. This object owns the handles to all connected realsense devices
    rs::context ctx;
    if (ctx.get_device_count() == 0)
    {
        throw std::runtime_error("No device detected. Is it plugged in?");
    }

    // Access the first available RealSense device
    rs::device * dev = ctx.get_device(0);

    // Configure depth to run at VGA resolution at 30 frames per second
    dev->enable_stream(rs::stream::depth, 640, 480, rs::format::z16, 30);

    rs::intrinsics depth_intrin;
    rs::format depth_format;
    depth_intrin = dev->get_stream_intrinsics(rs::stream::depth);
    depth_format = dev->get_stream_format(rs::stream::depth);

    cv::namedWindow("Send Display Image", CV_WINDOW_AUTOSIZE);

    /* Set callbacks prior to calling start(). */
    auto depth_callback = [depth_intrin, depth_format](rs::frame f)
    {
        cv::Mat image(cv::Size(640, 480), CV_16UC1,
                      (void*)f.get_data(), cv::Mat::AUTO_STEP);
        cv::imshow("Send Display Image", image);
        cv::waitKey(1000 / 80);
    };

    /* Callback to grab depth frames and publish them. */
    dev->set_frame_callback(rs::stream::depth, depth_callback);

    // Start streaming
    dev->start();

    while (true)
    {
    }
    return 0;
}
I am not sure why my image is so dark. I want it to look something like the Kinect or the Xtion when I run openni_launch from ROS.
Edit:
The min/max normalization shown further below produces some flickering:
I suspect that this is due to the maximal depth value flickering.
The minimal depth value is always 0, as this value is used when the depth is invalid, and thus the depth range becomes wrong.
Instead you should use this:
void make_depth_histogram(const Mat &depth, Mat &normalized_depth) {
  normalized_depth = Mat(depth.size(), CV_8U);
  int width = depth.cols, height = depth.rows;

  static uint32_t histogram[0x10000];
  memset(histogram, 0, sizeof(histogram));

  for(int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      ++histogram[depth.at<ushort>(i,j)];
    }
  }

  for(int i = 2; i < 0x10000; ++i) histogram[i] += histogram[i-1]; // Build a cumulative histogram for the indices in [1,0xFFFF]

  for(int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      if (uint16_t d = depth.at<ushort>(i,j)) {
        int f = histogram[d] * 255 / histogram[0xFFFF]; // 0-255 based on histogram location
        normalized_depth.at<uchar>(i,j) = static_cast<uchar>(f);
      } else {
        normalized_depth.at<uchar>(i,j) = 0;
      }
    }
  }
}
What you observe happens because the depth stream is coded on 16 bits (rs::format::z16), whereas only 8 bits will be used when displayed.
You can normalize your depth map:
double min, max;
minMaxLoc(depth, &min, &max);
Mat depth_normalized;
double alpha = 255.0/(max-min);
depth.convertTo(depth_normalized, CV_8U, alpha, -min*alpha);
Or use a kind of colormap to display the depth: make_depth_histogram().
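As a simpler alternative (my suggestion, not part of the original answer), OpenCV's built-in applyColorMap can colorize the normalized 8-bit depth, though it does not equalize the uneven depth distribution the way the histogram method does:

// requires opencv2/contrib/contrib.hpp on OpenCV 2.4, imgproc on 3.x
cv::Mat colored;
cv::applyColorMap(depth_normalized, colored, cv::COLORMAP_JET);
cv::imshow("Colormap depth", colored);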
Full demo code:
#include <librealsense/rs.hpp> // librealsense 1.x API
#include <opencv2/opencv.hpp>

using namespace cv;

inline void make_depth_histogram(const Mat &depth, Mat &color_depth) {
  color_depth = Mat(depth.size(), CV_8UC3);
  int width = depth.cols, height = depth.rows;

  static uint32_t histogram[0x10000];
  memset(histogram, 0, sizeof(histogram));

  for(int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      ++histogram[depth.at<ushort>(i,j)];
    }
  }

  for(int i = 2; i < 0x10000; ++i) histogram[i] += histogram[i-1]; // Build a cumulative histogram for the indices in [1,0xFFFF]

  for(int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      if (uint16_t d = depth.at<ushort>(i,j)) {
        int f = histogram[d] * 255 / histogram[0xFFFF]; // 0-255 based on histogram location
        color_depth.at<Vec3b>(i,j) = Vec3b(f, 0, 255 - f);
      } else {
        color_depth.at<Vec3b>(i,j) = Vec3b(0, 5, 20);
      }
    }
  }
}

int main(int argc, char *argv[]) {
  // Create a context object. This object owns the handles to all connected realsense devices
  rs::context ctx;

  // Access the first available RealSense device
  rs::device * dev = ctx.get_device(0);

  // Configure depth stream to run at VGA resolution at 30 frames per second
  dev->enable_stream(rs::stream::depth, 640, 480, rs::format::z16, 30);

  // Start streaming
  dev->start();

  // Camera warmup - drop several first frames to let auto-exposure stabilize
  for(int i = 0; i < 30; i++)
    dev->wait_for_frames();

  // Creating OpenCV Matrix from the depth image
  Mat depth(Size(640, 480), CV_16U, (void*)dev->get_frame_data(rs::stream::depth), Mat::AUTO_STEP);

  // Create a color depth
  Mat color_depth;
  make_depth_histogram(depth, color_depth);

  // Create a normalized depth
  double min, max;
  minMaxLoc(depth, &min, &max);
  Mat depth_normalized;
  double alpha = 255.0/(max-min);
  depth.convertTo(depth_normalized, CV_8U, alpha, -min*alpha);

  // Display in a GUI
  imshow("Display normalized depth", depth_normalized);
  imshow("Display color depth", color_depth);

  waitKey(0);

  return 0;
}
The only solution I found to this problem that gives satisfactory results is the following:
Save the image as a PNG file. (PNG supports saving 16-bit images)
Use matplotlib to view it in a colored map:
#!/usr/bin/python3
import sys

import cv2
import numpy as np
from matplotlib import pyplot as plt

def printCoordinates(event):
    x, y = event.xdata, event.ydata
    if x is not None:
        print("X: ", x, " Y: ", y, " Value = ", img[int(y), int(x)])

# IMREAD_ANYDEPTH keeps the 16-bit depth values intact
img = cv2.imread(sys.argv[1], cv2.IMREAD_ANYDEPTH)
#img = img/65535

fig = plt.figure()
plt.imshow(img, cmap='nipy_spectral')
cid = fig.canvas.mpl_connect('button_press_event', printCoordinates)
plt.colorbar()
plt.show()
The button_press_event is used to print the exact value of the pixel clicked.
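For step 1, a one-line sketch in C++ (assuming depth is the CV_16U Mat from the demo above): imwrite preserves the full 16-bit range when the target is PNG, which JPEG would not.

// 'depth' is assumed to be the CV_16U depth Mat captured earlier.
cv::imwrite("depth.png", depth); // PNG keeps the 16-bit values intact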
RGB Image:
Corresponding Depth Image:
I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.
Currently this is the function I am using:
double compute_skew(Mat &img)
{
    // Binarize
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);

    // Invert colors
    cv::bitwise_not(img, img);

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, element);

    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = img.begin<uchar>();
    cv::Mat_<uchar>::iterator end = img.end<uchar>();
    for (; it != end; ++it)
        if (*it)
            points.push_back(it.pos());

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    cv::Point2f vertices[4];
    box.points(vertices);
    for (int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}
When I look at the angle in the debugger I get 0.000000.
However, when I give it this image, I get proper results: a skew of about 16 degrees.
How can I properly detect the skew in the first image?
There are a few other ways to get the skew angle: 1) by Hough transform, 2) by horizontal projection profile — rotate the image over a range of angle bins and calculate the horizontal projection; the angle with the greatest horizontal histogram value is the deskew angle.
I have provided an implementation of 1) below, and a rough sketch of 2) right after this paragraph. I believe this to be superior to the boxing method you are using, because that method requires you to completely clean the image of any noise, which just isn't possible most of the time.
You should know that the method doesn't work well if there's too much noise. You can reduce noise in different ways depending on what type of "line" you want to treat as the most dominant in the image. I have provided two preprocessing methods for this. Be sure to play with the parameters, thresholds, etc.
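Sketch of method 2), the projection profile (the search range, step, and variance scoring are my assumptions for illustration; the "greatest histogram value" criterion mentioned above works similarly):

#include <opencv2/opencv.hpp>
using namespace cv;

// Rotate the binarized (text = white) image through candidate angles and
// score each by the variance of its horizontal projection; text aligned
// with the rows gives the sharpest, highest-variance profile.
double projection_profile_skew(const Mat& bw)
{
    double bestAngle = 0, bestScore = -1;
    Point2f center(bw.cols / 2.f, bw.rows / 2.f);
    for (double angle = -15; angle <= 15; angle += 0.5) // assumed search range
    {
        Mat rotMat = getRotationMatrix2D(center, angle, 1.0);
        Mat rotated;
        warpAffine(bw, rotated, rotMat, bw.size(), INTER_NEAREST);

        Mat rowSum;
        reduce(rotated, rowSum, 1, CV_REDUCE_SUM, CV_32F); // horizontal projection

        Scalar mean, stddev;
        meanStdDev(rowSum, mean, stddev);
        double score = stddev[0] * stddev[0]; // variance of the profile
        if (score > bestScore)
        {
            bestScore = score;
            bestAngle = angle;
        }
    }
    return bestAngle;
}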
Results (all run using preprocess2, all with the same parameter set):
Code:
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
void hough_transform(Mat& im, Mat& orig, double* skew)
{
    double max_r = sqrt(pow(.5*im.cols, 2) + pow(.5*im.rows, 2));
    int angleBins = 180;
    Mat acc = Mat::zeros(Size(2*max_r, angleBins), CV_32SC1);
    int cenx = im.cols/2;
    int ceny = im.rows/2;
    for (int x = 1; x < im.cols-1; x++)
    {
        for (int y = 1; y < im.rows-1; y++)
        {
            if (im.at<uchar>(y,x) == 255)
            {
                for (int t = 0; t < angleBins; t++)
                {
                    double r = (x-cenx)*cos((double)t/angleBins*CV_PI) + (y-ceny)*sin((double)t/angleBins*CV_PI);
                    r += max_r;
                    acc.at<int>(t, int(r))++;
                }
            }
        }
    }
    Mat thresh;
    normalize(acc, acc, 255, 0, NORM_MINMAX);
    convertScaleAbs(acc, acc);
    /*debug
    Mat cmap;
    applyColorMap(acc,cmap,COLORMAP_JET);
    imshow("cmap",cmap);
    imshow("acc",acc);*/
    Point maxLoc;
    minMaxLoc(acc, 0, 0, 0, &maxLoc);
    double theta = (double)maxLoc.y/angleBins*CV_PI;
    double rho = maxLoc.x - max_r;
    if (abs(sin(theta)) < 0.000001) // check vertical
    {
        // when vertical, the line equation becomes x = rho
        Point2d p1 = Point2d(rho + im.cols/2, 0);
        Point2d p2 = Point2d(rho + im.cols/2, im.rows);
        line(orig, p1, p2, Scalar(0,0,255), 1);
        *skew = 90;
        cout << "skew angle " << " 90" << endl;
    }
    else
    {
        // convert normal form back to slope intercept form: y = mx + b
        double m = -cos(theta)/sin(theta);
        double b = rho/sin(theta) + im.rows/2. - m*im.cols/2.;
        Point2d p1 = Point2d(0, b);
        Point2d p2 = Point2d(im.cols, im.cols*m + b);
        line(orig, p1, p2, Scalar(0,0,255), 1);
        double skewangle;
        skewangle = p1.x - p2.x > 0 ? (atan2(p1.y-p2.y, p1.x-p2.x)*180./CV_PI) : (atan2(p2.y-p1.y, p2.x-p1.x)*180./CV_PI);
        *skew = skewangle;
        cout << "skew angle " << skewangle << endl;
    }
    imshow("orig", orig);
}
Mat preprocess1(Mat& im)
{
    Mat ret = Mat::zeros(im.size(), CV_32SC1);
    for (int x = 1; x < im.cols-1; x++)
    {
        for (int y = 1; y < im.rows-1; y++)
        {
            // Sobel gradient magnitude squared
            int gy = (im.at<uchar>(y-1,x+1) - im.at<uchar>(y-1,x-1))
                   + 2*(im.at<uchar>(y,x+1) - im.at<uchar>(y,x-1))
                   + (im.at<uchar>(y+1,x+1) - im.at<uchar>(y+1,x-1));
            int gx = (im.at<uchar>(y+1,x-1) - im.at<uchar>(y-1,x-1))
                   + 2*(im.at<uchar>(y+1,x) - im.at<uchar>(y-1,x))
                   + (im.at<uchar>(y+1,x+1) - im.at<uchar>(y-1,x+1));
            int g2 = (gy*gy + gx*gx);
            ret.at<int>(y,x) = g2;
        }
    }
    normalize(ret, ret, 255, 0, NORM_MINMAX);
    ret.convertTo(ret, CV_8UC1);
    threshold(ret, ret, 50, 255, THRESH_BINARY);
    return ret;
}
Mat preprocess2(Mat& im)
{
    // 1) assume white on black and do local thresholding
    // 2) only allow voting where the top is white and the bottom is black (bottom text line)
    Mat thresh;
    //thresh = 255 - im;
    thresh = im.clone();
    adaptiveThreshold(thresh, thresh, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 15, -2);

    Mat ret = Mat::zeros(im.size(), CV_8UC1);
    for (int x = 1; x < thresh.cols-1; x++)
    {
        for (int y = 1; y < thresh.rows-1; y++)
        {
            bool toprowblack = thresh.at<uchar>(y-1,x) == 0 || thresh.at<uchar>(y-1,x-1) == 0 || thresh.at<uchar>(y-1,x+1) == 0;
            bool belowrowblack = thresh.at<uchar>(y+1,x) == 0 || thresh.at<uchar>(y+1,x-1) == 0 || thresh.at<uchar>(y+1,x+1) == 0;
            uchar pix = thresh.at<uchar>(y,x);
            if ((!toprowblack && pix == 255 && belowrowblack))
            {
                ret.at<uchar>(y,x) = 255;
            }
        }
    }
    return ret;
}
Mat rot(Mat& im, double thetaRad)
{
    cv::Mat rotated;
    // new bounding size for the rotated image
    double nw = abs(sin(thetaRad))*im.rows + abs(cos(thetaRad))*im.cols;
    double nh = abs(cos(thetaRad))*im.rows + abs(sin(thetaRad))*im.cols;
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5, nh*.5), thetaRad*180/CV_PI, 1);
    Mat pos = Mat::zeros(Size(1,3), CV_64FC1);
    pos.at<double>(0) = (nw - im.cols)*.5;
    pos.at<double>(1) = (nh - im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);
    cv::warpAffine(im, rotated, rot_mat, Size(nw,nh), cv::INTER_LANCZOS4);
    return rotated;
}
int main(int argc, char** argv)
{
    string src = "C:/data/skew.png";
    Mat im = imread(src);
    Mat gray;
    cvtColor(im, gray, CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2", preprocessed);

    double skew;
    hough_transform(preprocessed, im, &skew);
    Mat rotated = rot(im, skew*CV_PI/180);
    imshow("corrected", rotated);

    waitKey(0);
    return 0;
}
The approach you posted has its own "ideal binarization" assumption: the threshold value directly affects the process. Use an Otsu threshold instead, or think about DFT for a generic solution (a rough sketch of the DFT idea follows the code below).
Otsu trial:
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

int main()
{
    Mat input = imread("your text");
    cvtColor(input, input, CV_BGR2GRAY);
    Mat img;
    // with THRESH_OTSU the supplied threshold (100) is ignored and computed automatically
    cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);
    cv::bitwise_not(img, img);
    imshow("img", img);
    waitKey(0);

    vector<Point> points;
    findNonZero(img, points);
    cv::RotatedRect box = cv::minAreaRect(points);

    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    cv::Point2f vertices[4];
    box.points(vertices);
    for (int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));

    imshow("img", img);
    waitKey(0);
    return 0;
}
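And a rough sketch of the DFT idea (my own illustration of the suggestion above, with assumed thresholds, not code from the original answer): horizontal text lines concentrate spectral energy in a streak through the center of the magnitude spectrum, and the streak rotates with the text, so fitting a line through the brightest off-center spectral points gives the skew.

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// Estimate skew from the orientation of the dominant streak in the
// magnitude spectrum. The cutoff and mask radius are assumptions.
double dft_skew(const Mat& gray)
{
    // pad to an optimal DFT size
    Mat padded;
    int m = getOptimalDFTSize(gray.rows), n = getOptimalDFTSize(gray.cols);
    copyMakeBorder(gray, padded, 0, m - gray.rows, 0, n - gray.cols,
                   BORDER_CONSTANT, Scalar::all(0));

    Mat planes[] = { Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F) };
    Mat complexI;
    merge(planes, 2, complexI);
    dft(complexI, complexI);

    // log magnitude spectrum
    split(complexI, planes);
    Mat mag;
    magnitude(planes[0], planes[1], mag);
    mag += Scalar::all(1);
    log(mag, mag);

    // crop to even size and swap quadrants so the DC term is centered
    mag = mag(Rect(0, 0, mag.cols & -2, mag.rows & -2));
    int cx = mag.cols / 2, cy = mag.rows / 2;
    Mat q0(mag, Rect(0, 0, cx, cy)), q1(mag, Rect(cx, 0, cx, cy));
    Mat q2(mag, Rect(0, cy, cx, cy)), q3(mag, Rect(cx, cy, cx, cy));
    Mat tmp;
    q0.copyTo(tmp); q3.copyTo(q0); tmp.copyTo(q3);
    q1.copyTo(tmp); q2.copyTo(q1); tmp.copyTo(q2);

    // keep only the brightest spectral points, ignoring the DC peak
    normalize(mag, mag, 0, 255, NORM_MINMAX);
    mag.convertTo(mag, CV_8U);
    Mat bright;
    threshold(mag, bright, 200, 255, THRESH_BINARY); // assumed cutoff
    circle(bright, Point(cx, cy), 5, Scalar(0), -1); // mask out DC

    vector<Point> pts;
    findNonZero(bright, pts);
    if (pts.size() < 2) return 0; // nothing usable found

    // the streak direction: for horizontal text it is vertical,
    // so the skew is the streak's deviation from 90 degrees
    Vec4f lineParams;
    fitLine(pts, lineParams, CV_DIST_L2, 0, 0.01, 0.01);
    double theta = atan2(lineParams[1], lineParams[0]) * 180.0 / CV_PI;
    if (theta < 0) theta += 180.0;
    return theta - 90.0;
}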
I'm trying to rewrite my very slow naive segmentation, which uses floodFill, as something faster. I ruled out meanShiftFiltering a year ago because of the difficulty of labelling the colours and then finding their contours.
The current version of OpenCV seems to have a fast new function that labels segments using mean shift: gpu::meanShiftSegmentation(). It produces images like the following:
(source: ekran.org)
So this looks to me pretty close to being able to generate contours. How can I run findContours to generate segments?
It seems to me this would be done by extracting the labelled colours from the image and then testing which pixel values in the image match each label colour, to make a boolean image suitable for findContours. This is what I have done below (but it's a bit slow, and it strikes me there should be a better way):
Mat image = imread("test.png");
...
// gpu operations on image resulting in gpuOpen
...

// Mean shift
TermCriteria iterations = TermCriteria(CV_TERMCRIT_ITER, 2, 0);
gpu::meanShiftSegmentation(gpuOpen, segments, 10, 20, 300, iterations);

// convert to greyscale (HSV image)
vector<Mat> channels;
split(segments, channels);

// get labels from histogram of image.
int size = 256;
labels = Mat(256, 1, CV_32SC1);
calcHist(&channels.at(2), 1, 0, Mat(), labels, 1, &size, 0);

// Loop through hist bins
for (int i = 0; i < 256; i++) {
    float count = labels.at<float>(i);
    // Does this bin represent a label in the image?
    if (count > 0) {
        // find areas of the image that match this label and findContours on the result.
        Mat label = Mat(channels.at(2).rows, channels.at(2).cols, CV_8UC1, Scalar::all(i)); // image filled with label colour.
        Mat boolImage = (channels.at(2) == label); // which pixels in labeled image are identical to this label?
        vector<vector<Point> > labelContours;
        findContours(boolImage, labelContours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
        // Loop through contours.
        for (int idx = 0; idx < labelContours.size(); idx++) {
            // get bounds for this contour.
            bounds = boundingRect(labelContours[idx]);
            // create ROI for bounds to extract this region
            Mat patchROI = image(bounds);
            Mat maskROI = boolImage(bounds);
        }
    }
}
Is this the best approach, or is there a better way to get the label colours? It seems it would be logical for meanShiftSegmentation to provide this information (a vector of colour values, or a vector of masks for each label, etc.).
Thank you.
The following is another way of doing this without throwing away the colour information in the meanShiftSegmentation results. I did not compare the two for performance.
// Loop through the whole image, pixel by pixel, and use each colour to index an array of bools indicating presence.
vector<Scalar> colours;
vector<Scalar>::iterator colourIter;
vector< vector< vector<bool> > > colourSpace;
vector< vector< vector<bool> > >::iterator colourSpaceBIter;
vector< vector<bool> >::iterator colourSpaceGIter;
vector<bool>::iterator colourSpaceRIter;

// Initialize 3D Vector
colourSpace.resize(256);
for (int i = 0; i < 256; i++) {
    colourSpace[i].resize(256);
    for (int j = 0; j < 256; j++) {
        colourSpace[i][j].resize(256);
    }
}

// Loop through pixels in the image (should be fastish, look into LUT for faster)
uchar r, g, b;
for (int i = 0; i < segments.rows; i++)
{
    Vec3b* pixel = segments.ptr<Vec3b>(i); // point to first pixel in row
    for (int j = 0; j < segments.cols; j++)
    {
        b = pixel[j][0];
        g = pixel[j][1];
        r = pixel[j][2];
        colourSpace[b][g][r] = true; // this colour is in the image.
        //cout << "BGR: " << int(b) << " " << int(g) << " " << int(r) << endl;
    }
}

// Get all the unique colours from colourSpace
// loop through colourSpace
int bi = 0;
for (colourSpaceBIter = colourSpace.begin(); colourSpaceBIter != colourSpace.end(); colourSpaceBIter++) {
    int gi = 0;
    for (colourSpaceGIter = colourSpaceBIter->begin(); colourSpaceGIter != colourSpaceBIter->end(); colourSpaceGIter++) {
        int ri = 0;
        for (colourSpaceRIter = colourSpaceGIter->begin(); colourSpaceRIter != colourSpaceGIter->end(); colourSpaceRIter++) {
            if (*colourSpaceRIter)
                colours.push_back( Scalar(bi, gi, ri) );
            ri++;
        }
        gi++;
    }
    bi++;
}

// For each colour
int segmentCount = 0;
for (colourIter = colours.begin(); colourIter != colours.end(); colourIter++) {
    Mat label = Mat(segments.rows, segments.cols, CV_8UC3, *colourIter); // image filled with label colour.
    Mat boolImage = Mat(segments.rows, segments.cols, CV_8UC3);
    inRange(segments, *colourIter, *colourIter, boolImage); // which pixels in labeled image are identical to this label?
    vector<vector<Point> > labelContours;
    findContours(boolImage, labelContours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
    // Loop through contours.
    for (int idx = 0; idx < labelContours.size(); idx++) {
        // get bounds for this contour.
        Rect bounds = boundingRect(labelContours[idx]);
        float area = contourArea(labelContours[idx]);
        // Draw this contour on a new blank image
        Mat maskImage = Mat::zeros(boolImage.rows, boolImage.cols, boolImage.type());
        drawContours(maskImage, labelContours, idx, Scalar(255,255,255), CV_FILLED);
        Mat patchROI = frame(bounds); // 'frame' here is the original input image
        Mat maskROI = maskImage(bounds);
    }
    segmentCount++;
}
I am trying to use the OpenCV EM algorithm to do color extraction. I am using the following code, based on the example in the OpenCV documentation:
cv::Mat capturedFrame(height, width, CV_8UC3);
int i, j;
int nsamples = 1000;
cv::Mat samples(nsamples, 2, CV_32FC1);
cv::Mat labels;
cv::Mat img = cv::Mat::zeros(height, height, CV_8UC3);
img = capturedFrame;
cv::Mat sample(1, 2, CV_32FC1);
CvEM em_model;
CvEMParams params;

samples = samples.reshape(2, 0);
for (i = 0; i < N; i++)
{
    // from the training samples
    cv::Mat samples_part = samples.rowRange(i*nsamples/N, (i+1)*nsamples/N);
    cv::Scalar mean(((i%N)+1)*img.rows/(N1+1), ((i/N1)+1)*img.rows/(N1+1));
    cv::Scalar sigma(30, 30);
    cv::randn(samples_part, mean, sigma);
}
samples = samples.reshape(1, 0);

// initialize model parameters
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = N;
params.cov_mat_type = CvEM::COV_MAT_SPHERICAL;
params.start_step = CvEM::START_AUTO_STEP;
params.term_crit.max_iter = 300;
params.term_crit.epsilon = 0.1;
params.term_crit.type = CV_TERMCRIT_ITER | CV_TERMCRIT_EPS;

// cluster the data
em_model.train(samples, Mat(), params, &labels);

cv::Mat probs;
probs = em_model.getProbs();
cv::Mat weights;
weights = em_model.getWeights();

cv::Mat modelIndex = cv::Mat::zeros(img.rows, img.cols, CV_8UC3);
for (i = 0; i < img.rows; i++)
{
    for (j = 0; j < img.cols; j++)
    {
        sample.at<float>(0) = (float)j;
        sample.at<float>(1) = (float)i;
        int response = cvRound(em_model.predict(sample));
        modelIndex.data[modelIndex.cols*i + j] = response;
    }
}
My questions are:
First, I want to extract each model (five in total here) and store the corresponding pixel values in five different matrices, so that I would have five different colors separately. Here I have only obtained their indexes; is there any way to get their corresponding colors? To make it easy, I could start by finding the dominant color based on these five GMMs.
Second, my sample data points here number "100", and it takes nearly 3 seconds for them. But I want to do all of this in no more than 30 milliseconds. I know OpenCV background extraction, which uses a GMM, performs really fast (below 20 ms), so there must be a way for me to do all of this within 30 ms for all 600x800 = 480,000 pixels. I found the predict function to be the most time-consuming one.
First Question:
In order to do color extraction you first need to train the EM with your input pixels. After that you simply loop over all the input pixels again and use predict() to classify each of them. I've attached a small example that utilizes EM for foreground/background separation based on colors. It shows you how to extract the dominant color (mean) of each gaussian and how to access the original pixel color.
#include <opencv2/opencv.hpp>

int main(int argc, char** argv) {

    cv::Mat source = cv::imread("test.jpg");

    // output images
    cv::Mat meanImg(source.rows, source.cols, CV_32FC3);
    cv::Mat fgImg(source.rows, source.cols, CV_8UC3);
    cv::Mat bgImg(source.rows, source.cols, CV_8UC3);

    // convert the input image to float
    cv::Mat floatSource;
    source.convertTo(floatSource, CV_32F);

    // now convert the float image to a column vector
    cv::Mat samples(source.rows * source.cols, 3, CV_32FC1);
    int idx = 0;
    for (int y = 0; y < source.rows; y++) {
        cv::Vec3f* row = floatSource.ptr<cv::Vec3f>(y);
        for (int x = 0; x < source.cols; x++) {
            samples.at<cv::Vec3f>(idx++, 0) = row[x];
        }
    }

    // we need just 2 clusters
    cv::EMParams params(2);
    cv::ExpectationMaximization em(samples, cv::Mat(), params);

    // the two dominating colors
    cv::Mat means = em.getMeans();
    // the weights of the two dominant colors
    cv::Mat weights = em.getWeights();

    // we define the foreground as the dominant color with the largest weight
    const int fgId = weights.at<float>(0) > weights.at<float>(1) ? 0 : 1;

    // now classify each of the source pixels
    idx = 0;
    for (int y = 0; y < source.rows; y++) {
        for (int x = 0; x < source.cols; x++) {
            // classify
            const int result = cvRound(em.predict(samples.row(idx++), NULL));
            // get the corresponding mean (dominant color)
            const double* ps = means.ptr<double>(result, 0);

            // set the corresponding mean value in the mean image
            float* pd = meanImg.ptr<float>(y, x);
            // float images need to be in the [0..1] range
            pd[0] = ps[0] / 255.0;
            pd[1] = ps[1] / 255.0;
            pd[2] = ps[2] / 255.0;

            // copy to either the foreground or the background image
            if (result == fgId) {
                fgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
            } else {
                bgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
            }
        }
    }

    cv::imshow("Means", meanImg);
    cv::imshow("Foreground", fgImg);
    cv::imshow("Background", bgImg);
    cv::waitKey(0);

    return 0;
}
I've tested the code with the following image, and it performs quite well.
Second Question:
I've noticed that the maximum number of clusters has a huge impact on the performance, so it's better to set this to a very conservative value instead of leaving it empty or setting it to the number of samples as in your example. Furthermore, the documentation mentions an iterative procedure to repeatedly optimize the model with less-constrained parameters; maybe this gives you some speed-up. To read more, please have a look at the docs inside the sample code that is provided for train() here.
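Regarding the 30 ms target specifically, one cheap trick (my own suggestion, reusing the names from your code rather than anything from the docs): since neighbouring pixels usually share a label, call the expensive predict() only on a subsampled grid and fill each block with the result, trading label resolution for a large constant speed-up.

// Sketch only: 'em_model', 'sample' and 'img' are the objects from the
// question's code; 'modelIndex' is assumed to be CV_8UC1 here so it can
// serve as a plain label map. 'step' trades accuracy for speed.
cv::Mat modelIndex = cv::Mat::zeros(img.rows, img.cols, CV_8UC1);
const int step = 4; // classify every 4th pixel in each direction
for (int i = 0; i < img.rows; i += step)
{
    for (int j = 0; j < img.cols; j += step)
    {
        sample.at<float>(0) = (float)j;
        sample.at<float>(1) = (float)i;
        int response = cvRound(em_model.predict(sample));
        // fill the whole step x step block with this label
        for (int bi = i; bi < std::min(i + step, img.rows); bi++)
            for (int bj = j; bj < std::min(j + step, img.cols); bj++)
                modelIndex.at<uchar>(bi, bj) = (uchar)response;
    }
}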