I'm trying to use cv::warpAffine to perform image transformations, but I have an issue.
Note: I already saw questions/46998895 and that approach works too, but it does no interpolation, and I wanted to use affine matrices.
So I want to shear the image, with the result going into a bigger image so that the entire sheared image is kept, like this:
Right now, my idea is to perform the shear on a flipped image when the factors are negative, rather than computing the new image size from negative values etc., as described on the Paperspace blog (a minimal sketch in code follows the quoted steps):
When we use a negative shear, the direction of shear is right to left, while x2 is not further in the negative direction than x1. One way to solve this could be to get the other set of corners (that would satisfy the constraint, can you prove it?). Apply the shear transformation and then change to the other set of corners because of the notation we follow.
Well, we could do that, but there's a better method. Here's how to perform a negative shear with shearing factor -alpha.
Flip the image and boxes horizontally.
Apply the positive shear transformation with shearing factor alpha.
Flip the image and boxes horizontally again.
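For a single negative factor, a minimal sketch of those three steps (my own illustration of the blog's idea, assuming a horizontal shear factor sx < 0) could look like this:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <cmath>
// Sketch only: negative horizontal shear implemented as flip -> positive shear -> flip back.
cv::Mat negativeShearX(const cv::Mat& src, float sx /* expected < 0 */) {
    float a = std::abs(sx);
    cv::Size newSize(src.cols + cvRound(a * src.rows), src.rows);
    float m[] = {1.0f, a, 0.0f, 0.0f, 1.0f, 0.0f}; // positive x-shear matrix
    cv::Mat shear(2, 3, CV_32F, m);
    cv::Mat flipped, sheared, dst;
    cv::flip(src, flipped, 1);                                   // mirror around the vertical axis
    cv::warpAffine(flipped, sheared, shear, newSize, cv::INTER_LINEAR);
    cv::flip(sheared, dst, 1);                                   // mirror back
    return dst;
}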
The left ones are my results, and the right ones are the expected results:
So as you can see, it works well when x and y are both positive, and it's also OK when one is 0 and the other is negative. But when one is positive and the other is negative, or when both are negative, my shear function always produces the same result; see the last 4 lines in the picture above. Again, the left shapes are my results, the right ones are the mickaShear() results (and obviously the correct ones).
My question is: what do I have to do in my shear_matrix to perform the correct transformation with negative values? Is the cv::flip() approach described by Paperspace the right way to achieve this transformation?
Here you can find the reproducible code comparing my function with the function from that answer. You will need OpenCV; I'm working with OpenCV 4.5.1.
#include <opencv2/core/mat.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core.hpp>
#include <iostream>
cv::Mat myShear(const cv::Mat & src, float sx, float sy) {
cv::Mat tmp;
cv::Mat dst;
std::vector<cv::Point2f> extremePoints;
extremePoints.emplace_back(cv::Point2f(0, 0));
extremePoints.emplace_back(cv::Point2f((float)src.cols, 0));
extremePoints.emplace_back(cv::Point2f((float)src.cols, (float)src.rows));
extremePoints.emplace_back(cv::Point2f(0, (float)src.rows));
for(auto & pt : extremePoints){
pt = cv::Point2f(pt.x + pt.y * std::abs(sx), pt.y + pt.x * std::abs(sy));
}
cv::Rect offsets = cv::boundingRect(extremePoints);
cv::Size new_size = offsets.size();
float mat_values[] = {1.0f, std::abs(sx), 0.0f, std::abs(sy), 1.0f, 0.0f};
cv::Mat shear_matrix = cv::Mat(2, 3, CV_32F, mat_values);
dst = cv::Mat::zeros(new_size, src.type());
/*
*cv::flip(img, tmp, INT_FLIP_CODE) where INT_FLIP_CODE can be:
* 0 (Vertically)
* 1 (Horizontally)
* -1 (Both)
*/
if(sx < 0.0f and sy < 0.0f) {
cv::flip(src, tmp, -1);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, -1);
} else if(sx < 0.0f) {
cv::flip(src, tmp, 1);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, 1);
} else if(sy < 0.0f) {
cv::flip(src, tmp, 0);
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
cv::flip(dst, dst, 0);
} else {
tmp = src.clone();
cv::warpAffine(tmp, dst, shear_matrix, new_size, cv::INTER_LINEAR);
}
return dst;
}
cv::Mat mickaShear(const cv::Mat & input, float Bx, float By)
{
if (Bx*By == 1)
{
std::cerr << "error == 1" << std::endl;
}
if (input.type() != CV_8UC3) return {};
std::vector<cv::Point2f> extremePoints;
extremePoints.emplace_back(0, 0);
extremePoints.emplace_back(input.cols, 0);
extremePoints.emplace_back(input.cols, input.rows);
extremePoints.emplace_back(0, input.rows);
for (auto & pt : extremePoints)
{
pt = cv::Point2f(pt.x + pt.y*Bx, pt.y + pt.x*By);
}
cv::Rect offsets = cv::boundingRect(extremePoints);
cv::Point2f offset = -offsets.tl();
cv::Size resultSize = offsets.size();
cv::Mat shearedImage = cv::Mat::zeros(resultSize, input.type());
for (int j = 0; j < shearedImage.rows; ++j){
for (int i = 0; i < shearedImage.cols; ++i){
cv::Point2f pp((float)i, (float)j);
pp = pp - offset; // go back to original coordinate system
cv::Point2f p;
p.x = int((-pp.y*Bx + pp.x) / (1 - By*Bx));
p.y = int(pp.y - p.x*By);
if ((p.x >= 0 && p.x < (float)input.cols) && (p.y >= 0 && p.y < (float)input.rows)){
shearedImage.at<cv::Vec3b>(j, i) = input.at<cv::Vec3b>(p);
}
}
}
return shearedImage;
}
int main(int argc, char *argv[]){
float x = -0.2f; //CHANGE SIGN TO TEST
float y = 0.5f; //CHANGE SIGN TO TEST
cv::Mat im = cv::imread("MODIFY BY JPG FILE PATH");
cv::Mat output = mickaShear(im, x, y);
cv::Mat result = myShear(im, x, y);
cv::imshow("result", result);
cv::imshow("output", output);
cv::waitKey(0);
cv::destroyAllWindows();
return 0;
}
I currently have a method for detecting a card in an image and for the most part it works when the lighting is fairly consistent and the background is very calm.
Here is the code I am using to perform this operation:
Mat img = inImg.clone();
outImg = Mat(inImg.size(), CV_8UC1);
inImg.copyTo(outImg);
Mat img_fullRes = img.clone();
pyrDown(img, img);
Mat imgGray;
cvtColor(img, imgGray, CV_RGB2GRAY);
outImg_gray = imgGray.clone();
// Find Edges //
Mat detectedEdges = imgGray.clone();
bilateralFilter(imgGray, detectedEdges, 0, 185, 3, 0);
Canny( detectedEdges, detectedEdges, 20, 65, 3 );
dilate(detectedEdges, detectedEdges, Mat::ones(3,3,CV_8UC1));
Mat cdst = img.clone();
vector<Vec4i> lines;
HoughLinesP(detectedEdges, lines, 1, CV_PI/180, 60, 50, 3 );
for( size_t i = 0; i < lines.size(); i++ )
{
Vec4i l = lines[i];
// For debug
//line( cdst, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]), Scalar(0,0,255), 1);
}
//cdst.copyTo(inImg);
// // Find points of intersection //
cv::Rect imgROI;
int ext = 10;
imgROI.x = ext;
imgROI.y = ext;
imgROI.width = img.size().width - ext;
imgROI.height = img.size().height - ext;
int N = lines.size();
// Creating N amount of points // N == lines.size()
cv::Point** poi = new cv::Point*[N];
for( int i = 0; i < N; i++ )
poi[i] = new cv::Point[N];
vector<cv::Point> poiList;
for( int i = 0; i < N; i++ )
{
poi[i][i] = cv::Point(-1,-1);
Vec4i line1 = lines[i];
for( int j = i + 1; j < N; j++ )
{
Vec4i line2 = lines[j];
cv::Point p = computeIntersect(line1, line2, imgROI);
if( p.x != -1 )
{
//line(cdst, p-cv::Point(2,0), p+cv::Point(2,0), Scalar(0,255,0));
//line(cdst, p-cv::Point(0,2), p+cv::Point(0,2), Scalar(0,255,0));
poiList.push_back(p);
}
poi[i][j] = p;
poi[j][i] = p;
}
}
cdst.copyTo(inImg);
if(poiList.size()==0)
{
outImg = inImg.clone();
//circle(outImg, cv::Point(100,100), 50, Scalar(255,0,0), -1);
return;
}
convexHull(poiList, poiList, false, true);
for( int i=0; i<poiList.size(); i++ )
{
cv::Point p = poiList[i];
//circle(cdst, p, 3, Scalar(255,0,0), 2);
}
//Evaluate all possible quadrilaterals
cv::Point cardCorners[4];
float metric_max = 0;
int Npoi = poiList.size();
for( int p1=0; p1<Npoi; p1++ )
{
cv::Point pts[4];
pts[0] = poiList[p1];
for( int p2=p1+1; p2<Npoi; p2++ )
{
pts[1] = poiList[p2];
if( isCloseBy(pts[1],pts[0]) )
continue;
for( int p3=p2+1; p3<Npoi; p3++ )
{
pts[2] = poiList[p3];
if( isCloseBy(pts[2],pts[1]) || isCloseBy(pts[2],pts[0]) )
continue;
for( int p4=p3+1; p4<Npoi; p4++ )
{
pts[3] = poiList[p4];
if( isCloseBy(pts[3],pts[0]) || isCloseBy(pts[3],pts[1])
|| isCloseBy(pts[3],pts[2]) )
continue;
// get the metrics
float area = getArea(pts);
cv::Point a = pts[0]-pts[1];
cv::Point b = pts[1]-pts[2];
cv::Point c = pts[2]-pts[3];
cv::Point d = pts[3]-pts[0];
float oppLenDiff = abs(a.dot(a)-c.dot(c)) + abs(b.dot(b)-d.dot(d));
float metric = area - 0.35*oppLenDiff;
if( metric > metric_max )
{
metric_max = metric;
cardCorners[0] = pts[0];
cardCorners[1] = pts[1];
cardCorners[2] = pts[2];
cardCorners[3] = pts[3];
}
}
}
}
}
// find the corners corresponding to the 4 corners of the physical card
sortPointsClockwise(cardCorners);
// Calculate Homography //
vector<Point2f> srcPts(4);
srcPts[0] = cardCorners[0]*2;
srcPts[1] = cardCorners[1]*2;
srcPts[2] = cardCorners[2]*2;
srcPts[3] = cardCorners[3]*2;
vector<Point2f> dstPts(4);
cv::Size outImgSize(1400,800);
dstPts[0] = Point2f(0,0);
dstPts[1] = Point2f(outImgSize.width-1,0);
dstPts[2] = Point2f(outImgSize.width-1,outImgSize.height-1);
dstPts[3] = Point2f(0,outImgSize.height-1);
Mat Homography = findHomography(srcPts, dstPts);
// Apply Homography
warpPerspective( img_fullRes, outImg, Homography, outImgSize, INTER_CUBIC );
outImg.copyTo(inImg);
Where computeIntersect is defined as:
cv::Point computeIntersect(cv::Vec4i a, cv::Vec4i b, cv::Rect ROI)
{
int x1 = a[0], y1 = a[1], x2 = a[2], y2 = a[3];
int x3 = b[0], y3 = b[1], x4 = b[2], y4 = b[3];
cv::Point p1 = cv::Point (x1,y1);
cv::Point p2 = cv::Point (x2,y2);
cv::Point p3 = cv::Point (x3,y3);
cv::Point p4 = cv::Point (x4,y4);
// Check to make sure all points are within the image boundaries; if not, reject them.
if( !ROI.contains(p1) || !ROI.contains(p2)
|| !ROI.contains(p3) || !ROI.contains(p4) )
return cv::Point (-1,-1);
cv::Point vec1 = p1-p2;
cv::Point vec2 = p3-p4;
float vec1_norm2 = vec1.x*vec1.x + vec1.y*vec1.y;
float vec2_norm2 = vec2.x*vec2.x + vec2.y*vec2.y;
float cosTheta = (vec1.dot(vec2))/sqrt(vec1_norm2*vec2_norm2);
float den = ((float)(x1-x2) * (y3-y4)) - ((y1-y2) * (x3-x4));
if(den != 0)
{
cv::Point2f pt;
pt.x = ((x1*y2 - y1*x2) * (x3-x4) - (x1-x2) * (x3*y4 - y3*x4)) / den;
pt.y = ((x1*y2 - y1*x2) * (y3-y4) - (y1-y2) * (x3*y4 - y3*x4)) / den;
if( !ROI.contains(pt) )
return cv::Point (-1,-1);
// no-confidence metric
float d1 = MIN( dist2(p1,pt), dist2(p2,pt) )/vec1_norm2;
float d2 = MIN( dist2(p3,pt), dist2(p4,pt) )/vec2_norm2;
float no_confidence_metric = MAX(sqrt(d1),sqrt(d2));
// If end point ratios are greater than .5 reject
if( no_confidence_metric < 0.5 && cosTheta < 0.707 )
return cv::Point (int(pt.x+0.5), int(pt.y+0.5));
}
return cv::Point(-1, -1);
}
sortPointsClockwise is defined as:
void sortPointsClockwise(cv::Point a[])
{
cv::Point b[4];
cv::Point ctr = (a[0]+a[1]+a[2]+a[3]);
ctr.x /= 4;
ctr.y /= 4;
b[0] = a[0]-ctr;
b[1] = a[1]-ctr;
b[2] = a[2]-ctr;
b[3] = a[3]-ctr;
for( int i=0; i<4; i++ )
{
if( b[i].x < 0 )
{
if( b[i].y < 0 )
a[0] = b[i]+ctr;
else
a[3] = b[i]+ctr;
}
else
{
if( b[i].y < 0 )
a[1] = b[i]+ctr;
else
a[2] = b[i]+ctr;
}
}
}
getArea is defined as:
float getArea(cv::Point arr[])
{
cv::Point diag1 = arr[0]-arr[2];
cv::Point diag2 = arr[1]-arr[3];
return 0.5*(diag1.cross(diag2));
}
isCloseBy is defined as:
bool isCloseBy( cv::Point p1, cv::Point p2 )
{
int D = 10;
// Checking that X values are within 10, same for Y values.
return ( abs(p1.x-p2.x)<=D && abs(p1.y-p2.y)<=D );
}
And finally dist2:
float dist2( cv::Point p1, cv::Point p2 )
{
return float((p1.x-p2.x)*(p1.x-p2.x) + (p1.y-p2.y)*(p1.y-p2.y));
}
Here are several test images and their results:
Sorry for the very lengthy post, however I am hoping someone can suggest a way I can make my method for extracting the card from the image more robust. One that can better handle disruptive backgrounds along with inconsistent lighting.
When a card is placed on a contrasting background with good lighting my method works nearly 90% of the time. But it is clear I need a more robust approach.
Does anyone have any suggestions?
Thanks.
ATTEMPT at dhanushka's solution
Mat gray, bw;
pyrDown(inImg, inImg);
cvtColor(inImg, gray, CV_RGB2GRAY);
int morph_size = 3;
Mat element = getStructuringElement( MORPH_ELLIPSE, cv::Size( 4*morph_size + 1, 2*morph_size+1 ), cv::Point( morph_size, morph_size ) );
morphologyEx(gray, gray, MORPH_OPEN, element);
threshold(gray, bw, 160, 255, CV_THRESH_BINARY);
vector<vector<cv::Point> > contours;
vector<Vec4i> hierarchy;
findContours( bw, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0) );
int largest_area=0;
int largest_contour_index=0;
cv::Rect bounding_rect;
for( int i = 0; i< contours.size(); i++ )
{
double a=contourArea( contours[i],false); // Find the area of contour
if(a>largest_area){
largest_area=a;
largest_contour_index=i; //Store the index of largest contour
bounding_rect=boundingRect(contours[i]);
}
}
//Scalar color( 255,255,255);
rectangle(inImg, bounding_rect, Scalar(0,255,0),1, 8,0);
Mat biggestRect = inImg(bounding_rect);
Mat card1 = biggestRect.clone();
The art of image processing is (in my 10+ years' experience) just that: an art. No single answer exists, and there is always more than one way to do it. And it will definitely fail in some cases.
In my experience of working on automatic feature detection in medical images, it takes a long time to build a reliable algorithm, but in hindsight the best result is obtained with a relatively simple algorithm. However, it takes a lot of time to get to that simple algorithm.
To get to this, the general approach is always the same:
Get started by building up a large database of test images (at least 100). This defines the 'normal' images which should work. By collecting the images you already start thinking about the problem.
Annotate the images to build a kind of 'ground truth'. In this case, the ground truth should contain the 4 corners of the card, since these are the interesting points.
Create an application which runs an algorithm over these images and compares the result with the ground truth. In this case, 'comparing with the ground truth' would be taking the mean distance of the 4 found corner points to the ground-truth corner points (a small sketch of such a metric follows this list).
Output a tab-delimited file which you name .xls, so it can be opened (on Windows) in Excel by double clicking. Good for getting a quick overview of the cases. Look at the worst cases first, then open those cases manually to try to understand why they do not work.
Now you are ready to change the algorithm. Change something and re-run. Compare the new Excel sheet to the old one. Now you start realizing the trade-offs you have to make.
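To make that comparison step concrete, the per-image metric can be as simple as the mean corner distance below (just a sketch; the corner arrays and the function name are hypothetical):
#include <opencv2/core.hpp>
#include <array>
#include <cmath>
// Mean Euclidean distance between the 4 detected corners and the annotated ground-truth
// corners, assuming both are stored in the same (e.g. clockwise) order.
double meanCornerError(const std::array<cv::Point2f, 4>& found,
                       const std::array<cv::Point2f, 4>& truth)
{
    double sum = 0.0;
    for (int i = 0; i < 4; ++i) {
        cv::Point2f d = found[i] - truth[i];
        sum += std::sqrt(d.x * d.x + d.y * d.y); // distance for this corner
    }
    return sum / 4.0; // mean distance in pixels; one value per test image for the .xls
}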
That said, I think you need to answer these questions while tuning the algorithm:
Do you allow slightly folded cards? So no completely straight lines? If so, concentrate more on corners instead of lines/edges.
Do you allow gradual differences in lighting? If so, a local contrast-stretch filter might help (see the sketch after this list).
Do you allow the same color for the card as the background? If so, you have to concentrate on the contents of the card instead of the border of the card.
Do you allow non-perfect lenses? If so, to what extent?
Do you allow rotated cards? If so, to what extent?
Should the background be uniform in color and/or texture?
How small should the smallest detectable card be relative to the image size? If you assume that at least 80% of the width or height should be covered, you get robustness back.
If more than one card is visible in the image, should the algorithm be robust and only pick one, or is any output ok?
If no card is visible, should it detect this case? Building in detection of this case will make it more user friendly ('no card found'), but also less robust.
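For the lighting question above, one readily available form of local contrast enhancement in OpenCV is CLAHE; this is my suggestion rather than part of the original answer, so treat it as a sketch:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
// Sketch: local contrast enhancement of a grayscale image with CLAHE.
cv::Mat localContrast(const cv::Mat& gray)
{
    cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(2.0 /* clip limit */, cv::Size(8, 8) /* tile grid */);
    cv::Mat enhanced;
    clahe->apply(gray, enhanced);
    return enhanced;
}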
These answers shape the requirements and assumptions for the images to acquire. Assumptions you can rely on are very powerful: they make the algorithm fast, robust and simple if you choose the right ones. Also let these requirements and assumptions be part of the testing database.
So what would I choose? Based on the three images you provided I would start with something like:
Assume the cards are filling the image from 50% to 100%.
Assume the cards are rotated at most 10 degrees or so.
Assume the corners are well visible.
Assume the aspect ratio (height divided by width) of the cards to be between 1/3 and 3.
Assume no card-like objects in the background
The algorithm then would look like:
Detect in each quadrant of the image a specific corner with a corner filter: in the upper-left quadrant of the image, the upper-left corner of the card, and so on. Look for example at http://www.ee.surrey.ac.uk/CVSSP/demos/corners/results3.html, or use an OpenCV function for it like cornerHarris (a sketch follows this list).
To be more robust, calculate more than one corner per quadrant.
Try to build parallelograms with one corner per quadrant by combining points from the quadrants. Create a fitness function which gives a higher score to:
having internal angles close to 90 degrees
being large
optionally, compare the corners of the card based on lighting or another feature.
This fitness function gives a lot of tuning possibilities later on.
Return the parallelogram with the highest score.
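A rough sketch of the per-quadrant corner search (my own illustration of the idea, not tuned code; it keeps only the single strongest Harris response per quadrant):
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <array>
// Candidate card corners: the strongest Harris response in each image quadrant.
std::array<cv::Point, 4> strongestCornerPerQuadrant(const cv::Mat& gray)
{
    cv::Mat response;
    cv::cornerHarris(gray, response, 5 /* blockSize */, 3 /* ksize */, 0.04 /* k */);
    int hw = gray.cols / 2, hh = gray.rows / 2;
    cv::Rect quads[4] = {
        cv::Rect(0, 0, hw, hh), cv::Rect(hw, 0, gray.cols - hw, hh),
        cv::Rect(0, hh, hw, gray.rows - hh), cv::Rect(hw, hh, gray.cols - hw, gray.rows - hh)
    };
    std::array<cv::Point, 4> corners;
    for (int q = 0; q < 4; ++q) {
        cv::Point maxLoc;
        cv::minMaxLoc(response(quads[q]), 0, 0, 0, &maxLoc); // strongest response in this quadrant
        corners[q] = maxLoc + quads[q].tl();                  // back to full-image coordinates
    }
    return corners;
}
To be more robust, keep the top few local maxima per quadrant instead of one, and score the resulting quadrilaterals with the fitness function described above.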
So why use corner detection instead of a Hough transform for line detection? In my opinion the Hough transform is (besides being slow) quite sensitive to patterns in the background (which is what you see in your first image -- it detects a stronger line in the background than on the card), and it cannot handle slightly curved lines that well, unless you use a larger bin size, which will worsen the detection.
Good luck!
A more general approach would definitely be something like Rutger Nijlunsing suggested in his answer. However, in your case, at least for the provided sample images, a very simple approach like morphological opening followed by thresholding, contour processing and convex hull would yield the result you want. Use a scaled-down version of the images for processing so that you don't have to use a large kernel for morphological operations. Below are the images processed this way.
pyrDown(large, rgb0);
pyrDown(rgb0, rgb0);
pyrDown(rgb0, rgb0);
Mat small;
cvtColor(rgb0, small, CV_BGR2GRAY);
Mat morph;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(11, 11));
morphologyEx(small, morph, MORPH_OPEN, kernel);
Mat bw;
threshold(morph, bw, 0, 255.0, CV_THRESH_BINARY | CV_THRESH_OTSU);
Mat bdry;
kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
erode(bw, bdry, kernel);
subtract(bw, bdry, bdry);
// do contour processing on bdry
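The contour processing hinted at in that last comment could look roughly like this (a sketch following the variable names above; the factor 8 assumes the three pyrDown calls):
vector<vector<cv::Point> > contours;
findContours(bdry.clone(), contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
if (!contours.empty())
{
    // pick the largest boundary contour and take its convex hull
    int largest = 0;
    for (size_t i = 1; i < contours.size(); i++)
        if (contourArea(contours[i]) > contourArea(contours[largest]))
            largest = (int)i;
    vector<vector<cv::Point> > hull(1);
    convexHull(contours[largest], hull[0]);
    // scale the hull back to the original image (three pyrDown calls = factor 8)
    for (size_t i = 0; i < hull[0].size(); i++)
        hull[0][i] *= 8;
    drawContours(large, hull, 0, Scalar(0, 255, 0), 2);
}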
This approach will not work in general, so I would strongly recommend something like Rutger suggested.
I need to iterate through a 3-channel matrix in the HSV color space and it's awfully slow. It takes about 2.5 seconds.
cv::Mat img = cv::imread( "image.jpg" );
cv::Mat img32FC3;
img.convertTo( img32FC3, CV_32FC3 );
cv::cvtColor( img32FC3, img32FC3, CV_BGR2HSV );
int height = img32FC3.rows;
int width = img32FC3.cols;
cv::Size size = img32FC3.size();
if( img32FC3.isContinuous() ) {
size.width *= size.height;
size.height = 1;
}
size.width *= 3;
float h, s, v;
for( int i = 0; i < size.height; i ++ ) {
float* ptr = img32FC3.ptr<float>(i);
for( int j = 0; j < size.width; j += 3 ) {
h = (ptr[ j ]);
s = (ptr[j +1 ]);
v = (ptr[j +2 ]);
}
}
cv::cvtColor( img32FC3, img32FC3, CV_HSV2BGR );
img32FC3.convertTo( img, CV_8UC3 );
imwrite("test.jpg", img );
The code above is adapted from the OpenCV documentation, where it is presented as performant. So I wonder how I can speed this up, because 2.5 seconds is really very slow :(.
BTW: the image is 3744x5616 pixels.
You should not directly access the data of a mat since sometimes it is not stored as you would expect. Check my answer here.
Another way is to use the Mat iterator. According to [opencv_tutorials.pdf, ver 2.4.2, pp. 89-92]
it is slightly slower than the nested loops from my link. (For a large image you are 5% slower but note that they used just MatIterator, not the const one which can be further optimised by a nice compiler.)
uchar b, g, r; // M is your CV_8UC3 image
MatConstIterator_<Vec3b> it = M.begin<Vec3b>(), it_end = M.end<Vec3b>();
for(; it != it_end; ++it)
{
//do sth read-only otherwise use MatIterator_<Vec3b>
b = (*it)[0];
g = (*it)[1];
r = (*it)[2];
}
Use <double> instead of <Vec3b> for a grayscale (single-channel) Mat, as in the OpenCV 2.4.2 reference manual, p. 19.
Your best chance to speed this up is to parallelize the loop. OpenCV uses TBB as its multithreading environment, so you might want to check it (a parallel_for_ sketch follows the loop below). By the way, you don't need to do all your calculations about the size, etc. You already checked that your matrix isContinuous(), so you can get the pointer as float* ptr = reinterpret_cast<float*>(img32FC3.data); then your loop:
for (size_t i = 0; i < img32FC3.rows * img32FC3.cols; ++i, ptr += 3) {
// do something
}
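As for the parallelization mentioned above, here is a rough sketch using cv::parallel_for_ (which dispatches to TBB or another backend if OpenCV was built with one); the per-pixel body is just a placeholder and the class name is mine:
#include <opencv2/core/core.hpp> // cv::ParallelLoopBody / cv::parallel_for_ (opencv2/core/utility.hpp in newer versions)
class HsvLoopBody : public cv::ParallelLoopBody
{
public:
    explicit HsvLoopBody(cv::Mat& img) : img_(img) {}
    void operator()(const cv::Range& range) const
    {
        for (int r = range.start; r < range.end; ++r) {
            cv::Vec3f* row = img_.ptr<cv::Vec3f>(r); // one row of the CV_32FC3 HSV image
            for (int c = 0; c < img_.cols; ++c) {
                float h = row[c][0], s = row[c][1], v = row[c][2];
                // ... per-pixel work on h, s, v goes here ...
                (void)h; (void)s; (void)v;
            }
        }
    }
private:
    cv::Mat& img_;
};
// usage (inside a function): cv::parallel_for_(cv::Range(0, img32FC3.rows), HsvLoopBody(img32FC3));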
I am trying to use the OpenCV EM algorithm to do color extraction. I am using the following code, based on the example in the OpenCV documentation:
cv::Mat capturedFrame ( height, width, CV_8UC3 );
int i, j;
int nsamples = 1000;
cv::Mat samples ( nsamples, 2, CV_32FC1 );
cv::Mat labels;
cv::Mat img = cv::Mat::zeros ( height, height, CV_8UC3 );
img = capturedFrame;
cv::Mat sample ( 1, 2, CV_32FC1 );
CvEM em_model;
CvEMParams params;
samples = samples.reshape ( 2, 0 );
const int N = 5; // number of mixture components (the five colors mentioned below)
const int N1 = (int)sqrt((double)N); // grid dimension used to spread the synthetic means
for ( i = 0; i < N; i++ )
{
//from the training samples
cv::Mat samples_part = samples.rowRange ( i*nsamples/N, (i+1)*nsamples/N);
cv::Scalar mean (((i%N1)+1)*img.rows/(N1+1),((i/N1)+1)*img.rows/(N1+1));
cv::Scalar sigma (30,30);
cv::randn(samples_part,mean,sigma);
}
samples = samples.reshape ( 1, 0 );
//initialize model parameters
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = N;
params.cov_mat_type = CvEM::COV_MAT_SPHERICAL;
params.start_step = CvEM::START_AUTO_STEP;
params.term_crit.max_iter = 300;
params.term_crit.epsilon = 0.1;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
//cluster the data
em_model.train ( samples, Mat(), params, &labels );
cv::Mat probs;
probs = em_model.getProbs();
cv::Mat weights;
weights = em_model.getWeights();
cv::Mat modelIndex = cv::Mat::zeros ( img.rows, img.cols, CV_8UC3 );
for ( i = 0; i < img.rows; i ++ )
{
for ( j = 0; j < img.cols; j ++ )
{
sample.at<float>(0) = (float)j;
sample.at<float>(1) = (float)i;
int response = cvRound ( em_model.predict ( sample ) );
modelIndex.data [ modelIndex.cols*i + j] = response;
}
}
My question here is:
Firstly, I want to extract each model, here five in total, and then store the corresponding pixel values in five different matrices. That way I could have five different colors separately. Here I only obtained their indexes; is there any way to get their corresponding colors? To make it easy, I can start by finding the dominant color based on these five GMM components.
Secondly, here my sample data points are "100", and it takes nearly 3 seconds for them. But I want to do all of this in no more than 30 milliseconds. I know OpenCV background extraction, which uses a GMM, performs really fast, below 20 ms, so there must be a way to do all of this within 30 ms for all 600x800 = 480000 pixels. I found the predict function is the most time-consuming one.
First Question:
In order to do color extraction you first need to train the EM with your input pixels. After that you simply loop over all the input pixels again and use predict() to classify each of them. I've attached a small example that utilizes EM for foreground/background separation based on colors. It shows you how to extract the dominant color (mean) of each Gaussian and how to access the original pixel color.
#include <opencv2/opencv.hpp>
int main(int argc, char** argv) {
cv::Mat source = cv::imread("test.jpg");
//output images
cv::Mat meanImg(source.rows, source.cols, CV_32FC3);
cv::Mat fgImg(source.rows, source.cols, CV_8UC3);
cv::Mat bgImg(source.rows, source.cols, CV_8UC3);
//convert the input image to float
cv::Mat floatSource;
source.convertTo(floatSource, CV_32F);
//now convert the float image to column vector
cv::Mat samples(source.rows * source.cols, 3, CV_32FC1);
int idx = 0;
for (int y = 0; y < source.rows; y++) {
cv::Vec3f* row = floatSource.ptr<cv::Vec3f > (y);
for (int x = 0; x < source.cols; x++) {
samples.at<cv::Vec3f > (idx++, 0) = row[x];
}
}
//we need just 2 clusters
cv::EMParams params(2);
cv::ExpectationMaximization em(samples, cv::Mat(), params);
//the two dominating colors
cv::Mat means = em.getMeans();
//the weights of the two dominant colors
cv::Mat weights = em.getWeights();
//we define the foreground as the dominant color with the largest weight
const int fgId = weights.at<float>(0) > weights.at<float>(1) ? 0 : 1;
//now classify each of the source pixels
idx = 0;
for (int y = 0; y < source.rows; y++) {
for (int x = 0; x < source.cols; x++) {
//classify
const int result = cvRound(em.predict(samples.row(idx++), NULL));
//get the according mean (dominant color)
const double* ps = means.ptr<double>(result, 0);
//set the according mean value to the mean image
float* pd = meanImg.ptr<float>(y, x);
//float images need to be in [0..1] range
pd[0] = ps[0] / 255.0;
pd[1] = ps[1] / 255.0;
pd[2] = ps[2] / 255.0;
//set either foreground or background
if (result == fgId) {
fgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
} else {
bgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
}
}
}
cv::imshow("Means", meanImg);
cv::imshow("Foreground", fgImg);
cv::imshow("Background", bgImg);
cv::waitKey(0);
return 0;
}
I've tested the code with the following image and it performs quite well.
Second Question:
I've noticed that the maximum number of clusters has a huge impact on the performance. So it's better to set this to a very conservative value instead of leaving it empty or setting it to the number of samples like in your example. Furthermore, the documentation mentions an iterative procedure to repeatedly optimize the model with less-constrained parameters (sketched below). Maybe this gives you some speed-up. To read more, please have a look at the docs inside the sample code that is provided for train() here.
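For reference, that iterative procedure looks roughly like this with the legacy CvEM API used in the question (a sketch along the lines of the em sample, reusing em_model, params, samples and labels from your code; treat the exact getter names as an assumption):
// second stage: start from the spherical model's estimates and allow diagonal covariances
CvEM em_model2;
params.cov_mat_type = CvEM::COV_MAT_DIAGONAL;
params.start_step = CvEM::START_E_STEP;
params.means = em_model.get_means();
params.covs = (const CvMat**)em_model.get_covs();
params.weights = em_model.get_weights();
em_model2.train(samples, cv::Mat(), params, &labels);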