I want to multiply one image by its transpose. my image size is nxm.
i do as follows
for (int k = 0; k < total_images; k++)
{
Mat img_tp1 = cv::Mat(imgRows, imgCols, CV_32FC1);
Mat img_tp2 = cv::Mat(imgRows, imgRows, CV_32FC1);
subtract(img[k], MeanMat, img_tp1);
img_tp2 = img_tp1 * img_tp2.t();
std::ostringstream name;
name << "sub" << k << ".jpg";
cv::imwrite(name.str(), img_tp2);
}
and i face this error
Unhandled exception at 0x000007FEFDB79E5D in Tracking.exe: Microsoft C++ exception: cv::Exception at memory location 0x00000000001E5EE0.
how can i do this multiplication? in fact i want to compute the covariance matrix of the sequence of images so i need this multiplication.
Thanks.
Then i decide to implement the multiplying for my RGB image and i use this code:
for (int i = 0; i < imgRows; i++)
{
for (int j = 0; j < imgRows; j++)
{
uchar pix1[3];
uchar pix2[3];
uchar pix[3] = { 0, 0, 0 };
for (int k = 0; k < imgCols; k++)
{
img_tp1.at<Vec3b>(i, k) = { pix1[0], pix1[1], pix1[2] };
img_tp1.at<Vec3b>(j, k) = { pix2[0], pix2[1], pix2[2] };
CovMat0.at<Vec3b>(i, j) = { pix[0], pix[1], pix[2] };
pix[0] = (pix1[0] * pix2[0]) + pix[0];
pix[1] = (pix1[1] * pix2[1]) + pix[1];
pix[2] = (pix1[2] * pix2[2]) + pix[2];
CovMat0.at<Vec3b>(i, j) = { pix[0], pix[1], pix[2] };
}
}
}
but it takes lots of time to process it. Is there any better way for that?
(I want to multiply one image by its transpose)
Related
I have two matrices:
cv::Mat bgr(rows, cols, CV_16UC3);
cv::Mat ir(rows, cols, CV_16UC1 );
and I want to subtract ir from each channel of bgr element-wise. I couldn't find an elegant solution yet.
EDIT
One possible solution might be:
// subtract IR from BGR
Vec3u tmp;
for (int i = 0; i < ir.rows; i++) {
for (int j = 0; j < ir.cols; j++) {
tmp = bgr.at<Vec3u>(i,j);
tmp[0] = tmp[0] - ir.at<ushort>(i,j);
tmp[1] = tmp[1] - ir.at<ushort>(i,j);
tmp[2] = tmp[2] - ir.at<ushort>(i,j);
bgr.at<Vec3u>(i, j) = tmp;
}
}
The question is that whether there is a faster solution.
If we're talking about an elegant way, it could be like this:
Mat mat = Mat::ones(2,2,CV_8UC1);
Mat mat1 = Mat::ones(2,2,CV_8UC2)*3;
Mat mats[2];
split(mat1,mats);
mats[0]-=mat;
mats[1]-=mat;
merge(mats,2,mat1);
You shouldn't use at(), if you wanted your code to be more efficient. Use pointers and check Mats for continuity:
int rows = mat.rows;
int cols = mat.cols;
if(mat.isContinuous() && mat1.isContinuous())
{
cols*=rows;
rows = 1;
}
for(int j = 0;j<rows;j++) {
auto channe2limg = mat1.ptr<Vec2b>(j);
auto channelimg = mat.ptr<uchar>(j);
for (int i = 0; i < cols; i++) {
channe2limg[i][0]-=channelimg[i];
channe2limg[i][1]-=channelimg[i];
}
}
I have utilised the OpenCV GrabCut functionality to perform an image segmentation. When viewing the segmented image as per the code below, the segmentation is reasonable/correct. However, when looking at(at attempting to use) the segmrntation mask values, I am getting some very large numbers, and not the enumerated values one would expect from the cv::GrabCutClasses enum.
void doGrabCut(){
Vector2i imgDims = getImageDims();
//Wite image to OpenCV Mat.
const Vector4u *rgb = getRGB();
cv::Mat rgbMat(imgDims.height, imgDims.width, CV_8UC3);
for (int i = 0; i < imgDims.height; i++) {
for (int j = 0; j < imgDims.width; j++) {
int idx = i * imgDims.width + j;
rgbMat.ptr<cv::Vec3b>(i)[j][2] = rgb[idx].x;
rgbMat.ptr<cv::Vec3b>(i)[j][1] = rgb[idx].y;
rgbMat.ptr<cv::Vec3b>(i)[j][0] = rgb[idx].z;
}
}
//Do graph cut.
cv::Mat res, fgModel, bgModel;
cv::Rect bb(bb_begin.x, bb_begin.y, bb_end.x - bb_begin.x, bb_end.y - bb_begin.y);
cv::grabCut(rgbMat, res, bb, bgModel, fgModel, 10, cv::GC_INIT_WITH_RECT);
cv::compare(res, cv::GC_PR_FGD, res, cv::CMP_EQ);
//Write mask.
Vector4u *maskPtr = getMask();//uchar
for (int i = 0; i < imgDims.height; i++) {
for (int j = 0; j < imgDims.width; j++) {
cv::GrabCutClasses classification = res.at<cv::GrabCutClasses>(i, j);
int idx = i * imgDims.width + j;
std::cout << classification << std::endl;//Strange numbers here.
maskPtr[idx].x = (classification == cv::GC_PR_FGD) ? 255 : 0;//This always evaluates to 0.
}
}
cv::Mat foreground(rgbMat.size(), CV_8UC3, cv::Scalar(255, 255, 255));
rgbMat.copyTo(foreground, res);
cv::imshow("GC Output", foreground);
}
Why would one get numbers outside the enumeration when the segmentation is qualitatively correct?
I doubt on your //Write mask. step, why do you re-iterate the res and modify maskPtr as maskPtr[idx].x = (classification == cv::GC_PR_FGD) ? 255 : 0;, Basically you already have a single channel Binary image stored in the res variable, the cv::compare() returns a binary image
However if you still want to debug the values by iteration then you should use the standard technique for iterating a single channel image as:
for (int i = 0; i < m.rows; i++) {
for (int j = 0; j < m.cols; j++) {
uchar classification = res.at<uchar>(i, j);
std::cout << int(classification) << ", ";
}
}
As you are iterating a single channel mat you must use res.at<uchar>(i, j) and not res.at<cv::GrabCutClasses>.
I am trying to make an alphatrimmed filter in openCV library. My code is not working properly and the resultant image is not looking as image after filtering.
The filter should work in the following way.
Chossing some (array) of pixels in my example it is 9 pixels '3x3' window.
Ordering them in increasing way.
Cutting our 'array' both sides for alpha-2.
calculating arithmetic mean of remaining pixels and inserting them in proper place.
int alphatrimmed(Mat img, int alpha)
{
Mat img9 = img.clone();
const int start = alpha/2 ;
const int end = 9 - (alpha/2);
//going through whole image
for (int i = 1; i < img.rows - 1; i++)
{
for (int j = 1; j < img.cols - 1; j++)
{
uchar element[9];
Vec3b element3[9];
int k = 0;
int a = 0;
//selecting elements for window 3x3
for (int m = i -1; m < i + 2; m++)
{
for (int n = j - 1; n < j + 2; n++)
{
element3[a] = img.at<Vec3b>(m*img.cols + n);
a++;
for (int c = 0; c < img.channels(); c++)
{
element[k] += img.at<Vec3b>(m*img.cols + n)[c];
}
k++;
}
}
//comparing and sorting elements in window (uchar element [9])
for (int b = 0; b < end; b++)
{
int min = b;
for (int d = b + 1; d < 9; d++)
{
if (element[d] < element[min])
{
min = d;
const uchar temp = element[b];
element[b] = element[min];
element[min] = temp;
const Vec3b temporary = element3[b];
element3[b] = element3[min];
element3[min] = temporary;
}
}
}
// index in resultant image( after alpha-trimmed filter)
int result = (i - 1) * (img.cols - 2) + j - 1;
for (int l = start ; l < end; l++)
img9.at<Vec3b>(result) += element3[l];
img9.at<Vec3b>(result) /= (9 - alpha);
}
}
namedWindow("AlphaTrimmed Filter", WINDOW_AUTOSIZE);
imshow("AlphaTrimmed Filter", img9);
return 0;
}
Without actual data, it's somewhat of a guess, but an uchar can't hold the sum of 3 channels. It works modulo 256 (at least on any platform OpenCV supports).
The proper solution is std::sort with a proper comparator for your Vec3b :
void L1(Vec3b a, Vec3b b) { return a[0]+a[1]+a[2] < b[0]+b[1]+b[2]; }
everyone I am trying to implement patter matching with FFT but I am not sure what the result should be (I think I am missing something even though a read a lot of stuff about the problem and tried a lot of different implementations this one is the best so far). Here is my FFT correlation function.
void fft2d(fftw_complex**& a, int rows, int cols, bool forward = true)
{
fftw_plan p;
for (int i = 0; i < rows; ++i)
{
p = fftw_plan_dft_1d(cols, a[i], a[i], forward ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(p);
}
fftw_complex* t = (fftw_complex*)fftw_malloc(rows * sizeof(fftw_complex));
for (int j = 0; j < cols; ++j)
{
for (int i = 0; i < rows; ++i)
{
t[i][0] = a[i][j][0];
t[i][1] = a[i][j][1];
}
p = fftw_plan_dft_1d(rows, t, t, forward ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(p);
for (int i = 0; i < rows; ++i)
{
a[i][j][0] = t[i][0];
a[i][j][1] = t[i][1];
}
}
fftw_free(t);
}
int findCorrelation(int argc, char* argv[])
{
BMP bigImage;
BMP keyImage;
BMP result;
RGBApixel blackPixel = { 0, 0, 0, 1 };
const bool swapQuadrants = (argc == 4);
if (argc < 3 || argc > 4) {
cout << "correlation img1.bmp img2.bmp" << endl;
return 1;
}
if (!keyImage.ReadFromFile(argv[1])) {
return 1;
}
if (!bigImage.ReadFromFile(argv[2])) {
return 1;
}
//Preparations
const int maxWidth = std::max(bigImage.TellWidth(), keyImage.TellWidth());
const int maxHeight = std::max(bigImage.TellHeight(), keyImage.TellHeight());
const int rowsCount = maxHeight;
const int colsCount = maxWidth;
BMP bigTemp = bigImage;
BMP keyTemp = keyImage;
keyImage.SetSize(maxWidth, maxHeight);
bigImage.SetSize(maxWidth, maxHeight);
for (int i = 0; i < rowsCount; ++i)
for (int j = 0; j < colsCount; ++j) {
RGBApixel p1;
if (i < bigTemp.TellHeight() && j < bigTemp.TellWidth()) {
p1 = bigTemp.GetPixel(j, i);
} else {
p1 = blackPixel;
}
bigImage.SetPixel(j, i, p1);
RGBApixel p2;
if (i < keyTemp.TellHeight() && j < keyTemp.TellWidth()) {
p2 = keyTemp.GetPixel(j, i);
} else {
p2 = blackPixel;
}
keyImage.SetPixel(j, i, p2);
}
//Here is where the transforms begin
fftw_complex **a = (fftw_complex**)fftw_malloc(rowsCount * sizeof(fftw_complex*));
fftw_complex **b = (fftw_complex**)fftw_malloc(rowsCount * sizeof(fftw_complex*));
fftw_complex **c = (fftw_complex**)fftw_malloc(rowsCount * sizeof(fftw_complex*));
for (int i = 0; i < rowsCount; ++i) {
a[i] = (fftw_complex*)fftw_malloc(colsCount * sizeof(fftw_complex));
b[i] = (fftw_complex*)fftw_malloc(colsCount * sizeof(fftw_complex));
c[i] = (fftw_complex*)fftw_malloc(colsCount * sizeof(fftw_complex));
for (int j = 0; j < colsCount; ++j) {
RGBApixel p1;
p1 = bigImage.GetPixel(j, i);
a[i][j][0] = (0.299*p1.Red + 0.587*p1.Green + 0.114*p1.Blue);
a[i][j][1] = 0.0;
RGBApixel p2;
p2 = keyImage.GetPixel(j, i);
b[i][j][0] = (0.299*p2.Red + 0.587*p2.Green + 0.114*p2.Blue);
b[i][j][1] = 0.0;
}
}
fft2d(a, rowsCount, colsCount);
fft2d(b, rowsCount, colsCount);
result.SetSize(maxWidth, maxHeight);
for (int i = 0; i < rowsCount; ++i)
for (int j = 0; j < colsCount; ++j) {
fftw_complex& y = a[i][j];
fftw_complex& x = b[i][j];
double u = x[0], v = x[1];
double m = y[0], n = y[1];
c[i][j][0] = u*m + n*v;
c[i][j][1] = v*m - u*n;
int fx = j;
if (fx>(colsCount / 2)) fx -= colsCount;
int fy = i;
if (fy>(rowsCount / 2)) fy -= rowsCount;
float r2 = (fx*fx + fy*fy);
const double cuttoffCoef = (maxWidth * maxHeight) / 37992.;
if (r2<128 * 128 * cuttoffCoef)
c[i][j][0] = c[i][j][1] = 0;
}
fft2d(c, rowsCount, colsCount, false);
const int halfCols = colsCount / 2;
const int halfRows = rowsCount / 2;
if (swapQuadrants) {
for (int i = 0; i < halfRows; ++i)
for (int j = 0; j < halfCols; ++j) {
std::swap(c[i][j][0], c[i + halfRows][j + halfCols][0]);
std::swap(c[i][j][1], c[i + halfRows][j + halfCols][1]);
}
for (int i = halfRows; i < rowsCount; ++i)
for (int j = 0; j < halfCols; ++j) {
std::swap(c[i][j][0], c[i - halfRows][j + halfCols][0]);
std::swap(c[i][j][1], c[i - halfRows][j + halfCols][1]);
}
}
for (int i = 0; i < rowsCount; ++i)
for (int j = 0; j < colsCount; ++j) {
const double& g = c[i][j][0];
RGBApixel pixel;
pixel.Alpha = 0;
int gInt = 255 - static_cast<int>(std::floor(g + 0.5));
pixel.Red = gInt;
pixel.Green = gInt;
pixel.Blue = gInt;
result.SetPixel(j, i, pixel);
}
BMP res;
res.SetSize(maxWidth, maxHeight);
result.WriteToFile("result.bmp");
return 0;
}
Sample output
This question would probably be more appropriately posted on another site like cross validated (metaoptimize.com used to also be a good one, but it appears to be gone)
That said:
There's two similar operations you can perform with FFT: convolution and correlation. Convolution is used for determining how two signals interact with each-other, whereas correlation can be used to express how similar two signals are to each-other. Make sure you're doing the right operation as they're both commonly implemented throught a DFT.
For this type of application of DFTs you usually wouldn't extract any useful information in the fourier spectrum unless you were looking for frequencies common to both data sources or whatever (eg, if you were comparing two bridges to see if their supports are spaced similarly).
Your 3rd image looks a lot like the power domain; normally I see the correlation output entirely grey except where overlap occurred. Your code definitely appears to be computing the inverse DFT, so unless I'm missing something the only other explanation I've come up with for the fuzzy look could be some of the "fudge factor" code in there like:
if (r2<128 * 128 * cuttoffCoef)
c[i][j][0] = c[i][j][1] = 0;
As for what you should expect: wherever there are common elements between the two images you'll see a peak. The larger the peak, the more similar the two images are near that region.
Some comments and/or recommended changes:
1) Convolution & correlation are not scale invariant operations. In other words, the size of your pattern image can make a significant difference in your output.
2) Normalize your images before correlation.
When you get the image data ready for the forward DFT pass:
a[i][j][0] = (0.299*p1.Red + 0.587*p1.Green + 0.114*p1.Blue);
a[i][j][1] = 0.0;
/* ... */
How you grayscale the image is your business (though I would've picked something like sqrt( r*r + b*b + g*g )). However, I don't see you doing anything to normalize the image.
The word "normalize" can take on a few different meanings in this context. Two common types:
normalize the range of values between 0.0 and 1.0
normalize the "whiteness" of the images
3) Run your pattern image through an edge enhancement filter. I've personally made use of canny, sobel, and I think I messed with a few others. As I recall, canny was "quick'n dirty", sobel was more expensive, but I got comparable results when it came time to do correlation. See chapter 24 of the "dsp guide" book that's freely available online. The whole book is worth your time, but if you're low on time then at a minimum chapter 24 will help a lot.
4) Re-scale the output image between [0, 255]; if you want to implement thresholds, do it after this step because the thresholding step is lossy.
My memory on this one is hazy, but as I recall (edited for clarity):
You can scale the final image pixels (before rescaling) between [-1.0, 1.0] by dividing off the largest power spectrum value from the entire power spectrum
The largest power spectrum value is, conveniently enough, the center-most value in the power spectrum (corresponding to the lowest frequency)
If you divide it off the power spectrum, you'll end up doing twice the work; since FFTs are linear, you can delay the division until after the inverse DFT pass to when you're re-scaling the pixels between [0..255].
If after rescaling most of your values end up so black you can't see them, you can use a solution to the ODE y' = y(1 - y) (one example is the sigmoid f(x) = 1 / (1 + exp(-c*x) ), for some scaling factor c that gives better gradations). This has more to do with improving your ability to interpret the results visually than anything you might use to programmatically find peaks.
edit I said [0, 255] above. I suggest you rescale to [128, 255] or some other lower bound that is gray rather than black.
How can I flatten a 3D Matrix and display it in 2d?
Are there simple ways to display it in 3d?
Edit:
So far I simply tile the images in the 3rd dimension together like thus:
void Flatten3DArray(const cv::Mat& In, cv::Mat& Out2d)
{
CV_Assert(In.dims == 3);
int rows = In.size[0];
int cols = In.size[1];
int third = In.size[2];
int rowTiles = ceil(sqrt(third));
int colTiles = ceil(sqrt(third));
Out2d.create(rowTiles*rows, colTiles*cols, In.type());
Out2d = Scalar(0);
int thirdDimIdx = 0;
for (int i = 0; i < rowTiles; ++i)
{
for (int j = 0; j < colTiles; ++j, ++thirdDimIdx)
{
if (thirdDimIdx >= third)
{
break;
}
Mat roi(Out2d(cv::Rect(j*cols, i*rows, cols, rows)));
uint16_t *ind = (uint16_t*)In.data + thirdDimIdx * rows*cols; // sub-matrix pointer
cv::Mat subMatrix(2, In.size, In.type(), ind);
subMatrix.copyTo(roi);
}
}
}
Is there a better way to do this?