Related
I'm making an image editing program in c++ using sfml and tried to add image filters using:
int clamp(int value, int min, int max)
{
if (value < min)
return min;
if (value > max)
return max;
return value;
}
void MyImage::applyKernel(std::vector<std::vector<int>> kernel)
{
int index(0), tempx(0), tempy(0);
int wr(0), wg(0), wb(0), wa(0), sum(0);
auto newPixels = new sf::Uint8[this->size_y * this->size_x * 4];
// Calculate the sum of the kernel
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
sum += kernel[i][j];
}
}
for (int y = 0; y < this->size_y; y++) {
for (int x = 0; x < this->size_x; x++) {
/*
Calculate weighted sum from kernel
*/
wr = wg = wb = wa = 0;
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
/*
Calculates the coordinates of the kernel relative to the pixel we are changing
*/
tempx = x + (j - floor(kernel[i].size() / 2));
tempy = y + (i - floor(kernel.size() / 2));
//std::cout << "kernel=(" << j << ", " << i << "), pixel=(" << x << ", " << y << ") tempPos=(" << tempx << ", " << tempy << ")\n";
/*
This code below should have the effect of mirroring the image in the case the kernel coordinate is out of bounds (along the edge of the image)
*/
tempx = (tempx < 0) ? -1 * tempx : tempx;
tempy = (tempy < 0) ? -1 * tempy : tempy;
tempx = (tempx > this->size_x) ? x - (j - floor(kernel[i].size() / 2)) : tempx;
tempy = (tempy > this->size_y) ? y - (i - floor(kernel.size() / 2)) : tempy;
if (tempx >= 0 && tempx < this->size_x && tempy >= 0 && tempy < this->size_y) {
index = (((tempy * this->size_x) - tempy) + (tempx)) * 4;
wr += kernel[i][j] * this->pixels[index];
wg += kernel[i][j] * this->pixels[index + 1];
wb += kernel[i][j] * this->pixels[index + 2];
wa += kernel[i][j] * this->pixels[index + 3];
}
}
}
if (sum) {
wr /= sum;
wg /= sum;
wb /= sum;
wa /= sum;
}
index = (((y * this->size_x) - y) + (x)) * 4;
newPixels[index] = clamp(wr, 0, 255); // Red
newPixels[index + 1] = clamp(wg, 0, 255); // Green
newPixels[index + 2] = clamp(wb, 0, 255); // Blue
newPixels[index + 3] = clamp(wa, 0, 255); // Alpha
}
}
this->pixels = newPixels;
// Copies the data from our sf::Uint8 array to the image object to be displayed => Removes the overhead of calling setPixel(x,y,color) for every pixel {As a side note setPixel() should always be avoided}|
this->im->create(this->size_x, this->size_y, this->pixels);
}
I was trying to use [-1,-1,-1], [-1,8,-1]. [-1,-1,-1] for edge detection but just ended up with a white image except for some pixels near the bottom. I've tried different images and kernels out but any that add to 0 don't work. For example if I take the edge detection kernel above and change the 8 to a 9, it gives an expected result. Is there something wrong with my idea of how convolution kernels work or is it just a bug in my code?
Thank you.
So, I decided to create a simple Canny edge detector just as exercise before biting harder topics with image processing.
I tried to follow the typical path of Canny:
1. Grayscaling the image
2. Gaussian filter to blur the noise
3. Edge detection - I use both Sobel and Scharr
4. Edge thinning - I used non-maximum suppression in direction depending on gradient direction - vertical, horizontal, 45 diagonal or 135 diagonal
5. Hysteresis
I somehow managed to get it working with Scharr's detection but I have recurring problem with double or multiple edges, espacially with Sobel. I can't really find a set of parameters which will make it work.
My algorithm for Sobel:
void sobel(sf::Image &image, pixldata **garray, float division)
{
int t1 = 0, t2 = 0, t3 = 0, t4 = 0;
sf::Color color;
sf::Image bufor;
bufor.create(image.getSize().x, image.getSize().y, sf::Color::Cyan);
for (int i = 1;i < image.getSize().y - 1;i++)
{
for (int j = 1;j < image.getSize().x - 1;j++)
{
t1 = (- image.getPixel(j - 1, i - 1).r - 2 * image.getPixel(j - 1, i).r - image.getPixel(j - 1, i + 1).r + image.getPixel(j + 1, i - 1).r + 2 * image.getPixel(j + 1, i).r + image.getPixel(j + 1, i + 1).r) / division;
t2 = (- image.getPixel(j - 1, i).r - 2 * image.getPixel(j - 1, i + 1).r - image.getPixel(j, i + 1).r + image.getPixel(j + 1, i).r + 2 * image.getPixel(j + 1, i - 1).r + image.getPixel(j, i - 1).r) / division;
t3 = (- image.getPixel(j - 1, i + 1).r - 2 * image.getPixel(j, i + 1).r - image.getPixel(j + 1, i + 1).r + image.getPixel(j - 1, i - 1).r + 2 * image.getPixel(j, i - 1).r + image.getPixel(j + 1, i - 1).r) / division;
t4 = (- image.getPixel(j, i + 1).r - 2 * image.getPixel(j + 1, i + 1).r - image.getPixel(j + 1, i).r + image.getPixel(j - 1, i).r + 2 * image.getPixel(j - 1, i - 1).r + image.getPixel(j, i - 1).r) / division;
color.r = (abs(t1) + abs(t2) + abs(t3) + abs(t4));
color.g = (abs(t1) + abs(t2) + abs(t3) + abs(t4));
color.b = (abs(t1) + abs(t2) + abs(t3) + abs(t4));
garray[j][i].gx = t1;
garray[j][i].gy = t3;
garray[j][i].gtrue = sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4);
garray[j][i].gsimpl = sqrt(t1*t1 + t2*t2);
t1 = abs(t1);
t2 = abs(t2);
t3 = abs(t3);
t4 = abs(t4);
if (t1 > t4 && t1 > t3 && t1 > t2)
garray[j][i].fi = 0;
else if (t2 > t4 && t2 > t3 && t2 > t1)
garray[j][i].fi = 45;
else if (t3 > t4 && t3 > t2 && t3 > t1)
garray[j][i].fi = 90;
else if (t4 > t3 && t4 > t2 && t4 > t1)
garray[j][i].fi = 135;
else
garray[j][i].fi = 0;
if (sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4) < 0)
{
color.r = 0;
color.g = 0;
color.b = 0;
}
else if (sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4) > 255)
{
color.r = 255;
color.g = 255;
color.b = 255;
}
else
{
color.r = sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4);
color.g = sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4);
color.b = sqrt(t1*t1 + t2*t2 + t3*t3 + t4*t4);
}
bufor.setPixel(j, i, color);
}
}
image.copy(bufor, 0, 0);
}
Code for Scharr differs only in multiplying the pixels' values.
t1 = (-3 * image.getPixel(j - 1, i - 1).r - 10 * image.getPixel(j - 1, i).r - 3 * image.getPixel(j - 1, i + 1).r + 3 * image.getPixel(j + 1, i - 1).r + 10 * image.getPixel(j + 1, i).r + 3 * image.getPixel(j + 1, i + 1).r) / division;
t2 = (-3 * image.getPixel(j - 1, i).r - 10 * image.getPixel(j - 1, i + 1).r - 3 * image.getPixel(j, i + 1).r + 3 * image.getPixel(j + 1, i).r + 10 * image.getPixel(j + 1, i - 1).r + 3 * image.getPixel(j, i - 1).r) / division;
t3 = (-3 * image.getPixel(j - 1, i + 1).r - 10 * image.getPixel(j, i + 1).r - 3 * image.getPixel(j + 1, i + 1).r + 3 * image.getPixel(j - 1, i - 1).r + 10 * image.getPixel(j, i - 1).r + 3 * image.getPixel(j + 1, i - 1).r) / division;
t4 = (-3 * image.getPixel(j, i + 1).r - 10 * image.getPixel(j + 1, i + 1).r - 3 * image.getPixel(j + 1, i).r + 3 * image.getPixel(j - 1, i).r + 10 * image.getPixel(j - 1, i - 1).r + 3 * image.getPixel(j, i - 1).r) / division;
Thinning code:
void intelligentThin(sf::Image &image, int radius, pixldata **garray)
{
int xmax = image.getSize().x;
int ymax = image.getSize().y;
bool judgeandjury = true;
for (int i = 0;i < xmax;i++)
{
int leftBound = 0, rightBound = 0, ceilBound = 0, bottomBound = 0;
if (i < radius)
{
leftBound = 0;
rightBound = i + radius;
}
else if (i >= xmax - radius)
{
leftBound = i - radius;
rightBound = xmax - 1;
}
else
{
leftBound = i - radius;
rightBound = i + radius;
}
for (int j = 0;j < ymax;j++)
{
if (j < radius)
{
ceilBound = 0;
bottomBound = j + radius;
}
else if (j >= ymax - radius)
{
ceilBound = j - radius;
bottomBound = ymax - 1;
}
else
{
ceilBound = j - radius;
bottomBound = j + radius;
}
if (garray[i][j].fi == 0)
{
for (int t = leftBound; t <= rightBound; t++)
{
if ((image.getPixel(t, j).r >= image.getPixel(i, j).r) && (t != i))
{
judgeandjury = false;
}
}
}
else if (garray[i][j].fi == 135)
{
for (int l = leftBound, t = ceilBound; (l <= rightBound && t <= bottomBound); l++, t++)
{
if ((image.getPixel(l, t).r >= image.getPixel(i, j).r) && (t != j))
{
judgeandjury = false;
}
}
}
else if (garray[i][j].fi == 90)
{
for (int t = ceilBound; t <= bottomBound; t++)
{
if ((image.getPixel(i, t).r >= image.getPixel(i, j).r) && (t != j))
{
judgeandjury = false;
}
}
}
else if (garray[i][j].fi == 45)
{
for (int l = rightBound, t = ceilBound; (l >= leftBound && t <= bottomBound); l--, t++)
{
if ((image.getPixel(l, t).r >= image.getPixel(i, j).r) && (t != j))
{
judgeandjury = false;
}
}
}
if (judgeandjury == false)
{
image.setPixel(i, j, sf::Color::Black);
}
judgeandjury = true;
}
leftBound = rightBound = 0;
}
}
Hysteresis code:
void hysteresis(sf::Image &image, int radius, int uplevel, int lowlevel)
{
int xmax = image.getSize().x;
int ymax = image.getSize().y;
bool judgeandjury = false;
sf::Image bufor;
bufor.create(image.getSize().x, image.getSize().y, sf::Color::Cyan);
for (int i = 0;i < xmax;i++)
{
int leftBound = 0, rightBound = 0, ceilBound = 0, bottomBound = 0;
if (i < radius)
{
leftBound = 0;
rightBound = i + radius;
}
else if (i >= xmax - radius)
{
leftBound = i - radius;
rightBound = xmax - 1;
}
else
{
leftBound = i - radius;
rightBound = i + radius;
}
for (int j = 0;j < ymax;j++)
{
int currentPoint = image.getPixel(i, j).r;
if (j < radius)
{
ceilBound = 0;
bottomBound = j + radius;
}
else if (j >= ymax - radius)
{
ceilBound = j - radius;
bottomBound = ymax - 1;
}
else
{
ceilBound = j - radius;
bottomBound = j + radius;
}
if (currentPoint > uplevel)
{
judgeandjury = true;
}
else if (currentPoint > lowlevel)
{
for (int t = leftBound; t <= rightBound; t++)
{
for (int l = ceilBound; l <= bottomBound; l++)
{
if (image.getPixel(t, l).r > uplevel)
{
judgeandjury = true;
}
}
}
}
else judgeandjury = false;
if (judgeandjury == true)
{
bufor.setPixel(i, j, sf::Color::White);
}
else
{
bufor.setPixel(i, j, sf::Color::Black);
}
judgeandjury = false;
currentPoint = 0;
}
leftBound = rightBound = 0;
}
image.copy(bufor, 0, 0);
}
The results are quite unsatisfactionary for Sobel:
Thinning the Sobel
Sobel after hysteresis
With Scharr the results are way better:
Thinned Scharr
Scharr after hysteresis
Set of parameters:
#define thinsize 1
#define scharrDivision 1
#define sobelDivision 1
#define hysteresisRadius 1
#define level 40
#define hysteresisUpperLevelSobel 80
#define hysteresisLowerLevelSobel 60
#define hysteresisUpperLevelScharr 200
#define hysteresisLowerLevelScharr 100
As you can see, there is a problem with Sobel, which generate double edges. Scharr also generates some noise but I think it's acceptable. Of course, it always can get better, if someone could give some advice :)
What is the cause of this behaviour? Does it result from my mistakes or poor algorithms or maybe is it just a case of parameters?
EDIT:
posting main()
sf::Image imydz;
imydz.loadFromFile("lena.jpg");
int x = imydz.getSize().x;
int y = imydz.getSize().y;
pixldata **garray = new pixldata *[x];
for (int i = 0;i < x;i++)
{
garray[i] = new pixldata[y];
}
monochrome(imydz);
gauss(imydz, radius, sigma);
//sobel(imydz, garray, sobelDivision);
scharr(imydz, garray, scharrDivision);
intelligentThin(imydz, thinsize, garray);
hysteresis(imydz, hysteresisRadius, hysteresisUpperLevel, hysteresisLowerLevel);
Second edit - repaired suppression:
sf::Image bufor;
bufor.create(image.getSize().x, image.getSize().y, sf::Color::Black);
for (int i = 1;i < xmax - 1;i++)
{
for (int j = 1;j < ymax - 1;j++)
{
if (garray[i][j].fi == 0)
{
if (((image.getPixel(i, j).r >= image.getPixel(i + 1, j).r) && (image.getPixel(i, j).r > image.getPixel(i - 1, j).r)) ||
((image.getPixel(i, j).r > image.getPixel(i + 1, j).r) && (image.getPixel(i, j).r >= image.getPixel(i - 1, j).r)))
{
judgeandjury = true;
}
else judgeandjury = false;
}
...
if (judgeandjury == false)
{
bufor.setPixel(i, j, sf::Color::Black);
}
else bufor.setPixel(i, j, image.getPixel(i, j));
judgeandjury = false;
}
}
image.copy(bufor, 0, 0);
Repaired Scharr on Lena
It seems strange
Another test image - strange results
Before binarization
Ready gears
I haven't read your whole code in detail, there is much too much code there. But obviously your non-maximum suppression code is wrong. Let's look at what it does for one pixel in the middle of the image, where the gradient is close to 0 degrees:
leftBound = i - radius;
rightBound = i + radius;
// ...
for (int t = leftBound; t <= rightBound; t++)
{
if ((image.getPixel(t, j).r >= image.getPixel(i, j).r) && (t != i))
{
judgeandjury = false; // it's not a maximum: suppress
}
}
// ...
if (judgeandjury == false)
{
image.setPixel(i, j, sf::Color::Black);
}
Here, radius is set to 1 by the calling code. Any other value would be bad, so this is OK. I would remove that as a parameter altogether. Now your loop is:
for (int t = i-1; t <= t+1; t++)
if (t != i)
This means that you hit exactly two values of t. So this should of course be replaced with simpler code that does not loop, it will be more readable.
This is what it now does:
if ( (image.getPixel(i-1, j).r >= image.getPixel(i, j).r)
|| (image.getPixel(i+1, j).r >= image.getPixel(i, j).r)) {
judgeandjury = false; // it's not a maximum: suppress
}
So you suppress the pixel if it is not strictly larger than its neighbors. Looking back at the Wikipedia article, it seems that they suggest the same. But in fact, this is not correct, you want the point to be strictly larger than one of the two neighbors, and larger or equal to the other. This prevents the situation where the gradient happens to be equally strong on two neighboring pixels. The actual maximum can fall right in the middle of two pixels, yielding two pixels on this local maximum gradient with exactly the same value. But let's ignore this case for now, it is possible but not all that likely.
Next, you suppress the maximum... in the input image! This means that, when you reach the next pixel on this line, you will compare its value to this value that was just suppressed. Of course it will be larger, even though it was smaller than the original value at that location. That is, non-maxima will look like maxima because you put a neighboring pixel to 0.
So: write the result of the algorithm to an output image:
if (judgeandjury == true)
{
output.setPixel(i, j, image.getPixel(i, j));
}
...which of course you need to allocate, but you already know that.
Your second problem is in the sobel function, where you compute the gradient magnitude. It clips (saturates) the output. By cutting values of the output above 255 to 255, you create very broad lines along the edges of a constant value. The test of the non-maximum suppression is satisfied at the two edges of this line, but not in the middle, where pixels have the same value as both its neighbors.
To solve this, either:
Use a floating-point buffer to store the gradient magnitude. Here you don’t need to worry about data ranges.
Divide the magnitude by some value such that it will never exceed 255. Now you’re quantifying the magnitude rather than clipping it. Quantizing should be fine in this case.
I strongly recommend that you follow (1). I typically use floating-point—values images for everything, and only convert to 8-bit ints for display. This simplified a lot of things!
I am working on creating my own implementation of a separable sobel filter implementation. My function has as input the kernelSize, the horizontal filter of gradient Y as pixelsY1, the vertical filter of gradient Y as pixelsY2, the horizontal filter of gradient X as pixelsX1, the vertical filter of gradient X as pixelsX2.
The input of X1 is [1, 0, -1] (horizontal)
The input of X2 is [1, 2, 1] (vertical)
The input of Y1 is [1, 2, 1] (horizontal)
The input of Y2 is [1, 0 -1] (vertical)
void gradientFilter1D(Mat& img, int kernelSize, vector<double> pixelsY1, vector<double> pixelsY2, vector<double> pixelsX1, vector<double> pixelsX2)
{
int sumMin = INT_MAX, sumMax = INT_MIN;
//gradient X
vector<vector<int>> pixelsX(img.rows, vector<int>(img.cols, 0));
//gradient Y
vector<vector<int>> pixelsY(img.rows, vector<int>(img.cols, 0));
vector<vector<int>> sumArray(img.rows, vector<int>(img.cols, 0));
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the horizontal multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += img.at<uchar>(j, i + x) * pixelsY1[x + (kernelSize / 2)];
totalX += img.at<uchar>(j, i + x) * pixelsX1[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX;
pixelsY[j][i] = totalY;
}
}
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the vertical multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += pixelsY[j + x][i] * pixelsY2[x + (kernelSize / 2)];
totalX += pixelsX[j + x][i] * pixelsX2[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX;
pixelsY[j][i] = totalY;
}
}
for (int j = 0; j < img.rows; j++)
{
for (int i = 0; i < img.cols; i++)
{
int sum;
sum = sqrt(pow(pixelsX[j][i], 2) + pow(pixelsY[j][i], 2));
sumArray[j][i] = sum;
sumMin = sumMin < sum ? sumMin : sum;
sumMax = sumMax > sum ? sumMax : sum;
}
}
//normalization
for (int j = 0; j < img.rows; j++)
for (int i = 0; i < img.cols; i++)
{
sumArray[j][i] = (sumArray[j][i] - sumMin) * ((255.0 - 0) / (sumMax - sumMin)) + 0;
img.at<uchar>(j, i) = sumArray[j][i];
}
}
Input Image:
Output Image:
What am I doing wrong?
The separable filter is computed in what are effectively two passes. (The passes can be interleaved, but all the values used by the vertical filter have to have already been computed by the horizontal filter if doing it in that order.) Right below the comment //then here I do the vertical multiplication there are accesses to pixelsX and pixelsY that are effectively a second pass of the separable filter. The values being accessed for negative values of x have been previously computed and the ones for positive values of x have not yet been computed by the horizontal pass.
check out Halide. It makes this sort of code a lot easier and more performant. (A double nesting of std::vector is not a good way to go.)
Okay, so my mistake was actually in this
for (int j = kernelSize / 2; j < img.rows - kernelSize / 2; j++)
{
for (int i = kernelSize / 2; i < img.cols - kernelSize / 2; i++)
{
double totalX = 0;
double totalY = 0;
//this is the vertical multiplication
for (int x = -kernelSize / 2; x <= kernelSize / 2; x++)
{
totalY += pixelsY[j + x][i] * pixelsY2[x + (kernelSize / 2)];
totalX += pixelsX[j + x][i] * pixelsX2[x + (kernelSize / 2)];
//cout << int(img.at<uchar>(j, i + x)) << " " << pixelsY1[x + (kernelSize / 2)] << endl;
}
pixelsX[j][i] = totalX; <---- I overwrite the old values
pixelsY[j][i] = totalY; <--- I overwrite the old values
}
}
So, pixelsX[j][i] = totalX and so forth is wrong, because I need the old values in order to finish the computation in the rest of the j, and i loops. So, I created another vector of vectors and pushed in it the totalX's and Y's, and this solved my issue.
I'm implementing an algorithm, I excuse myself for the extreme for looping, haven't found a better way yet.
The problem is that at the second iteration at line 81 it gives a First-chance exception at 0x000000007707320E (ntdll.dll) in Test.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF.
void co_hog(Mat image, int offset, int blockSize, int nrBins, int cat) {
Mat img_x;
Mat img_y;
IplImage img = image;
Mat kern_x = (Mat_<char>(1, 3) << -1, 0, 1);
Mat kern_y = (Mat_<char>(3, 1) << -1, 0, 1);
filter2D(image, img_x, image.depth(), kern_x);
filter2D(image, img_y, image.depth(), kern_y);
Size imageSize = image.size();
int nrBlocksY = imageSize.height / blockSize;
int nrBlocksX = imageSize.width / blockSize;
int degreePerBin = 180 / nrBins;
Mat gradients = Mat(image.size(), CV_32FC1);
Mat magnitudes = Mat(image.size(), CV_32FC1);
for(int y = 0; y < image.rows; y++) {
for(int x = 0; x < image.cols; x++) {
float grad_x = (float)img_x.at<uchar>(y, x);
float grad_y = (float)img_y.at<uchar>(y, x);
gradients.at<float>(y, x) = abs(atan2(grad_y, grad_x) * 180 / PI);
magnitudes.at<float>(y, x) = sqrt(pow(grad_x, 2) + pow(grad_y, 2));
}
}
int bin_1, bin_2, bin_3, bin_4;
double theta_1, theta_2, theta_3, theta_4;
Mat H;
stringstream line(stringstream::in | stringstream::out);
line << cat << " ";
int index = 1;
for(int i = 0; i < nrBlocksY; i++) {
for(int j = 0; j < nrBlocksX; j++) {
Mat coOccMat = Mat::zeros(nrBins, nrBins, CV_32FC1);
for(int q = i * blockSize; q < (i * blockSize) + blockSize; q++) {
for(int p = j * blockSize; p < (j * blockSize) + blockSize; p++) {
for(int offy = -offset; offy < offset; offy++) {
for(int offx = -offset; offx < offset; offx++) {
if((q + offy) >= imageSize.height || (p + offx) >= imageSize.width || (q + offy) < 0 || (p + offx) < 0) {
continue;
}
float m_1 = magnitudes.at<float>(q, p);
float m_2 = magnitudes.at<float>(q + offy, p + offx);
float alpha = gradients.at<float>(q, p);
float beta = gradients.at<float>(q + offy, p + offx);
if(fmod(alpha / degreePerBin, 1) > 0.5) {
bin_1 = floor(alpha / degreePerBin);
bin_2 = bin_1 + 1;
} else {
bin_2 = floor(alpha / degreePerBin);
bin_1 = bin_2 - 1;
}
if(fmod(beta / degreePerBin, 1) > 0.5) {
bin_3 = floor(beta / degreePerBin);
bin_4 = bin_3 + 1;
} else {
bin_4 = floor(beta / degreePerBin);
bin_3 = bin_4 - 1;
}
theta_1 = (bin_1 * degreePerBin) + (degreePerBin / 2);
theta_2 = (bin_2 * degreePerBin) + (degreePerBin / 2);
theta_3 = (bin_3 * degreePerBin) + (degreePerBin / 2);
theta_4 = (bin_4 * degreePerBin) + (degreePerBin / 2);
coOccMat.at<float>(bin_1, bin_3) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_1, bin_4) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_3) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_4) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
}
}
}
}
cout << coOccMat << endl;
-> Next statement to be called *passes the first time* H = coOccMat.reshape(0, 1);
normalize(H, H);
cout << H.size() << endl;
for(int i = 0; i < H.cols; ++i) {
for(int j = 0; j < H.rows; ++j) {
if(H.at<float>(j, i) > 0) {
line << index << ":" << H.at<float>(j, i) << " ";
}
index++;
}
}
cout << "Done" << index << endl;
}
}
}
Problem has been fixed, sometimes the value for a bin was set on -1 so it couldn't access it, debugging tools of visual studio couldn't point out where it went wrong.
For a project I'm writing some code to compute the HoG of some images, but I'm stuck with the fact that my orientations are only between 0 ~ 90 degrees, while using the atan2 function.
I'm guessing that this problem occurs due to the filter2D function of OpenCV but I'm not sure if this is the reason or that I'm doing something else wrong:
Vector<Vector<Mat_<float>>> HoG(Mat image) {
Mat img_x;
Mat img_y;
IplImage img = image;
Mat kern_x = (Mat_<char>(1, 3) << -1, 0, 1);
Mat kern_y = (Mat_<char>(3, 1) << -1, 0, 1);
filter2D(image, img_x, image.depth(), kern_x);
filter2D(image, img_y, image.depth(), kern_y);
Vector<Vector<Mat_<float>>> histograms;
for(int y = 0; y < image.rows - size; y += size) {
Vector<Mat_<float>> temp_hist;
for(int x = 0; x < image.cols - size; x += size) {
float total_mag = 0;
Mat hist = Mat::zeros(1, 8, CV_32FC1);
for(int i = y; i < y + size; ++i) {
for(int j = x; j < x + size; ++j) {
float grad_x = (float)img_x.at<uchar>(i, j);
float grad_y = (float)img_y.at<uchar>(i, j);
double ori = myatan2(grad_x, grad_y);
float mag = sqrt(pow(grad_x, 2) + pow(grad_y, 2));
int bin = round(ori/45);
hist.at<float>(0, (bin - 1 < 0 ? 7 : bin - 1)) += - (float)(ori - ((round(ori/45) - 1) * 45.0 + 22.5)) / 45.0f;
hist.at<float>(0, bin) += -(float)(ori - ((round(ori/45) - 1) * 45.0 + 22.5)) / 45.0f;
total_mag += mag;
}
}
// Normalize the histogram
for(int i = 0; i < 8; ++i) {
hist.at<float>(0, i) = hist.at<float>(0, i) / total_mag;
}
temp_hist.push_back(hist);
}
histograms.push_back(temp_hist);
}
return histograms;
}
If you have any other tips to increase a speed-up in my code or something else that is also welcome of course.
I notice this:
float grad_x = (float)img_x.at<uchar>(i, j);
float grad_y = (float)img_y.at<uchar>(i, j);
You seem to be using uchar. Should this not be char?