I am trying to do Sobel operator in the HSV dimension (told to do this in the HSV by my guide but I dont understand why it will work better on HSV than on RGB) .
I have built a function that converts from RGB to HSV . while I have some mediocre knowledge in C++ I am getting confused by the Image Processing thus I tried to keep the code as simple as possible , meaning I dont care (at this stage) about time nor space .
From looking on the results I got in gray levels bmp photos , my V and S seems to be fine but my H looks very gibbrish .
I got 2 questions here :
1. How a normal H photo in gray level should look a like comparing to the source photo ?
2. Where was I wrong in the code :
void RGBtoHSV(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS],
float Him[][NUMBER_OF_COLUMNS],
float Vim[][NUMBER_OF_COLUMNS],
float Sim[][NUMBER_OF_COLUMNS])
{
double Rn, Gn, Bn;
double C;
double H, S, V;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
Rn = (1.0*image[row][column][R]) / 255;
Gn = (1.0*image[row][column][G] )/ 255;
Bn = (1.0*image[row][column][B] )/ 255;
//double RGBn[3] = { Rn, Gn, Bn };
double max = Rn;
if (max < Gn) max = Gn;
if (max < Bn) max = Bn;
double min = Rn;
if (min > Gn) min = Gn;
if (min > Bn) min = Bn;
C = max - min;
H = 0;
if (max==0)
{
S = 0;
H = -1; //undifined;
V = max;
}
else
{
/* if (max == Rn)
H = (60.0* ((int)((Gn - Bn) / C) % 6));
else if (max == Gn)
H = 60.0*( (Bn - Rn)/C + 2);
else
H = 60.0*( (Rn - Gn)/C + 4);
*/
if (max == Rn)
H = ( 60.0* ( (Gn - Bn) / C) ) ;
else if (max == Gn)
H = 60.0*((Bn - Rn) / C + 2);
else
H = 60.0*((Rn - Gn) / C + 4);
V = max; //AKA lightness
S = C / max; //saturation
}
while (H < 0)
H += 360;
while (H>360)
H -= 360;
Him[row][column] = (float)H;
Vim[row][column] = (float)V;
Sim[row][column] = (float)S;
}
}
}
also my hsvtorgb :
void HSVtoRGB(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS],
float Him[][NUMBER_OF_COLUMNS],
float Vim[][NUMBER_OF_COLUMNS],
float Sim[][NUMBER_OF_COLUMNS])
{
double R1, G1, B1;
double C;
double V;
double S;
double H;
int Htag;
double Htag2;
double x;
double m;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
H = (double)Him[row][column];
S = (double)Sim[row][column];
V = (double)Vim[row][column];
C = V*S;
Htag = (int) (H / 60.0);
Htag2 = H/ 60.0;
//x = C*(1 - abs(Htag % 2 - 1));
double tmp1 = fmod(Htag2, 2);
double temp=(1 - abs(tmp1 - 1));
x = C*temp;
//switch (Htag)
switch (Htag)
{
case 0 :
R1 = C;
G1 = x;
B1 = 0;
break;
case 1:
R1 = x;
G1 = C;
B1 = 0;
break;
case 2:
R1 = 0;
G1 = C;
B1 = x;
break;
case 3:
R1 = 0;
G1 = x;
B1 = C;
break;
case 4:
R1 = x;
G1 = 0;
B1 = C;
break;
case 5:
R1 = C;
G1 = 0;
B1 = x;
break;
default:
R1 = 0;
G1 = 0;
B1 = 0;
break;
}
m = V - C;
//this is also good change I found
//image[row][column][R] = unsigned char( (R1 + m)*255);
//image[row][column][G] = unsigned char( (G1 + m)*255);
//image[row][column][B] = unsigned char( (B1 + m)*255);
image[row][column][R] = round((R1 + m) * 255);
image[row][column][G] = round((G1 + m) * 255);
image[row][column][B] = round((B1 + m) * 255);
}
}
}
void HSVfloattoGrayconvert(unsigned char grayimage[NUMBER_OF_ROWS] [NUMBER_OF_COLUMNS], float hsvimage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS], char hsv)
{
//grayimage , flaotimage , h/s/v
float factor;
if (hsv == 'h' || hsv == 'H') factor = (float) 1 / 360;
else factor = 1;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
grayimage[row][column] = (unsigned char) (0.5f + 255.0f * (float)hsvimage[row][column] / factor);
}
}
}
and my main:
unsigned char ColorImage1[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS] [NUMBER_OF_COLORS];
float Himage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
float Vimage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
float Simage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char ColorImage2[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS] [NUMBER_OF_COLORS];
unsigned char HimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char VimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char SimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char HAfterSobel[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char VAfterSobel[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char SAfterSobal[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char HSVcolorAfterSobal[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS];
unsigned char RGBAfterSobal[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS];
int KernelX[3][3] = {
{-1,0,+1}, {-2,0,2}, {-1,0,1 }
};
int KernelY[3][3] = {
{-1,-2,-1}, {0,0,0}, {1,2,1}
};
void main()
{
//work
LoadBgrImageFromTrueColorBmpFile(ColorImage1, "P22A.bmp");
// add noise
AddSaltAndPepperNoiseRGB(ColorImage1, 350, 255);
StoreBgrImageAsTrueColorBmpFile(ColorImage1, "saltandpepper.bmp");
AddGaussNoiseCPPstileRGB(ColorImage1, 0.0, 1.0);
StoreBgrImageAsTrueColorBmpFile(ColorImage1, "Saltandgauss.bmp");
//saves hsv in float array
RGBtoHSV(ColorImage1, Himage, Vimage, Simage);
//saves hsv float arrays in unsigned char arrays
HSVfloattoGrayconvert(HimageGray, Himage, 'h');
HSVfloattoGrayconvert(VimageGray, Vimage, 'v');
HSVfloattoGrayconvert(SimageGray, Simage, 's');
StoreGrayImageAsGrayBmpFile(HimageGray, "P22H.bmp");
StoreGrayImageAsGrayBmpFile(VimageGray, "P22V.bmp");
StoreGrayImageAsGrayBmpFile(SimageGray, "P22S.bmp");
WaitForUserPressKey();
}
edit : Changed Code + add sources for equations :
Soruce : for equations :
http://www.rapidtables.com/convert/color/hsv-to-rgb.htm
http://www.rapidtables.com/convert/color/rgb-to-hsv.htm
edit3:
listening to #gpasch advice and using better reference and deleting the mod6 I am now able to restore the RGB original photo!!! but unfortunately now my H photo in grayscale is even more chaotic than before .
I'll edit the code about so it will have more info about how I am saving the H grayscale photo .
That is the peril of going through garbage web sites; I suggest the following:
https://www.cs.rit.edu/~ncs/color/t_convert.html
That mod 6 seems fishy there.
You also need to make sure you understand that H is in degrees from 0 to 360; if your filter expects 0..1 you have the change.
I am trying to do Sobel operator in the HSV dimension (told to do this in the HSV by my guide but I dont understand why it will work better on HSV than on RGB)
It depends on what you are trying to achieve. If you're trying to do edge detection based on brightness for example, then just working with say the V channel might be simpler than processing all three channels of RGB and combining them afterwards.
How a normal H photo in gray level should look a like comparing to the source photo ?
You would see regions which are a similar colour appear as a similar shade of grey, and for a real-world scene you would still see gradients. But where there are spatially adjacent regions with colours far apart in hue, there would be a sharp jump. The shapes would generally be recognisable though.
Where was I wrong in the code :
There are two main problems with your code. The first is that the hue scaling in HSVfloattoGrayconvert is wrong. Your code is setting factor=1.0/360.0f but then dividing by the factor, which means it's multiplying by 360. If you simply multiply by the factor, it produces the expected output. This is because the earlier calculation uses normalised values (0..1) for S and V but angle in degrees for H, so you need to divide by 360 to normalise H.
Second, the conversion back to RGB has a problem, mainly to do with calculating Htag where you want the original value for calculating x but the floor only when switching on the sector.
Note that despite what #gpasch suggested, the mod 6 operation is actually correct. This is because the conversion you are using is based on the hexagonal colour space model for HSV, and this is used to determine which sector your colour is in. For a continuous model, you could use a radial conversion instead which is slightly different. Both are well explained on Wikipedia.
I took your code, added a few functions to generate input data and save output files so it is completely standalone, and fixed the bugs above while making minimal changes to the source.
Given the following generated input image:
the Hue channel extracted is:
The saturation channel is:
and finally value:
After fixing up the HSV to RGB conversion, I verified that the resulting output image matches the original.
The updated code is below (as mentioned above, changed minimally to make a standalone test):
#include <string>
#include <cmath>
#include <cstdlib>
enum ColorIndex
{
R = 0,
G = 1,
B = 2,
};
namespace
{
const unsigned NUMBER_OF_COLUMNS = 256;
const unsigned NUMBER_OF_ROWS = 256;
const unsigned NUMBER_OF_COLORS = 3;
};
void RGBtoHSV(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS],
float Him[][NUMBER_OF_COLUMNS],
float Vim[][NUMBER_OF_COLUMNS],
float Sim[][NUMBER_OF_COLUMNS])
{
double Rn, Gn, Bn;
double C;
double H, S, V;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
Rn = image[row][column][R] / 255.0;
Gn = image[row][column][G] / 255.0;
Bn = image[row][column][B] / 255.0;
double max = Rn;
if (max < Gn) max = Gn;
if (max < Bn) max = Bn;
double min = Rn;
if (min > Gn) min = Gn;
if (min > Bn) min = Bn;
C = max - min;
H = 0;
if (max==0)
{
S = 0;
H = 0; // Undefined
V = max;
}
else
{
if (max == Rn)
H = 60.0*fmod((Gn - Bn) / C, 6.0);
else if (max == Gn)
H = 60.0*((Bn - Rn) / C + 2);
else
H = 60.0*((Rn - Gn) / C + 4);
V = max; //AKA lightness
S = C / max; //saturation
}
while (H < 0)
H += 360.0;
while (H > 360)
H -= 360.0;
Him[row][column] = (float)H;
Vim[row][column] = (float)V;
Sim[row][column] = (float)S;
}
}
}
void HSVtoRGB(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS],
float Him[][NUMBER_OF_COLUMNS],
float Vim[][NUMBER_OF_COLUMNS],
float Sim[][NUMBER_OF_COLUMNS])
{
double R1, G1, B1;
double C;
double V;
double S;
double H;
double Htag;
double x;
double m;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
H = (double)Him[row][column];
S = (double)Sim[row][column];
V = (double)Vim[row][column];
C = V*S;
Htag = H / 60.0;
double x = C*(1.0 - fabs(fmod(Htag, 2.0) - 1.0));
int i = floor(Htag);
switch (i)
{
case 0 :
R1 = C;
G1 = x;
B1 = 0;
break;
case 1:
R1 = x;
G1 = C;
B1 = 0;
break;
case 2:
R1 = 0;
G1 = C;
B1 = x;
break;
case 3:
R1 = 0;
G1 = x;
B1 = C;
break;
case 4:
R1 = x;
G1 = 0;
B1 = C;
break;
case 5:
R1 = C;
G1 = 0;
B1 = x;
break;
default:
R1 = 0;
G1 = 0;
B1 = 0;
break;
}
m = V - C;
image[row][column][R] = round((R1 + m) * 255);
image[row][column][G] = round((G1 + m) * 255);
image[row][column][B] = round((B1 + m) * 255);
}
}
}
void HSVfloattoGrayconvert(unsigned char grayimage[][NUMBER_OF_COLUMNS], float hsvimage[][NUMBER_OF_COLUMNS], char hsv)
{
//grayimage , flaotimage , h/s/v
float factor;
if (hsv == 'h' || hsv == 'H') factor = 1.0f/360.0f;
else factor = 1.0f;
for (int row = 0; row < NUMBER_OF_ROWS; row++)
{
for (int column = 0; column < NUMBER_OF_COLUMNS; column++)
{
grayimage[row][column] = (unsigned char) (0.5f + 255.0f * (float)hsvimage[row][column] * factor);
}
}
}
int KernelX[3][3] = {
{-1,0,+1}, {-2,0,2}, {-1,0,1 }
};
int KernelY[3][3] = {
{-1,-2,-1}, {0,0,0}, {1,2,1}
};
void GenerateTestImage(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS])
{
for (unsigned y = 0; y < NUMBER_OF_ROWS; y++)
{
for (unsigned x = 0; x < NUMBER_OF_COLUMNS; x++)
{
image[y][x][R] = x % 256;
image[y][x][G] = y % 256;
image[y][x][B] = (255-x) % 256;
}
}
}
void GenerateTestImage(unsigned char image[][NUMBER_OF_COLUMNS])
{
for (unsigned y = 0; y < NUMBER_OF_ROWS; y++)
{
for (unsigned x = 0; x < NUMBER_OF_COLUMNS; x++)
{
image[x][y] = x % 256;
}
}
}
// Color (three channel) images
void SaveImage(unsigned char image[][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS], const std::string& filename)
{
FILE* fp = fopen(filename.c_str(), "w");
fprintf(fp, "P6\n%u %u\n255\n", NUMBER_OF_COLUMNS, NUMBER_OF_ROWS);
fwrite(image, NUMBER_OF_COLORS, NUMBER_OF_ROWS*NUMBER_OF_COLUMNS, fp);
fclose(fp);
}
// Grayscale (single channel) images
void SaveImage(unsigned char image[][NUMBER_OF_COLUMNS], const std::string& filename)
{
FILE* fp = fopen(filename.c_str(), "w");
fprintf(fp, "P5\n%u %u\n255\n", NUMBER_OF_COLUMNS, NUMBER_OF_ROWS);
fwrite(image, 1, NUMBER_OF_ROWS*NUMBER_OF_COLUMNS, fp);
fclose(fp);
}
unsigned char ColorImage1[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS][NUMBER_OF_COLORS];
unsigned char Himage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char Simage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
unsigned char Vimage[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
float HimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
float SimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
float VimageGray[NUMBER_OF_ROWS][NUMBER_OF_COLUMNS];
int main()
{
// Test input
GenerateTestImage(ColorImage1);
SaveImage(ColorImage1, "test_input.ppm");
//saves hsv in float array
RGBtoHSV(ColorImage1, HimageGray, VimageGray, SimageGray);
//saves hsv float arrays in unsigned char arrays
HSVfloattoGrayconvert(Himage, HimageGray, 'h');
HSVfloattoGrayconvert(Vimage, VimageGray, 'v');
HSVfloattoGrayconvert(Simage, SimageGray, 's');
SaveImage(Himage, "P22H.pgm");
SaveImage(Vimage, "P22V.pgm");
SaveImage(Simage, "P22S.pgm");
// Convert back to get the original test image
HSVtoRGB(ColorImage1, HimageGray, VimageGray, SimageGray);
SaveImage(ColorImage1, "test_output.ppm");
return 0;
}
The input image was generated by a very simple algorithm which gives us gradients in each dimension, so we can easily inspect and verify the expected output. I used ppm/pgm files as they are simpler to write and more portable than BMP.
Hope this helps - let me know if you have any questions.
Related
I'm starting with my c++ threads and don't understand some basic stuff. That's Mandelbrot example, it generates fractal image.
It's not my code, I just did some changes (here's original: https://rosettacode.org/wiki/Mandelbrot_set#PPM_non_interactive)
I have this function which generates matrix with colors to save to file:
vector<unsigned char *> drawMandelbrot()
{
/* screen ( integer) coordinate */
int iX, iY;
double Cx, Cy;
const double CxMin = -2.5;
const double CxMax = 1.5;
const double CyMin = -2.0;
const double CyMax = 2.0;
double PixelWidth = (CxMax - CxMin) / iXmax;
double PixelHeight = (CyMax - CyMin) / iYmax;
int Index = 0;
const int IterationMax = 200;
unsigned char color[3];
vector<unsigned char *> rows(MaxIndex);
double Zx, Zy;
double Zx2, Zy2;
int Iteration;
const double EscapeRadius = 2;
double ER2 = EscapeRadius * EscapeRadius;
for (iY = 0; iY < iYmax; iY++)
{
Cy = CyMin + iY * PixelHeight;
if (fabs(Cy) < PixelHeight / 2)
Cy = 0.0; /* Main antenna */
for (iX = 0; iX < iXmax; iX++)
{
Cx = CxMin + iX * PixelWidth;
/* initial value of orbit = critical point Z= 0 */
Zx = 0.0;
Zy = 0.0;
Zx2 = Zx * Zx;
Zy2 = Zy * Zy;
/* */
for (Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
{
Zy = 2 * Zx * Zy + Cy;
Zx = Zx2 - Zy2 + Cx;
Zx2 = Zx * Zx;
Zy2 = Zy * Zy;
};
/* compute pixel color (24 bit = 3 bytes) */
if (Iteration == IterationMax)
{ /* interior of Mandelbrot set = black */
color[0] = 0;
color[1] = 0;
color[2] = 0;
}
else
{ /* exterior of Mandelbrot set = white */
color[0] = 255; /* Red*/
color[1] = 255; /* Green */
color[2] = 255; /* Blue */
};
rows[Index] = color;
Index++;
}
}
return rows;
}
Here is function to save it to file:
void saveToFile(vector<unsigned char *> matrix, char *filename)
{
char *comment = (char *)"# "; /* comment should start with # */
FILE *file;
file = fopen(filename, "wb"); /* b - binary mode */
fprintf(file, "P6\n %s\n %d\n %d\n %d\n", comment, iXmax, iYmax, MaxColorComponentValue);
for (int Index = 0; Index < MaxIndex; Index++)
{
fwrite(matrix[Index], 1, 3, file);
}
fclose(file);
}
Some global values and main loop:
const int iXmax = 1000;
const int iYmax = 1000;
const int MaxColorComponentValue = 255;
int const MaxIndex = (iXmax * iYmax) - 1;
int main()
{
clock_t start = clock();
vector<unsigned char *> image = drawMandelbrot();
clock_t stop = clock();
cout << (double(stop - start) / CLOCKS_PER_SEC) << " seconds\n";
char *filename = (char *)"new2.ppm";
saveToFile(image,filename);
return 0;
}
Problem is that generateMandelbrot() returns matrix like this:
image matrix
but it should be vector of elements looks like this which is actually color value:
color char
I know the problems is with color and image values types, but have any idea how it should look like.
Thanks!
This:
rows[Index] = color;
Is assigning the unsigned char * in your vector to the same array every time!
In other words it's like if I sell you ten cars and deliver the keys but they are all identical keys to the same car. Wouldn't you be upset?
Change your variables to use std::array:
using Color = std::array<unsigned char, 3>;
Color color;
vector<Color> rows(MaxIndex);
Now you have a vector of triples (Colors), instead of a vector of pointers that all point at the same triple.
Is there a way to scale down with highest quality a font which is png image in opengl at startup? I tried gluScaleImage but there are many artefacts. Is there anything that uses lanczos or something like that? I don't want to write a shader or anything that does the scaling runtime.
This is based on an algorithm, I copied decades ago from the German c't Magazin, and still use it from time to time for similar issues like described by OP.
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
If I got it right, this implements a bilinear interpolation.
I don't dare to call it a Minimal Complete Verifiable Example although this is what I intended to do.
The complete sample application:
A simplified class Image
image.h:
#ifndef IMAGE_H
#define IMAGE_H
#include <vector>
// convenience type for bytes
typedef unsigned char uint8;
// image helper class
class Image {
private: // variables:
int _w, _h; // image size
size_t _bPR; // bytes per row
std::vector<uint8> _data; // image data
public: // methods:
// constructor.
Image(): _w(0), _h(0), _bPR(0) { }
// destructor.
~Image() = default;
// copy constructor.
Image(const Image&) = delete; // = default; would work as well.
// copy assignment.
Image& operator=(const Image&) = delete; // = default; would work as well.
// returns width of image.
int w() const { return _w; }
// returns height of image.
int h() const { return _h; }
// returns bytes per row.
size_t bPR() const { return _bPR; }
// returns pointer to image data.
const uint8* data(
int y = 0) // row number
const {
return &_data[y * _bPR];
}
// returns data size (in bytes).
size_t size() const { return _data.size(); }
// clears image.
void clear();
// resizes image.
uint8* resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR); // bytes per row
// returns pixel.
int getPixel(
int x, // column
int y) // row
const;
// sets pixel.
void setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b);
// sets pixel.
void setPixel(
int x, // column
int y, // row
int value) // RGB value
{
setPixel(x, y, value & 0xff, value >> 8 & 0xff, value >> 16 & 0xff);
}
};
// helper functions:
inline uint8 getR(int value) { return value & 0xff; }
inline uint8 getG(int value) { return value >> 8 & 0xff; }
inline uint8 getB(int value) { return value >> 16 & 0xff; }
#endif // IMAGE_H
image.cc:
#include <cassert>
#include "image.h"
// clears image.
void Image::clear()
{
_data.clear(); _w = _h = _bPR = 0;
}
// allocates image data.
uint8* Image::resize( // returns allocated buffer
int w, // image width
int h, // image height
int bPR) // bits per row
{
assert(w >= 0 && 3 * w <= bPR);
assert(h >= 0);
_w = w; _h = h; _bPR = bPR;
const size_t size = h * bPR;
_data.resize(size);
return _data.data();
}
// returns pixel.
int Image::getPixel(
int x, // column
int y) // row
const {
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
return _data[offs + 0]
| _data[offs + 1] << 8
| _data[offs + 2] << 16;
}
// sets pixel.
void Image::setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b) // R, G, B values
{
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
_data[offs + 0] = r;
_data[offs + 1] = g;
_data[offs + 2] = b;
}
Image Scaling
imageScale.h:
#ifndef IMAGE_SCALE_H
#define IMAGE_SCALE_H
#include "image.h"
/* scales an image to a certain width and height.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
bool scaleTo( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
int w, int h, // destination width and height
int align = 4); // row alignment
/* scales an image about a certain horizontal/vertical scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scaleXY( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double sX, // horizontal scaling factor (must be > 0 but not too large)
double sY, // vertical scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return sX > 0.0 && sY > 0.0
? scaleTo(imgSrc, imgDst,
(int)(sX * imgSrc.w()), (int)(sY * imgSrc.h()), align)
: false;
}
/* scales an image about a certain scaling factor.
*
* Note:
* imgSrc and imgDst may not be identical.
*/
inline bool scale( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double s, // scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return scaleXY(imgSrc, imgDst, s, s, align);
}
#endif // IMAGE_SCALE_H
imageScale.cc:
#include <cassert>
#include <algorithm>
#include "imageScale.h"
namespace {
template <typename VALUE>
VALUE clip(VALUE value, VALUE min, VALUE max)
{
return value < min ? min : value > max ? max : value;
}
bool scaleDown(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
const double sxy = sx * sy;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd - (yEndInt == yEnd);
const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc);
const int xStartInt = (int)xStart;
const int xEndInt = (int)xEnd - (xEndInt == xEnd);
double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
for (int i = yStartInt; i <= yEndInt; ++i) {
int jData = i * bPRSrc + xStartInt * 3;
for (int j = xStartInt; j <= xEndInt; ++j) {
double pixelAdd[3];
for (int k = 0; k < 3; ++k) {
pixelAdd[k] = (double)dataSrc[jData++] / sxy;
}
if (j == xStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
} else if (j == xEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
}
if (i == yStartInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
} else if (i == yEndInt) {
for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
}
for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
// done
return true;
}
bool scaleUp(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w && w >= wSrc && h && h >= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc - 1);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd;
if (yStartInt < yEndInt) {
const double bFract = clip((double)((yEnd - yEndInt) / sy), 0.0, 1.0);
const double tFract = 1.0 - bFract;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[4];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += tFract * rFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * lFract *dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * rFract *dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * dataSrc[jData++];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
} else {
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[3];
if (xStartInt < xEndInt) {
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += rFract * dataSrc[jData++];
}
} else {
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) pixel[k] = dataSrc[jData++];
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
}
// done
return true;
}
} // namespace
bool scaleTo(const Image &imgSrc, Image &imgDst, int w, int h, int align)
{
Image imgTmp;
return w <= 0 || h <= 0 ? false
: w >= imgSrc.w() && h >= imgSrc.h()
? scaleUp(imgSrc, imgDst, w, h, align)
: w <= imgSrc.w() && h <= imgSrc.h()
? scaleDown(imgSrc, imgDst, w, h, align)
: w >= imgSrc.w()
? scaleUp(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleDown(imgTmp, imgDst, w, h, align)
: scaleDown(imgSrc, imgTmp, w, imgSrc.h(), 1)
&& scaleUp(imgTmp, imgDst, w, h, align);
}
PPM file IO
imagePPM.h:
#ifndef IMAGE_PPM_H
#define IMAGE_PPM_H
#include <iostream>
#include "image.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align = 4); // row alignment
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img); // image to write from
#endif // IMAGE_PPM_H
imagePPM.cc:
#include <sstream>
#include <string>
#include "imagePPM.h"
// reads a binary PPM file.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align) // row alignment
{
// parse header
std::string buffer;
if (!getline(in, buffer)) return false;
if (buffer != "P6") {
std::cerr << "Wrong header! 'P6' expected.\n";
return false;
}
int w = 0, h = 0, t = 0;
for (int i = 0; i < 3;) {
if (!getline(in, buffer)) return false;
if (buffer.empty()) continue; // skip empty lines
if (buffer[0] == '#') continue; // skip comments
std::istringstream str(buffer);
switch (i) {
case 0:
if (!(str >> w)) continue;
++i;
case 1:
if (!(str >> h)) continue;
++i;
case 2:
if (!(str >> t)) continue;
++i;
}
}
if (t != 255) {
std::cerr << "Unsupported format! t = 255 expected.\n";
return false;
}
// allocate image buffer
uint8 *data = img.resize(w, h, (w * 3 + align - 1) / align * align);
// read data
for (int i = 0; i < h; ++i) {
if (!in.read((char*)data, 3 * img.w())) return false;
data += img.bPR();
}
// done
return true;
}
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img) // image to write from
{
// write header
if (!(out << "P6\n" << img.w() << ' ' << img.h() << " 255\n")) return false;
// write image data
for (size_t y = 0; y < img.h(); ++y) {
const uint8 *const data = img.data(y);
if (!out.write((const char*)data, 3 * img.w())) return false;
}
// done
return true;
}
The main application
scaleRGBImg.cc:
#include <iostream>
#include <fstream>
#include <string>
#include "image.h"
#include "imagePPM.h"
#include "imageScale.h"
int main(int argc, char **argv)
{
// read command line arguments
if (argc <= 3) {
std::cerr << "Missing arguments!\n";
std::cout
<< "Usage:\n"
<< " scaleRGBImg IN_FILE SCALE OUT_FILE\n";
return 1;
}
const std::string inFile = argv[1];
char *end;
const double s = std::strtod(argv[2], &end);
if (end == argv[2] || *end != '\0') {
std::cerr << "Invalid scale factor '" << argv[2] << "'!\n";
return 1;
}
if (s <= 0.0) {
std::cerr << "Invalid scale factor " << s << "!\n";
return 1;
}
const std::string outFile = argv[3];
// read image
Image imgSrc;
{ std::ifstream fIn(inFile.c_str(), std::ios::binary);
if (!readPPM(fIn, imgSrc)) {
std::cerr << "Reading '" << inFile << "' failed!\n";
return 1;
}
}
// scale image
Image imgDst;
if (!scale(imgSrc, imgDst, s)) {
std::cerr << "Scaling failed!\n";
return 1;
}
// write image
{ std::ofstream fOut(outFile.c_str(), std::ios::binary);
if (!writePPM(fOut, imgDst) || (fOut.close(), !fOut.good())) {
std::cerr << "Writing '" << outFile << "' failed!\n";
return 1;
}
}
// done
return 0;
}
Test
Compiled in cygwin64:
$ g++ -std=c++11 -o scaleRGBImg scaleRGBImg.cc image.cc imagePPM.cc imageScale.cc
$
A sample image test.ppm for a test – converted to PPM in GIMP:
Test with the sample image:
$ for I in 0.8 0.6 0.4 0.2 ; do echo ./scaleRGBImg test.ppm $I test.$I.ppm ; done
./scaleRGBImg test.ppm 0.8 test.0.8.ppm
./scaleRGBImg test.ppm 0.6 test.0.6.ppm
./scaleRGBImg test.ppm 0.4 test.0.4.ppm
./scaleRGBImg test.ppm 0.2 test.0.2.ppm
$ for I in 0.8 0.6 0.4 0.2 ; do ./scaleRGBImg test.ppm $I test.$I.ppm ; done
$
This is what came out:
test.0.8.ppm:
test.0.6.ppm:
test.0.4.ppm:
test.0.2.ppm:
I'm trying to use a Gaussian filter on a generated Mandelbrot set, and I have a solution that will work sequentially. However, I do want to have a solution that works using GPU processing.
I'm using C++ AMP to use GPU processing. An issue with storing the individual color channels of the pixels is that in AMP you can't use unsigned 8 bit integers ( uint8_t ), therefore I've had to store the channels in unsigned 32 bit integers ( uint32_t ). All of the syntax that I've implemented into the AMP code is the same as the sequential one, however the colours of the one generated in AMP has the wrong colours output, however the shape of the Mandelbrot can still be seen.
So, I believe its the way that I'm recollecting the channels together for the pixel that's giving the wrong colour. If anymore details are needed, I can provide them.
Sequential Output
Sequential Code:
void applyFilter(){
int kernelCentreX, kernelCentreY;// center index of kernel
int kernelRadius = 5 / 2;
uint8_t r, g, b;
float kernelTotal = 0.0;
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float sum;
the_clock::time_point start = the_clock::now();
for (int y = 0; y < HEIGHT; y++) { //loop through image height
for (int x = 0; x < WIDTH; x++) {//loop through image width
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float kernelTotal = 0.0;
for (int v = 0; v < 5; v++) { //loop through for kernel height
for (int u = 0; u < 5; u++) { //loop through for kernel width
// Current position
int cX = x + u - kernelRadius;
int cY = y + v - kernelRadius;
//Make sure we stay in boundries
if (cX < 0 || cX > WIDTH - 1 || cY < 0 || cY > HEIGHT - 1)
{
continue;
}
//Get colour channels of current pixel
r = (image[cY][cX] >> 16);
g = (image[cY][cX] >> 8);
b = (image[cY][cX]);
//Get colour channels of current pixel
/*r = image[cY][cX] >> 16;
g = image[cY][cX] >> 8;
b = image[cY][cX];*/
//Calculate Totals
redTotal += r *kernel[v][u];
greenTotal += g *kernel[v][u];
blueTotal += b *kernel[v][u];
kernelTotal += kernel[v][u];
}
}
//Calculate new pixel values
r = (redTotal / kernelTotal);
g = (greenTotal / kernelTotal);
b = (blueTotal / kernelTotal);
image[y][x] = (r << 16 | g << 8 | b);
}
}
the_clock::time_point end = the_clock::now();
auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Time taken to apply kernel(sequential) " << time_taken << "ms" << endl;
}
Parallel Output
Parallel Code:
void applyFilterAMP() {
the_clock::time_point start = the_clock::now();
uint32_t *pImage = &(image[0][0]);
uint32_t *pFilteredImage = &(filteredImage[0][0]);
float *pKernel = &(kernel[0][0]);
uint32_t pi[HEIGHT/3][WIDTH/3][3];
uint32_t* pPi = &(pi[0][0][0]);
array_view<float, 2>k(5, 5, pKernel);
array_view<uint32_t, 2> m(HEIGHT, WIDTH, pImage);
array_view<uint32_t, 2> fi(HEIGHT, WIDTH, pFilteredImage);
//array_view<uint32_t, 3> piColor(HEIGHT/3, WIDTH/3, 3, pPi);
fi.discard_data();
static const int TileSize = 16;
parallel_for_each(fi.extent, [=](concurrency::index<2> idx) restrict(amp) {
//parallel_for_each(fi.extent.tile<TileSize, TileSize>(), [=](tiled_index<TileSize, TileSize> tidx) restrict(amp) {
int kernelCentreX, kernelCentreY;// center index of kernel
int kernelRadius = 5/2;
int y = idx[0];
int x = idx[1];
//int y = tidx.global[0];
//int x = tidx.global[1];
float kernelTotal = 0.0;
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float sum=0.0;
uint32_t r, g, b, newPixel;
for (int v = 0; v < 5; v++) { //loop through for kernel height
for (int u = 0; u < 5; u++) { //loop through for kernel width
// Current position
int cX = x + u - kernelRadius;
int cY = y + v - kernelRadius;
//int cX = tidx.global[1] + u - kernelRadius;
//int cY = tidx.global[0] + v - kernelRadius;
//Make sure we stay in boundries
if ((cX < 0 || cX > WIDTH - 1 || cY < 0 || cY > HEIGHT - 1))
{
continue;
}
//Get colour channels of pixel
r = m[cY][cX] >> 16;
g = m[cY][cX] >> 8;
b = m[cY][cX];
//Calculate Totals
redTotal += r *k[v][u];
greenTotal += g *k[v][u];
blueTotal += b *k[v][u];
kernelTotal += k[v][u];
}
}
//Calculate new pixel values
r = (redTotal / kernelTotal);
g = (greenTotal / kernelTotal);
b = (blueTotal / kernelTotal);
fi[y][x] = (r << 16 | g << 8 | b);
});
fi.synchronize();
//m.synchronize();
the_clock::time_point end = the_clock::now();
auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Time taken to apply kernel(parallel) " << time_taken << "ms" << endl;
errno_t err = memcpy_s(image, sizeof(uint32_t)*(HEIGHT * WIDTH), filteredImage, sizeof(uint32_t)*(HEIGHT * WIDTH));
if (err)
{
printf("Error executing memcpy_s.\n");
}
/*for (int j = 0; j < HEIGHT; j++) {
for (int i = 0; i < WIDTH; i++) {
image[j][i] = filteredImage[j][i];
}
}*/
}
I'm trying to Shear an image along the X-axis using OpenCV to load the image, and the following algorithm to shear the image: x′=x+y·Bx, but for some reason, I end up with the following shear:
My source code looks like this:
#include "stdafx.h"
#include "opencv2\opencv.hpp"
using namespace std;
using namespace cv;
int main()
{
Mat src = imread("B2DBy.jpg", 1);
if (src.empty())
cout << "Error: Loading image" << endl;
int r1, c1; // tranformed point
int rows, cols; // original image rows and columns
rows = src.rows;
cols = src.cols;
float Bx = 2; // amount of shearing in x-axis
float By = 0; // amount of shearing in y-axis
int maxXOffset = abs(cols * Bx);
int maxYOffset = abs(rows * By);
Mat out = Mat::ones(src.rows + maxYOffset, src.cols + maxXOffset, src.type()); // create output image to be the same as the source
for (int r = 0; r < out.rows; r++) // loop through the image
{
for (int c = 0; c < out.cols; c++)
{
r1 = r + c * By - maxYOffset; // map old point to new
c1 = r * Bx + c - maxXOffset;
if (r1 >= 0 && r1 <= out.rows && c1 >= 0 && c1 <= out.cols) // check if the point is within the boundaries
{
out.at<uchar>(r, c) = src.at<uchar>(r1, c1); // set value
}
}
}
namedWindow("Source image", CV_WINDOW_AUTOSIZE);
namedWindow("Rotated image", CV_WINDOW_AUTOSIZE);
imshow("Source image", src);
imshow("Rotated image", out);
waitKey(0);
return 0;
}
EDIT
Fixed it myself.
Didn't need to substract the offset. Heres the updated source code:
Mat forward(Mat img) {
Mat umg = img;
int y1, x1; // tranformed point
int rows, cols; // original image rows and columns
rows = umg.rows;
cols = umg.cols;
float Bx = 0.7; // amount of shearing in x-axis
float By = 0; // amount of shearing in y-axis
int maxXOffset = abs(rows * Bx);
int maxYOffset = abs(cols * By);
Mat out = Mat::ones(rows + maxYOffset, cols + maxXOffset, umg.type()); // create output image to be the same as the source
for (int y = 0; y < rows; y++) // loop through the image
{
for (int x = 0; x < cols; x++)
{
y1 = y + x * By; // map old point to new
x1 = y * Bx + x;
out.at<uchar>(y1, x1) = umg.at<uchar>(y, x); // set value
}
}
return out;
}
Mat backwards(Mat img) {
Mat umg = img;
int y1, x1; // tranformed point
int rows, cols; // original image rows and columns
rows = umg.rows;
cols = umg.cols;
float Bx = 0.7; // amount of shearing in x-axis
float By = 0; // amount of shearing in y-axis
int maxXOffset = abs(rows * Bx);
int maxYOffset = abs(cols * By);
Mat out = Mat::ones(rows + maxYOffset, cols + maxXOffset, umg.type()); // create output image to be the same as the source
for (int y = 0; y < rows; y++) // loop through the image
{
for (int x = 0; x < cols; x++)
{
//y1 = y + x * By; // map old point to new
//x1 = y * Bx + x;
y1 = (1 / (1 - Bx*By)) * (y + x * By);
x1 = (1 / (1 - Bx*By)) * (y * Bx + x);
out.at<uchar>(y1, x1) = umg.at<uchar>(y, x); // set value
}
}
return out;
}
int main()
{
Mat src = imread("B2DBy.jpg", 0);
if (src.empty())
cout << "Error: Loading image" << endl;
Mat forwards = forward(src);
Mat back = backwards(src);
namedWindow("Source image", CV_WINDOW_NORMAL);
imshow("Source image", src);
imshow("back", back);
imshow("forward image", forwards);
waitKey(0);
return 0;
}
I found some time to work on this.
Now I understand what you tried to achieve with the offset computation, but I'm not sure whether yours is correct.
Just change all the cv::Vec3b to unsigned char or uchar and load as grayscale, if wanted.
Please try this code and maybe you'll find your error:
// no interpolation yet
// cv::Vec3b only
cv::Mat shear(const cv::Mat & input, float Bx, float By)
{
if (Bx*By == 1)
{
throw("Shearing: Bx*By==1 is forbidden");
}
if (input.type() != CV_8UC3) return cv::Mat();
// shearing:
// x'=x+y·Bx
// y'=y+x*By
// shear the extreme positions to find out new image size:
std::vector<cv::Point2f> extremePoints;
extremePoints.push_back(cv::Point2f(0, 0));
extremePoints.push_back(cv::Point2f(input.cols, 0));
extremePoints.push_back(cv::Point2f(input.cols, input.rows));
extremePoints.push_back(cv::Point2f(0, input.rows));
for (unsigned int i = 0; i < extremePoints.size(); ++i)
{
cv::Point2f & pt = extremePoints[i];
pt = cv::Point2f(pt.x + pt.y*Bx, pt.y + pt.x*By);
}
cv::Rect offsets = cv::boundingRect(extremePoints);
cv::Point2f offset = -offsets.tl();
cv::Size resultSize = offsets.size();
cv::Mat shearedImage = cv::Mat::zeros(resultSize, input.type()); // every pixel here is implicitely shifted by "offset"
// perform the shearing by back-transformation
for (int j = 0; j < shearedImage.rows; ++j)
{
for (int i = 0; i < shearedImage.cols; ++i)
{
cv::Point2f pp(i, j);
pp = pp - offset; // go back to original coordinate system
// go back to original pixel:
// x'=x+y·Bx
// y'=y+x*By
// y = y'-x*By
// x = x' -(y'-x*By)*Bx
// x = +x*By*Bx - y'*Bx +x'
// x*(1-By*Bx) = -y'*Bx +x'
// x = (-y'*Bx +x')/(1-By*Bx)
cv::Point2f p;
p.x = (-pp.y*Bx + pp.x) / (1 - By*Bx);
p.y = pp.y - p.x*By;
if ((p.x >= 0 && p.x < input.cols) && (p.y >= 0 && p.y < input.rows))
{
// TODO: interpolate, if wanted (p is floating point precision and can be placed between two pixels)!
shearedImage.at<cv::Vec3b>(j, i) = input.at<cv::Vec3b>(p);
}
}
}
return shearedImage;
}
int main(int argc, char* argv[])
{
cv::Mat input = cv::imread("C:/StackOverflow/Input/Lenna.png");
cv::Mat output = shear(input, 0.7, 0);
//cv::Mat output = shear(input, -0.7, 0);
//cv::Mat output = shear(input, 0, 0.7);
cv::imshow("input", input);
cv::imshow("output", output);
cv::waitKey(0);
return 0;
}
Giving me these outputs for the 3 sample lines:
Here is my code for creating the hough accumulator for lines in image :
void hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc) {
for (size_t r = 0; r < img_a_edges.rows; r++) {
for (size_t c = 0; c < img_a_edges.cols; c++) {
int theta = static_cast<int> (std::atan2(r, c) * 180 / M_PI);
int rho = static_cast<int> ((c * cos(theta)) + (r * sin(theta)));
if (theta < -90) theta = -90;
if (theta > 89) theta = 89;
++hough_acc[abs(rho)][theta];
}
}
cv::Mat img_mat(hough_acc.size(), hough_acc[0].size(), CV_8U);
std::cout << hough_acc.size() << " " << hough_acc[0].size() << std::endl;
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<int> (i,j) = hough_acc[i][j];
}
}
imwrite("../output/ps1-2-b-1.png", img_mat);
}
theta varies from -90 to 89. I am getting negative rho values. Right now I am just replacing the negative who with a positive one but am not getting a correct answer. What do I do to the negative rho? Please explain the answer.
theta = arctan (y / x)
rho = x * cos(theta) + y * sin(theta)
Edited code :
bool hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc,\
std::vector<double> thetas, std::vector<double> rhos, int rho_resolution, int theta_resolution) {
int img_w = img_a_edges.cols;
int img_h = img_a_edges.rows;
int max_votes = 0;
int min_votes = INT_MAX;
for (size_t r = 0; r < img_h; r++) {
for (size_t c = 0; c < img_w; c++) {
if(img_a_edges.at<int>(r, c) == 255) {
for (size_t i = 0; i < thetas.size(); i++) {
thetas[i] = (thetas[i] * M_PI / 180);
double rho = ( (c * cos(thetas[i])) + (r * sin(thetas[i])) );
int buff = ++hough_acc[static_cast<int>(abs(rho))][static_cast<int>(i)];
if (buff > max_votes) {
max_votes = buff;
}
if (buff < min_votes) {
min_votes = buff;
}
}
}
}
}
double div = static_cast<double>(max_votes) / 255;
int threshold = 10;
int possible_edge = round(static_cast<double>(max_votes) / div) - threshold;
props({
{"max votes", max_votes},
{"min votes", min_votes},
{"scale", div}
});
// needed for scaling intensity for contrast
// not sure if I am doing it correctly
for (size_t r = 0; r < hough_acc.size(); r++) {
for (size_t c = 0; c < hough_acc[0].size(); c++) {
double val = hough_acc[r][c] / div;
if (val < 0) {
val = 0;
}
hough_acc[r][c] = static_cast<int>(val);
}
}
cv::Mat img_mat = cv::Mat(hough_acc.size(), hough_acc[0].size(), CV_8UC1, cv::Scalar(0));
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<uint8_t> (i,j) = static_cast<uint8_t>(hough_acc[i][j]);
}
}
imwrite("../output/ps1-2-b-1.png", img_mat);
return true;
}
Still not correct output. What is the error here?
atan2 of two positive numbers... should not be giving you negative angles, it should only be giving you a range of 0-90
also for the hough transform, I think you want everything relative to one point (ie 0,0 in this case). I think for that you would actually want to make theta=90-atan2(r,c)
Admittedly though, I am a bit confused as I thought you had to encode line direction, rather than just "edge pt". ie I thought at each edge point you had to provide a discrete array of guessed line trajectories and calculate rho and theta for each one and throw all of those into your accumulator. As is... I am not sure what you are calculating.