I am running for displaying RGB image from raw in C++ without any library. When I input the square image (e.g: 512x512), my program can display the image perfectly, but it does not in not_square size image (e.g: 350x225). I understand that I need padding for this case, then I tried to find the same case but it didn't make sense for me how people can pad their image.
If anyone can show me how to pad, I would be thanks for this. And below is what I have done for RGB from Raw.
void CImage_MyClass::Class_MakeRGB(void)
{
m_BMPheader.biHeight = m_uiHeight;
m_BMPheader.biWidth = m_uiWidth;
m_pcBMP = new UCHAR[m_uiHeight * m_uiWidth * 3];
//RGB Image
{
int ind = 0;
for (UINT y = 0; y < m_uiHeight; y++)
{
for (UINT x = 0; x < m_uiHeight*3; x+=3)
{
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+2];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+1];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x];
}
}
}
}
You need to pad the number of bytes in each line out to a multiple of 4.
void CImage_MyClass::Class_MakeRGB(void)
{
m_BMPheader.biHeight = m_uiHeight;
m_BMPheader.biWidth = m_uiWidth;
//Pad buffer width to next highest multiple of 4
const int bmStride = m_uiWidth * 3 + 3 & ~3;
m_pcBMP = new UCHAR[m_uiHeight * bmStride];
//Clear buffer so the padding bytes are 0
memset(m_pcBMP, 0, m_uiHeight * bmStride);
//RGB Image
{
for(UINT y = 0; y < m_uiHeight; y++)
{
for(UINT x = 0; x < m_uiWidth * 3; x += 3)
{
const int bmpPos = y * bmWidth + x;
m_pcBMP[bmpPos + 0] = m_pcIBuff[m_uiHeight - y - 1][x + 2];
m_pcBMP[bmpPos + 1] = m_pcIBuff[m_uiHeight - y - 1][x + 1];
m_pcBMP[bmpPos + 2] = m_pcIBuff[m_uiHeight - y - 1][x];
}
}
}
}
I also changed the inner for loop to use m_uiWidth instead of m_uiHeight.
#Retired Ninja, Thanks anyway for your answer... you showed me a simple way for this...
But by the way, I have fixed mine as well with different way.. here is it:
void CImage_MyClass::Class_MakeRGB(void)
{
m_BMPheader.biHeight = m_uiHeight;
m_BMPheader.biWidth = m_uiWidth;
int padding = 0;
int scanline = m_uiWidth * 3;
while ( ( scanline + padding ) % 4 != 0 )
{
padding++;
}
int psw = scanline + padding;
m_pcBMP = new UCHAR[m_uiHeight * m_uiWidth * 3 + m_uiHeight * padding];
//RGB Image
int ind = 0;
for (UINT y = 0; y < m_uiHeight; y++)
{
for (UINT x = 0; x < m_uiHeight*3; x+=3)
{
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+2];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+1];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x];
}
for(int i = 0; i < padding; i++)
ind++;
}
}
Related
I was learning filters in OpenCV, but I'm a little confused about the Laplacian filter. My result is very different from the Laplacian filter in OpenCV lib.
For first, I use a Gaussian filter for the image:
Mat filtroGauss(Mat src){
Mat gauss = src.clone();
Mat temp(src.rows+2,src.cols+2,DataType<uchar>::type);
int y,x;
for (y=0; y<src.rows; y++){
for (x=0; x<src.cols; x++) temp.at<uchar>(y+1,x+1) = src.at<uchar>(y,x);
}
int mask[lenMask*lenMask];
mask[0] = mask[2] = mask[6] = mask[8] = 1;
mask[1] = mask[3] = mask[5] = mask[7] = 2;
mask[4] = 4;
int denominatore = 0;
for (int i=0; i<lenMask*lenMask; i++) denominatore += mask[i];
int value[lenMask*lenMask];
for(y=0; y<src.rows; y++){
for (x=0; x<src.cols; x++){
value[0] = temp.at<uchar>(y-1,x-1)*mask[0];
value[1] = temp.at<uchar>(y-1,x)*mask[1];
value[2] = temp.at<uchar>(y-1,x+1)*mask[2];
value[3] = temp.at<uchar>(y,x-1)*mask[3];
value[4] = temp.at<uchar>(y,x)*mask[4];
value[5] = temp.at<uchar>(y,x+1)*mask[5];
value[6] = temp.at<uchar>(y+1,x-1)*mask[6];
value[7] = temp.at<uchar>(y+1,x)*mask[7];
value[8] = temp.at<uchar>(y+1,x+1)*mask[8];
int avg = 0;
for(int i=0; i<lenMask*lenMask; i++)avg+=value[i];
avg = avg/denominatore;
gauss.at<uchar>(y,x) = avg;
}
}
return gauss;
}
Then I use the Laplacian function:
L(y,x) = f(y-1,x) + f(y+1,x) + f(y,x-1) + f(y,x+1) + 4*f(y,x)
Mat filtroLaplace(Mat src){
Mat output = src.clone();
Mat temp = src.clone();
int y,x;
for (y =1; y<src.rows-1; y++){
for(x =1; x<src.cols-1; x++){
output.at<uchar>(y,x) = temp.at<uchar>(y-1,x) + temp.at<uchar>(y+1,x) + temp.at<uchar>(y,x-1) + temp.at<uchar>(y,x+1) -4*( temp.at<uchar>(y,x));
}
}
return output;
}
And here is the final result from my code:
OpenCV result:
Let's rewrite the function a little, so it's easier to discuss:
cv::Mat filtroLaplace(cv::Mat src)
{
cv::Mat output = src.clone();
for (int y = 1; y < src.rows - 1; y++) {
for (int x = 1; x < src.cols - 1; x++) {
int sum = src.at<uchar>(y - 1, x)
+ src.at<uchar>(y + 1, x)
+ src.at<uchar>(y, x - 1)
+ src.at<uchar>(y, x + 1)
- 4 * src.at<uchar>(y, x);
output.at<uchar>(y, x) = sum;
}
}
return output;
}
The source of your problem is sum. Let's examine its range in scope of this algorithm, by taking the two extremes:
Black pixel, surrounded by 4 white. That means 255 + 255 + 255 + 255 - 4 * 0 = 1020.
White pixel, surrounded by 4 black. That means 0 + 0 + 0 + 0 - 4 * 255 = -1020.
When you perform output.at<uchar>(y, x) = sum; there's an implicit cast of the int back to unsigned char -- the high order bits simply get chopped off and the value overflows.
The correct approach to handle this situation (which OpenCV takes), is to perform saturation before the actual cast. Essentially
if (sum < 0) {
sum = 0;
} else if (sum > 255) {
sum = 255;
}
OpenCV provides function cv::saturate_cast<T> to do just this.
There's an additional problem that you're not handling the edge rows/columns of the input image -- you just leave them at the original value. Since you're not asking about that, I'll leave solving that as an excercise to the reader.
Code:
cv::Mat filtroLaplace(cv::Mat src)
{
cv::Mat output = src.clone();
for (int y = 1; y < src.rows - 1; y++) {
for (int x = 1; x < src.cols - 1; x++) {
int sum = src.at<uchar>(y - 1, x)
+ src.at<uchar>(y + 1, x)
+ src.at<uchar>(y, x - 1)
+ src.at<uchar>(y, x + 1)
- 4 * src.at<uchar>(y, x);
output.at<uchar>(y, x) = cv::saturate_cast<uchar>(sum);
}
}
return output;
}
Sample input:
Output of corrected filtroLaplace:
Output of cv::Laplacian:
I'm trying to update a 128x128 D3DLOCKED_RECT with sub images using the following code, but it seems to squish them down along the top, the X offset is ignored and the y offset is 60 percent off.
I've also tried to make the texture the correct size and copy it into a 128x128 texture at the correct location using RECT, however this is very slow and didn't seem to work correctly when I attempted it. There must be way to do it using the raw pixel data?
Any help would be much appreciated :)
EDIT: I got it semi working using the below code, the locations are now correct and the sizes. But it's only using the blue channel and everything is grey scale (blue scale?)
srcdata = (byte *) pixels;
dstdata = (unsigned int *)lockrect.pBits;
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 0] = (unsigned int)srcdata[0];
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 1] = (unsigned int)srcdata[1];
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 2] = (unsigned int)srcdata[0];
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 3] = (unsigned int)srcdata[3];
srcdata += srcbytes;
}
}'
END Edit
Test call after creating the 128x128 texture:
int x, y;
byte temp[132*132*4];
// Test texture (pink and black checker)
for( y = 0; y < 16; y++ )
{
for( x = 0; x < 16; x++ )
{
if(( y < 8 ) ^ ( x < 8 ))
((uint *)&temp)[y*16+x] = 0xFFFF00FF;
else ((uint *)&temp)[y*16+x] = 0xFF000000;
}
}
UpdateSubImage (0, 0, 16, 16, temp )
The update Fuction:
void UpdateSubImage (int xoffset, int yoffset, int width, int height, const
GLvoid *pixels)
{
int x, y;
int srcbytes = 4; //Hard coded for now, as all tests are RGBA
int dstbytes = 4; // ^
byte *srcdata;
byte *dstdata;
D3DLOCKED_RECT lockrect;
pTexture->LockRect( 0, &lockrect, NULL, 0);
srcdata = (byte *) pixels;
dstdata = (byte *) lockrect.pBits;
dstdata += (yoffset * width + xoffset) * dstbytes;
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
if (srcbytes == 1)
{
if (dstbytes == 1)
dstdata[0] = srcdata[0];
else if (dstbytes == 4)
{
dstdata[0] = srcdata[0];
dstdata[1] = srcdata[0];
dstdata[2] = srcdata[0];
dstdata[3] = srcdata[0];
}
}
else if (srcbytes == 3)
{
if (dstbytes == 1)
dstdata[0] = ((int) srcdata[0] + (int) srcdata[1] + (int) srcdata[2]) / 3;
else if (dstbytes == 4)
{
dstdata[0] = srcdata[2];
dstdata[1] = srcdata[1];
dstdata[2] = srcdata[0];
dstdata[3] = 255;
}
}
else if (srcbytes == 4)
{
if (dstbytes == 1)
dstdata[0] = ((int) srcdata[0] + (int) srcdata[1] + (int) srcdata[2]) / 3;
else if (dstbytes == 4)
{
dstdata[0] = srcdata[2];
dstdata[1] = srcdata[1];
dstdata[2] = srcdata[0];
dstdata[3] = srcdata[3];
}
}
// advance
srcdata += srcbytes;
dstdata += dstbytes;
}
}
pTexture->UnlockRect(0);
}
What the output looks like:
What the output should look like:
You're assuming that the data accessable through lockrect.pBits is linear in memory. This is in general not the case. Instead you have a constant offset between your rows which is defined by the lockrect.Pitch value.
To get the address of a pixel in the destination use:
byte * destAddr = (lockrect.pBits + y * lockrect.Pitch + 4 * x);
// for 32 bit images. For other formats adjust the hard-coded 4.
Thanks for the help :), in the end the following code worked:
Can it be made faster?
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
ARGB pixel;
pixel.r = srcdata[0];
pixel.g = srcdata[1];
pixel.b = srcdata[2];
pixel.a = srcdata[3];
memcpy( &dstdata[lockrect.Pitch * y + dstbytes * x], &pixel, dstbytes );
srcdata += srcbytes;
}
}
For an embedded design I am attempting to implement sobel's edge detection on a board without the use of a buffer. i.e. I am reading and writing directly from the screen. I can however, store about one or two imge width full of data to be referenced later. This is due to limitations set forth by the board. However I have fallen into some issue. All that I recieve is noise regardless if I attempt to do sobel or another edge detection algorithm. The code is below, does anyone have any suggestions
Version 1
void sobelEdgeDetection2() {
int GX[3][3];
int GY[3][3];
int sumX[3];
int sumY[3];
int SUM[3];
int piX = 0;
int piY = 0;
//uint8_t R, G, B = 0;
int I, J = 0;
//UnpackedColour pixVal;
uint16_t *buffer;
// allocate space for even scan lines and odd scan lines
buffer = new uint16_t[_gl->getWidth()];
//buffer for previous line
uint16_t *bufT;
// allocate space for even scan lines and odd scan lines
bufT = new uint16_t[_gl->getWidth()];
// Masks //////////////////////////////////////
//X//
GX[0][0] = -1;
GX[0][1] = 0;
GX[0][2] = 1;
GX[1][0] = -2;
GX[1][1] = 0;
GX[1][2] = 2;
GX[2][0] = -1;
GX[2][1] = 0;
GX[2][2] = 1;
//Y//
GY[0][0] = 1;
GY[0][1] = 2;
GY[0][2] = 1;
GY[1][0] = 0;
GY[1][1] = 0;
GY[1][2] = 0;
GY[2][0] = -1;
GY[2][1] = -2;
GY[2][2] = -1;
for (int Y = 0; Y < _gl->getHeight(); Y++) {
for (int X = 0; X < _gl->getWidth(); X++) {
sumX[0] = sumX[1] = sumX[2] = 0;
sumY[0] = sumY[1] = sumY[2] = 0;
if (Y == 0 || Y == _gl->getHeight() - 1) {
SUM[0] = SUM[1] = SUM[2] = 0;
} else if (X == 0 || X == _gl->getWidth() - 1) {
SUM[0] = SUM[1] = SUM[2] = 0;
} else {
for (I = -1; I <= 1; I++) {
for (J = -1; J <= 1; J++) {
piX = J + X;
piY = I + Y;
pixel16 pix = getPixel(piX, piY);
uint8_t Red = pix.Red;
uint8_t Green = pix.Green;
uint8_t Blue = pix.Blue;
sumX[0] += (Red) * GX[J + 1][I + 1];
sumX[1] += (Green) * GX[J + 1][I + 1];
sumX[2] += (Blue) * GX[J + 1][I + 1];
sumY[0] += (Red) * GY[J + 1][I + 1];
sumY[1] += (Green) * GY[J + 1][I + 1];
sumY[2] += (Blue) * GY[J + 1][I + 1];
}
}
SUM[0] = abs(sumX[0]) + abs(sumY[0]);
SUM[1] = abs(sumX[1]) + abs(sumY[1]);
SUM[2] = abs(sumX[2]) + abs(sumY[2]);
}
if (SUM[0] > 255)
SUM[0] = 255;
if (SUM[0] < 0)
SUM[0] = 0;
if (SUM[1] > 255)
SUM[1] = 255;
if (SUM[1] < 0)
SUM[1] = 0;
if (SUM[2] > 255)
SUM[2] = 255;
if (SUM[2] < 0)
SUM[2] = 0;
int newPixel[3];
newPixel[0] = (255 - ((unsigned char) (SUM[0])));
newPixel[1] = (255 - ((unsigned char) (SUM[1])));
newPixel[2] = (255 - ((unsigned char) (SUM[2])));
pixel16 pix(newPixel[0], newPixel[1], newPixel[2]);
buffer[X] = packColour(pix).packed565;
}
//Need to move cursor back
// draw it
this->paintRow(Point(0, Y), buffer, _gl->getWidth());
}
delete[] buffer;
}
Version2
/**
* https://www.cl.cam.ac.uk/projects/raspberrypi/tutorials/image-processing/edge_detection.html
* 1 Iterate over every pixel in the image
* 2 Apply the x gradient kernel
* 3 Apply the y gradient kernel
* 4 Find the length of the gradient using pythagoras' theorem
* 5 Normalise the gradient length to the range 0-255
* 6 Set the pixels to the new values
*/
void sobelEdgeDetection4() {
UnpackedColour colour;
for (int x = 1; x < _gl->getWidth() - 1; x++) {
for (int y = 1; y < _gl->getHeight() - 1; y++) {
// initialise Gx and Gy to 0
int Gx = 0;
int Gy = 0;
unsigned int intensity = 0;
// Left column
pixel16 pixel = this->getPixel(x - 1, y - 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += -intensity;
Gy += -intensity;
pixel = this->getPixel(x - 1, y);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += -2 * intensity;
pixel = this->getPixel(x - 1, y + 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += -intensity;
Gy += +intensity;
// middle column
pixel = this->getPixel(x, y - 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gy += -2 * intensity;
pixel = this->getPixel(x, y + 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gy += +2 * intensity;
// right column
pixel = this->getPixel(x + 1, y - 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += +intensity;
Gy += -intensity;
pixel = this->getPixel(x + 1, y);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += +2 * intensity;
pixel = this->getPixel(x + 1, y + 1);
intensity = pixel.Red + pixel.Green + pixel.Blue;
Gx += +intensity;
Gy += +intensity;
// calculate the gradient length
unsigned int length = (unsigned int) sqrt(
(float) (Gx * Gx) + (float) (Gy * Gy));
// normalise the length to 0 to 255
length = length / 17;
// draw the pixel on the edge image
pixel16 pixel2(length,length,length);
this->setPixel(x, y, pixel2);
}
}
}
Version 3
// sobel map for the x axis
const double _SOBEL_Gx[3][3] = { { -1.0, +0.0, +1.0 }, { -2.0, +0.0, +2.0 },
{ -1.0, +0.0, +1.0 } };
// sobel map for the y axis
const double _SOBEL_Gy[3][3] = { { +1.0, +2.0, +1.0 }, { +0.0, +0.0, +0.0 },
{ -1.0, -2.0, -1.0 } };
double get_sobel_gradient(int width, int height, int x, int y) {
double sobel_gradient_x = 0, sobel_gradient_y = 0;
int mx = 0, my = 0, sx = 0, sy = 0;
for (mx = x; mx < x + 3; mx++) {
sy = 0;
for (my = y; my < y + 3; my++) {
if (mx < width && my < height) {
//int r, g, b, idx;
int idx = (mx + width * my) * 3;
pixel16 pixVal = this->getPixel(idx);
//r = pixVal.Red;
//g = pixVal.Green;
//b = pixVal.Blue;
UnpackedColour col = this->packColour(pixVal);
sobel_gradient_x += col.packed565 * _SOBEL_Gx[sx][sy];
sobel_gradient_y += col.packed565 * _SOBEL_Gy[sx][sy];
}
sy++;
}
sx++;
}
return abs(sobel_gradient_x) + abs(sobel_gradient_y);
}
void sobelEdgeDetection3() {
double threshold = 50000.0;
UnpackedColour colour;
for (int y = 0; y < _gl->getHeight(); y++) {
for (int x = 0; x < _gl->getWidth(); x++) {
if (get_sobel_gradient(_gl->getWidth(), _gl->getHeight(), x, y)
>= threshold) {
colour.packed565 = 0x0000; //set white
} else {
colour.packed565 = 0xFFFF; //set black
}
this->setPixel(x, y, colour);
}
}
}
For Version 1, after you allocate 2 buffers (just use buffer and bufT), create 2 pointers to point to the current and previous rows, like this:
uint16_t *currentRow = buffer;
uint16_t *prevRow = bufT;
Inside the row loop, write to currentRow instead of buffer:
pixel16 pix(newPixel[0], newPixel[1], newPixel[2]);
currentRow[X] = packColour(pix).packed565;
Because the Sobel filter reads from the previous row, you can't overwrite a row until after you have finished calculating the filtered values for the row after it. So at the end of the loop, where you are currently calling paintRow(), draw the previous row (if one exists), and then swap the buffers so that the current becomes the previous, and the previous becomes the new current row (to be overwritten on the next pass through the loop). On the last row the current row is also drawn, because otherwise it won't be since the outer loop is about to terminate.
if(Y > 0) // draw the previous row if this is not the first row:
this->paintRow(Point(0, Y-1), prevRow, _gl->getWidth());
if(Y == _gl->getHeight()-1) // draw the current row if it is the last:
this->paintRow(Point(0, Y), currentRow, _gl->getWidth());
// swap row pointers:
uint16_t *temp = prevRow;
prevRow = currentRow;
currentRow = temp;
The same strategy should work for the other versions.
Iterating through 1D array (pseudo 2D) with step of 3:
arr = new int[height * width * 3];
for (int i = 0; i < height * width * 3; i+=3) {
arr[i] = 1;
}
I have tried this, but what I got is column of one third:
for (int y = 0; y < height * 3; y++) {
for (int x = 0; x < width; x+=3) {
arr[x + width * y] = 1;
}
}
Assuming your cells have a 'size' of 3 entries, you should use the * 3 on the inner loop. Otherwise you miss 2 thirds of your cells on each row.
You also need to multiply width by 3 to get the correct row.
for (int y = 0; y < height; y++) {
for (int x = 0; x < width * 3; x+=3) {
arr[x + width * 3 * y] = 1;
}
}
In general you need the following structure for such situations:
for (int y = 0; y < height; y++) {
for (int x = 0; x < width * cellWidth; x+= cellWidth) {
arr[x + width * cellWidth * y] = 1;
}
}
(Were cellWidth is 3 in your case)
To slightly simplify this, you could assume in the loops that your cells have a width of 1 (like a normal situation) and multiply by cellWidth when actually assigning the values:
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int index = (x + width * y) * cellWidth;
arr[index + 0] = 1; // First 'cell entry'
arr[index + 1] = 1; // Second
...
arr[index + cellWidth - 1] = 1; // Last
}
}
Another solution is to create larger 'items' using a struct for example:
typedef struct { int r, int g, int b } t_rgb;
t_rgb* arr = new t_rgb[height * width];
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
arr[x + width * y].r = 1;
}
}
and you are able to use it as a regular array (the compiler does all calculations for you). This also makes it more clear what is happening in your code.
What are you trying to accomplish exactly? Setting a channel in a RGB image?
I usually do it like this:
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++)
arr[(x + width * y) * 3] = 1;
In general, to set RGB values, you can simply add an offset like this:
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++)
{
size_t base = (x + width * y) * 3;
arr[base + 0] = r;
arr[base + 1] = g;
arr[base + 2] = b;
}
i want to transport the follow codes into c++:
gaussFilter = fspecial('gaussian', 2*neighSize+1, 0.5*neighSize);
pointFeature = imfilter(pointFeature, gaussFilter, 'symmetric');
where the pointFeature is a [height, width, 24] array.
i try to use filter2D, but it only support the 2D array.
so i want to know if there are functions in opencv that can filtering the multi-dimensional array?
You can use separable kernel filters for make anydimentional filter.
If you are using OpenCV, you could try this for a 3 Dimensional MatND:
void Smooth3DHist(cv::MatND &hist, const int& kernDimension)
{
assert(hist.dims == 3);
int x_size = hist.size[0];
int y_size = hist.size[1];
int z_size = hist.size[2];
int xy_size = x_size*y_size;
cv::Mat kernal = cv::getGaussianKernel(kernDimension, -1, CV_32F);
// Filter XY dimensions for every Z
for (int z = 0; z < z_size; z++)
{
float *ind = (float*)hist.data + z * xy_size; // sub-matrix pointer
cv::Mat subMatrix(2, hist.size, CV_32F, ind);
cv::sepFilter2D(subMatrix, subMatrix, CV_32F, kernal.t(), kernal, Point(-1,-1), 0.0, cv::BORDER_REPLICATE);
}
// Filter Z dimension
float* kernGauss = (float *)kernal.data;
unsigned kernSize = kernal.total();
int kernMargin = (kernSize - 1)/2;
float* lineBuffer = new float[z_size + 2*kernMargin];
for (int y = 0; y < y_size; y++)
{
for (int x = 0; x < x_size; x++)
{
// Copy along Z dimension into a line buffer
float* z_ptr = (float*)hist.data + y * x_size + x;//same as hist.ptr<float>(0, y, x)
for (int z = 0; z < z_size; z++, z_ptr += xy_size)
{
lineBuffer[z + kernMargin] = *z_ptr;
}
// Replicate borders
for (int m = 0; m < kernMargin; m++)
{
lineBuffer[m] = lineBuffer[kernMargin];// replicate left side
lineBuffer[z_size + 2*kernMargin - 1 - m] = lineBuffer[kernMargin + z_size - 1];//replicate right side
}
// Filter line buffer 1D - convolution
z_ptr = (float*)hist.data + y * x_size + x;
for (int z = 0; z < z_size; z++, z_ptr += xy_size)
{
*z_ptr = 0.0f;
for (unsigned k = 0; k < kernSize; k++)
{
*z_ptr += lineBuffer[z+k]*kernGauss[k];
}
}
}
}
delete [] lineBuffer;
}