I'm new to OpenCV and I'm trying to proccess the image from the directory, make it black and white (grayscale) and then write it down to another file. But the output image is quite different from what I expected. Maybe you can help me and indicate the errors in code?
#include <iostream>
#include <opencv2/opencv.hpp>
#include <conio.h>
#include <string.h>
#include <string>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/core/core.hpp>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
using namespace std;
void faktorial(int InSize, char *DataIn, char *DataOut)// заголовок функции
{
for(int i = 0, j = 0; i < InSize; i += 4, j++)
{
DataOut[j] = (DataIn[i] + DataIn[i + 1] + DataIn[i + 2]) / 3;
}
}
int main()
{
char* c = "E:\henrik-evensen-castle-valley-v03.jpg";
printf("Input source of image\n Example of right directory file: E:\henrik-evensen-castle-valley-v03.jpg\n Your try:\n");
char *tbLEN;
tbLEN = new char [1024];
cin.getline(tbLEN,1024);
cout << tbLEN;
IplImage* image;
image = cvLoadImage(tbLEN, 1);
int height1 = image->height;
int width1 = image->width;
int step = image->widthStep;
int SizeIn = step*height1;
char* DatIn = image->imageData;
IplImage *image2 = cvCreateImage(cvSize(image->width, image->height), IPL_DEPTH_8U, 1);
char* DatOut = image2->imageData;
faktorial(SizeIn, DatIn, DatOut);
cvNamedWindow("Imagecolor");
cvShowImage("Imagecolor", image);
cvNamedWindow("Gray");
cvShowImage("Gray", image2);
cvWaitKey(0);
return 0;
}
EDIT:
I don't need CvtColor function, I need to use that one factorial function.
In faktorial you assume you have 3 channels. So you need to increase i by 3, and not by 4. Also, you need to convert char* data to uchar* data, so that accumulation works ok:
You end up with:
void faktorial(int InSize, uchar *DataIn, uchar *DataOut)
{
for (int i = 0, j = 0; i < InSize; i += 3, j++)
{
DataOut[j] = (DataIn[i] + DataIn[i + 1] + DataIn[i + 2]) / 3;
}
}
You can easily extend this to multiple channels, like:
void faktorial2(int InSize, int nChannels, uchar *DataIn, uchar *DataOut)
{
for (int i = 0, j = 0; i < InSize; i += nChannels, j++)
{
int accum = 0;
for (int c = 0; c < nChannels; ++c)
{
accum += DataIn[i + c];
}
DataOut[j] = uchar(accum / nChannels);
}
}
You in general need also to take image stride into account:
void faktorial3(int rows, int cols, int in_step, int in_channels, int out_step, uchar *in, uchar *out)
{
for (int r = 0; r < rows; ++r)
{
for (int c = 0; c < cols; ++c)
{
int accum = 0;
for (int i = 0; i < in_channels; ++i)
{
accum += in[r*in_step + c * in_channels + i];
}
out[r*out_step + c] = uchar(accum / in_channels);
}
}
}
Here the full code with the calls:
#include <opencv2/opencv.hpp>
using namespace std;
void faktorial3(int rows, int cols, int in_step, int in_channels, int out_step, uchar *in, uchar *out)
{
for (int r = 0; r < rows; ++r)
{
for (int c = 0; c < cols; ++c)
{
int accum = 0;
for (int i = 0; i < in_channels; ++i)
{
accum += in[r*in_step + c * in_channels + i];
}
out[r*out_step + c] = uchar(accum / in_channels);
}
}
}
void faktorial(int InSize, uchar *DataIn, uchar *DataOut)
{
for (int i = 0, j = 0; i < InSize; i += 3, j++)
{
DataOut[j] = (DataIn[i] + DataIn[i + 1] + DataIn[i + 2]) / 3;
}
}
void faktorial2(int InSize, int nChannels, uchar *DataIn, uchar *DataOut)
{
for (int i = 0, j = 0; i < InSize; i += nChannels, j++)
{
int accum = 0;
for (int c = 0; c < nChannels; ++c)
{
accum += DataIn[i + c];
}
DataOut[j] = uchar(accum / nChannels);
}
}
int main()
{
char tbLEN[] = "D:\\SO\\img\\barns.jpg";
IplImage* image;
image = cvLoadImage(tbLEN, 1);
IplImage *image2 = cvCreateImage(cvSize(image->width, image->height), IPL_DEPTH_8U, 1);
int height1 = image->height;
int width1 = image->width;
int step = image->widthStep;
int SizeIn = step*height1;
int nChannels = image->nChannels;
uchar* DatIn = (uchar*)image->imageData;
uchar* DatOut = (uchar*)image2->imageData;
faktorial(SizeIn, DatIn, DatOut);
//faktorial2(SizeIn, nChannels, DatIn, DatOut);
//faktorial3(image->height, image->width, image->widthStep, image->nChannels, image2->widthStep, (uchar*)image->imageData, (uchar*)image2->imageData);
cvNamedWindow("Imagecolor");
cvShowImage("Imagecolor", image);
cvNamedWindow("Gray");
cvShowImage("Gray", image2);
cvWaitKey(0);
return 0;
}
Remember that C api is obsolete. You should switch to C++ api.
Try cvtColor(src, bwsrc, CV_RGB2GRAY);
http://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html (look for cvtColor).
Your faktorial is intended for 4 byte per pixel images (and it doesn't take into account possible line padding).
Loaded from JPG image has 3 byte per pixel, that is why you see 4 shifted ghosts.
You can modify faktorial or just convert loaded image to 4-byte format
image = cvLoadImage(tbLEN, 1);
cvtColor(image, image, CV_RGB2RGBA);
Related
Let me start by saying I am very newbie at C++ and so I don't really know the best practices or handle very well with the syntax.
I'm trying to read a black and white image, and using the sobel algorithm to detect its edges and output the result, but halfway my execution I get an error:
munmap_chunk(): invalid pointer
Aborted (core dumped)
Though the image is outputted, it's only half of it and I can't seem to figure out whats causing this.
I wrote the following code:
#include <iostream>
#include <omp.h>
#include "CImg.h"
using namespace std;
using namespace cimg_library;
int main() {
const int x_mask [9] = {
-1, 0, 1,
-2, 0, 2,
-1, 0, 1
};
const int y_mask [9] = {
-1, -2, -1,
0, 0, 0,
1, 2, 1
};
const char* fileName = "test.png";
CImg<float> img = CImg<float>(fileName);
int cols = img.width();
int lines = img.height();
CImg<float> output = CImg<float>(cols, lines, 1, 1, 0.0);
printf("Loading %d x %d image...\n", cols, lines);
const int mask_size = 3;
int gradient_x;
int gradient_y;
// Loop through image ignoring borders
for(int i = 1; i<cols-1; i++) {
for(int j = 1; j<lines-1; j++){
output(j,i) = 0;
gradient_x = 0;
gradient_y = 0;
// Find the x_gradient and y_gradient
for(int m = 0; m < mask_size; m++) {
for(int n = 0; n < mask_size; n++) {
// Neighbourgh pixels
int np_x = j + (m - 1);
int np_y = i + (n - 1);
float v = img(np_x,np_y);
int mask_index = (m*3) + n;
gradient_x = gradient_x + (x_mask[mask_index] * v);
gradient_y = gradient_y + (y_mask[mask_index] * v);
}
}
float gradient_sum = sqrt((gradient_x * gradient_x) + (gradient_y * gradient_y));
if(gradient_sum >= 255) {
gradient_sum = 255;
} else if(gradient_sum <= 0) {
gradient_sum = 0;
}
output(j, i) = gradient_sum;
}
}
printf("Outputed image of size %d x %d\n", output.width(), output.height());
output.save("test_edges.png");
return 0;
}
Applied to this image:
I get this ouput:
I have tried to extract patches from an image parallelly with pixel shift/overlapping. I have written the CPU version of the code. But I could not able to convert the for loop which has an increment of pixel shift. I have given the part of the code where for loop is being used. CreatePatchDataSet function has the "for loop " which has an increment of pixel shift. Please help me out to convert this function into Cuda. I have provided the following code.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <random>
#include <vector>
#include <omp.h>
using namespace std;
using namespace cv;
#define PATCH_SIZE (5)
#define PIXEL_SHIFT (2)
void ConvertMat2DoubleArray(cv::Mat input, double* output)
{
for (int i = 0; i < input.rows; i++)
{
double *src = input.ptr<double>(i);
for (int j = 0; j < input.cols; j++)
{
output[input.cols * input.channels() * i + input.channels() * j + 0] = src[j];
}
}
}
void GetNumOfPatch(const int width, const int height, const int patch_size, const int pixel_shift, int* num_of_patch, int* num_of_patch_col, int* num_of_patch_row) {
*num_of_patch_col = 0;
int len_nb = 0;
while (len_nb < width) {
if (len_nb != 0) {
len_nb += patch_size - (patch_size - pixel_shift);
}
else {
len_nb += patch_size;
}
(*num_of_patch_col)++;
}
len_nb = 0;
*num_of_patch_row = 0;
while (len_nb < height) {
if (len_nb != 0) {
len_nb += patch_size - (patch_size - pixel_shift);
}
else {
len_nb += patch_size;
}
(*num_of_patch_row)++;
}
*num_of_patch = (*num_of_patch_col) * (*num_of_patch_row);
}
void CreatePatchDataSet(double *original_data, double* patch_data, const int width, const int height, const int pixel_shift, const int patch_size, const int num_of_patch_col, const int num_of_patch_row) {
int counter_row = 0;
int num_of_patch_image = num_of_patch_row * num_of_patch_col;
for (int i = 0; i < height; i += pixel_shift) {
int counter_col = 0;
for (int j = 0; j < width; j += pixel_shift) {
//Get Low Resolution Image
for (int ii = 0; ii < patch_size; ii++) {
for (int jj = 0; jj < patch_size; jj++) {
if ((i + ii) < height && (j + jj) < width) {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = original_data[width*(i + ii) + (j + jj)];
}
else {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = 0.;
}
}
}
counter_col++;
if (counter_col == num_of_patch_col) {
break;
}
}
counter_row++;
if (counter_row == num_of_patch_row) {
break;
}
}
}
int main()
{
int ratio=2;
cv::Mat image = cv::imread("input_b2_128.tif", CV_LOAD_IMAGE_UNCHANGED);
cv::Mat imageH = cv::Mat(image.rows * ratio, image.cols * ratio, CV_8UC1);
cv::resize(image, imageH, cv::Size(imageH.cols, imageH.rows), 0, 0,
cv::INTER_LANCZOS4);
double* orgimageH = (double*)calloc(imageH.cols*imageH.rows*image.channels(), sizeof(double));
ConvertMat2DoubleArray(imageH, orgimageH);
int widthH = imageH.cols;
int heightH = imageH.rows;
int dimH = (int)PATCH_SIZE * (int)PATCH_SIZE* (int)image.channels();
int dimL = (int)PATCH_SIZE/ratio* (int)PATCH_SIZE/ratio * (int)image.channels();
//3. Create training data set=========================
int num_of_patch_image = 0;
int num_of_patch_col = 0;
int num_of_patch_row = 0;
GetNumOfPatch(widthH, heightH, (int)PATCH_SIZE, (int)PIXEL_SHIFT, &num_of_patch_image, &num_of_patch_col, &num_of_patch_row);
cout<<"patch numbers: \n " << num_of_patch_image << endl;
double* FY = (double*)calloc(dimH * num_of_patch_image, sizeof(double));
CreatePatchDataSet(orgimageH, FY, widthH, heightH, (int)PIXEL_SHIFT, (int)PATCH_SIZE, num_of_patch_col, num_of_patch_row);
free(orgimageH);
free(FY);
return 0;
}
The results I got for first 10 values in CPU version:
patch numbers:
16129
238,240,240,235,237,230,227,229,228,227
I have tried to convert this function to Kernel function using cuda:. But it goes into the infinite loop. As I am very new to this CUDA field, could you please help me to find out the problem in the code ?
__global__ void CreatePatchDataSet(double *original_data, double* patch_data, const int width, const int height, const int pixel_shift, const int patch_size, const int num_of_patch_col, const int num_of_patch_row) {
int num_of_patch_image = num_of_patch_row * num_of_patch_col;
int i = threadIdx.x + (blockDim.x*blockIdx.x);
int j = threadIdx.y + (blockDim.y*blockIdx.y);
while (i<height && j< width)
{
int counter_row = 0;
int counter_col = 0;
//Get Low Resolution Image
for (int ii = 0; ii < patch_size; ii++) {
for (int jj = 0; jj < patch_size; jj++) {
if ((i + ii) < height && (j + jj) < width) {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = original_data[width*(i + ii) + (j + jj)];
}
else {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = 0.;
}
}
}
counter_col++;
if (counter_col == num_of_patch_col) {
break;
}
counter_row++;
if (counter_row == num_of_patch_row) {
break;
}
}
i+= blockDim.x*gridDim.x;
j+= blockDim.y*gridDim.y;
}
int main()
{
int ratio=2;
cv::Mat image = cv::imread("input_b2_128.tif", CV_LOAD_IMAGE_UNCHANGED);
cv::Mat imageH = cv::Mat(image.rows * ratio, image.cols * ratio, CV_8UC1);
cv::resize(image, imageH, cv::Size(imageH.cols, imageH.rows), 0, 0, cv::INTER_LANCZOS4);
double *orgimageH = (double*)calloc(imageH.cols*imageH.rows*image.channels(), sizeof(double));
ConvertMat2DoubleArray(imageH, orgimageH);
int widthH = imageH.cols;
int heightH = imageH.rows;
//
int dimH = (int)PATCH_SIZE * (int)PATCH_SIZE* (int)image.channels();
int dimL = (int)PATCH_SIZE/ratio* (int)PATCH_SIZE/ratio * (int)image.channels();
//3. Create training data set=========================
int num_of_patch_image = 0;
int num_of_patch_col = 0;
int num_of_patch_row = 0;
GetNumOfPatch(widthH, heightH, (int)PATCH_SIZE, (int)PIXEL_SHIFT, &num_of_patch_image, &num_of_patch_col, &num_of_patch_row);
cout<<"patch numbers: \n " << num_of_patch_image << endl;
double* FY = (double*)calloc(dimH * num_of_patch_image, sizeof(double));
double *d_orgimageH;
gpuErrchk(cudaMalloc ((void**)&d_orgimageH, sizeof(double)*widthH*heightH));
double *d_FY;
gpuErrchk(cudaMalloc ((void**)&d_FY, sizeof(double)* dimH * num_of_patch_image));
gpuErrchk(cudaMemcpy(d_orgimageH , orgimageH , sizeof(double)*widthH*heightH, cudaMemcpyHostToDevice));
dim3 dimBlock(16, 16);
dim3 dimGrid;
dimGrid.x = (widthH + dimBlock.x - 1) / dimBlock.x;
dimGrid.y = (heightH + dimBlock.y - 1) / dimBlock.y;
CreatePatchDataSet<<<dimGrid,dimBlock>>>(d_orgimageH, d_FY, widthH, heightH, (int)PIXEL_SHIFT, (int)PATCH_SIZE, num_of_patch_col, num_of_patch_row);
gpuErrchk(cudaMemcpy(FY,d_FY, sizeof(double)*dimH * num_of_patch_image, cudaMemcpyDeviceToHost));
// cout<<"Hello world";
free(orgimageH);
free(FY);
cudaFree(d_FY);
cudaFree(d_orgimageH);
return 0;
}
Image I have used: [1]: https://i.stack.imgur.com/Ywg7p.png
i+= blockDim.x*gridDim.x;
j+= blockDim.y*gridDim.y;
is outside the while loop in your kernel. As i and j never change inside the while loop, it isn't stopping. There could be more problems here, but this is the most prominent one.
EDIT: Another one that I found, is that you have only one while over both i and j instead of one for each. You should probably use for loops like in your CPU code:
for (i = pixel_shift * (threadIdx.x + (blockDim.x*blockIdx.x));
i < height;
i += pixel_shift * blockDim.x * gridDim.x) {
for (j = ...; j < ...; j += ...) {
/* ... */
}
}
EDIT 2:
I could imagine this to be a good idea:
for (counter_row = threadIdx.y + blockDim.y * blockIdx.y;
counter_row < num_of_patch_row;
counter_row += blockDim.y * gridDim.y) {
i = counter_row * pixel_shift;
if (i > height)
break;
for (counter_col = threadIdx.x + blockDim.x * blockIdx.x;
counter_col < num_of_patch_col;
counter_col += blockDim.x * gridDim.x) {
j = counter_col * pixel_shift;
if (j > width)
break;
/* ... */
}
}
I have also exchanged the x/y fields of the execution parameters between the inner and the outer loop, as it seemed more appropriate considering that the x field is continuous in warps (memory access benefits).
Reference with this Question & answer by #Decade Moon
How can i use that method for generate image from byte array instead of image file.
i tried like below but nothing works. no image are shown
std::vector<char> data= std::vector<char>(imgx->Height * imgx->Width * 4);
int offset;
for (int row = 0; row < imgx->Height; row++)
{
for (int col = 0; col < imgx->Width; col++)
{
offset = (row * (int)(imgx->Width * 4)) + (col * 4);
data[offset] = 0x58; // Red
data[offset + 1] = 0x58; // Green
data[offset + 2] = 0x58; // Blue
data[offset + 3] = 0x58; // Alpha
}
};
My approach is little bit different from the reply you reffered to, but it works pretty well.
#include <wrl.h>
#include <robuffer.h>
using namespace Windows::UI::Xaml::Media::Imaging;
using namespace Windows::Storage::Streams;
using namespace Microsoft::WRL;
typedef uint8 byte;
byte* GetPointerToPixelData(IBuffer^ pixelBuffer, unsigned int *length)
{
if (length != nullptr)
{
*length = pixelBuffer ->Length;
}
// Query the IBufferByteAccess interface.
ComPtr<IBufferByteAccess> bufferByteAccess;
reinterpret_cast<IInspectable*>(pixelBuffer)->QueryInterface(IID_PPV_ARGS(&bufferByteAccess));
// Retrieve the buffer data.
byte* pixels = nullptr;
bufferByteAccess->Buffer(&pixels);
return pixels;
}
MainPage::MainPage()
{
InitializeComponent();
auto bitmap = ref new WriteableBitmap(50, 50);
image->Source = bitmap;
unsigned int length;
byte* sourcePixels = GetPointerToPixelData(bitmap->PixelBuffer, &length);
const unsigned int width = bitmap->PixelWidth;
const unsigned int height = bitmap->PixelHeight;
create_async([this, width, height, sourcePixels] {
byte* temp = sourcePixels;
// generate RED - BLUE gradient
for(unsigned int k = 0; k < height; k++) {
for (unsigned int i = 0; i < (width * 4); i += 4) {
int pos = k * (width * 4) + (i);
temp[pos] = (byte)(0xFF * k / (float)height); // B
temp[pos + 1] = 0x0; // G
temp[pos + 2] = 0xFF - (byte)(0xFF * k / (float)height); // R
temp[pos + 3] = 0xFF; // A
}
}
});
}
I want to find dominant N colors on the picture. For this purpose I decided to use KMeans algorithm. My project written on C, that is way I used cvKMeans2 algorithm. But it gives me very strange results. Then I decided to try kmeans algorithm on OpenCV C++. It gives me more accurate results. So, where is my fault? Could someone explain it to me?
1. I used this image for test.
2. Implementation on C.
#include <cv.h>
#include <highgui.h>
#define CLUSTERS 3
int main(int argc, char **argv) {
const char *filename = "test_12.jpg";
IplImage *tmp = cvLoadImage(filename);
if (!tmp) {
return -1;
}
IplImage *src = cvCloneImage(tmp);
cvCvtColor(tmp, src, CV_BGR2RGB);
CvMat *samples = cvCreateMat(src->height * src->width, 3, CV_32F);
for (int i = 0; i < samples->height; i++) {
samples->data.fl[i * 3 + 0] = (uchar) src->imageData[i * 3 + 0];
samples->data.fl[i * 3 + 1] = (uchar) src->imageData[i * 3 + 1];
samples->data.fl[i * 3 + 2] = (uchar) src->imageData[i * 3 + 2];
}
CvMat *labels = cvCreateMat(samples->height, 1, CV_32SC1);
CvMat *centers = cvCreateMat(CLUSTERS, 3, CV_32FC1);
int flags = 0;
int attempts = 5;
cvKMeans2(samples, CLUSTERS, labels,
cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.005),
attempts, 0, flags, centers);
int rows = 40;
int cols = 300;
IplImage *des = cvCreateImage(cvSize(cols, rows), 8, 3);
int part = 4000;
int r = 0;
int u = 0;
for (int y = 0; y < 300; ++y) {
for (int x = 0; x < 40; ++x) {
if (u >= part) {
r++;
part = (r + 1) * part;
}
des->imageData[(300 * x + y) * 3 + 0] = static_cast<char>(centers->data.fl[r * 3 + 0]);
des->imageData[(300 * x + y) * 3 + 1] = static_cast<char>(centers->data.fl[r * 3 + 1]);
des->imageData[(300 * x + y) * 3 + 2] = static_cast<char>(centers->data.fl[r * 3 + 2]);
u++;
}
}
IplImage *dominant_colors = cvCloneImage(des);
cvCvtColor(des, dominant_colors, CV_BGR2RGB);
cvNamedWindow("dominant_colors", CV_WINDOW_AUTOSIZE);
cvShowImage("dominant_colors", dominant_colors);
cvWaitKey(0);
cvDestroyWindow("dominant_colors");
cvReleaseImage(&src);
cvReleaseImage(&des);
cvReleaseMat(&labels);
cvReleaseMat(&samples);
return 0;
}
3. Implementation on C++.
#include <cv.h>
#include <opencv/cv.hpp>
#define CLUSTERS 3
int main(int argc, char **argv) {
const cv::Mat &tmp = cv::imread("test_12.jpg");
cv::Mat src;
cv::cvtColor(tmp, src, CV_BGR2RGB);
cv::Mat samples(src.rows * src.cols, 3, CV_32F);
for (int y = 0; y < src.rows; y++)
for (int x = 0; x < src.cols; x++)
for (int z = 0; z < 3; z++)
samples.at<float>(y + x * src.rows, z) = src.at<cv::Vec3b>(y, x)[z];
int attempts = 5;
cv::Mat labels;
cv::Mat centers;
kmeans(samples, CLUSTERS, labels, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 1000, 0.005),
attempts, cv::KMEANS_PP_CENTERS, centers);
cv::Mat colors(cv::Size(CLUSTERS * 100, 30), tmp.type());
int p = 100;
int cluster_id = 0;
for (int x = 0; x < CLUSTERS * 100; x++) {
for (int y = 0; y < 30; y++) {
if (x >= p) {
cluster_id++;
p = (cluster_id + 1) * 100;
}
colors.at<cv::Vec3b>(y, x)[0] = static_cast<uchar>(centers.at<float>(cluster_id, 0));
colors.at<cv::Vec3b>(y, x)[1] = static_cast<uchar>(centers.at<float>(cluster_id, 1));
colors.at<cv::Vec3b>(y, x)[2] = static_cast<uchar>(centers.at<float>(cluster_id, 2));
}
}
cv::Mat dominant_colors;
cv::cvtColor(colors, dominant_colors, CV_RGB2BGR);
cv::imshow("dominant_colors", dominant_colors);
cv::waitKey(0);
return 0;
}
4. Result of code on C.
5. Result of code on C++.
I found my mistake. It is related to IplImage's widthStep field. As I read here widthStep gets padded up to a multiple of 4 for performance reasons. If widthStep is equal to 30 it will padded up to 32.
int h = src->height;
int w = src->width;
int c = 3;
int delta = 0;
for (int i = 0, y = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
for (int k = 0; k < c; ++k, y++) {
samples->data.fl[i * w * c + c * j + k] = (uchar) src->imageData[delta + i * w * c + c * j + k];
}
}
delta += src->widthStep - src->width * src->nChannels;
}
With pointers
for (int x = 0, i = 0; x < src->height; ++x) {
auto *ptr = (uchar *) (src->imageData + x * src->widthStep);
for (int y = 0; y < src->width; ++y, i++) {
for (int j = 0; j < 3; ++j) {
samples->data.fl[i * 3 + j] = ptr[3 * y + j];
}
}
}
I am doing a small project in image processing using CUDA.I am trying to use Gaussian blurring to blur an image.Everything is fine but I cannot figure out why the kernel launch statement is showing this strange error:
Here is my complete code, if it can be of any help:
#include<time.h>
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<math.h>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#include <helper_cuda.h>
#include <helper_cuda_gl.h>
#include<helper_image.h>
#include< helper_cuda_gl.h>
#include<helper_cuda_drvapi.h>
unsigned int width, height;
int mask[3][3] = { 1, 2, 1,
2, 3, 2,
1, 2, 1,
};
int getPixel(unsigned char *arr, int col, int row)
{
int sum = 0;
for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++)
{
int color = arr[(row + j)*width + (col + i)];
sum += color*mask[i + 1][j + 1];
}
}
return sum / 15;
}
void h_blur(unsigned char * arr, unsigned char * result){
int offset = 2 * width;
for (int row = 2; row < height - 3; row++)
{
for (int col = 2; col < width - 3; col++)
{
result[offset + col] = getPixel(arr, col, row);
}
offset += width;
}
}
__global__ void d_blur(unsigned char *arr, unsigned char * result, int width, int height)
{
int col = blockIdx.x*blockDim.x + threadIdx.x;
int row = blockIdx.y*blockDim.y + threadIdx.y;
if (row < 2 || col < 2 || row >= height - 3 || col >= width - 3)
return;
int mask[3][3] = { 1, 2, 1, 2, 3, 2, 1, 2, 1 };
int sum = 0;
for (int j = -1; j <= 1; j++)
{
int color = arr[(row + j)*width + (col + i)];
sum += color*mask[i + 1][j + 1];
}
result[row*width + col] = sum / 15;
}
int main(int argc, char ** argv)
{
unsigned char *d_resultPixels;
unsigned char *h_resultPixels;
unsigned char *h_pixels = NULL;
unsigned char *d_pixels = NULL;
char *srcPath = "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v6.5\3_Imaging\dxtc\data\lena_std.ppm";
char *h_ResultPath = "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v6.5\3_Imaging\dxtc\data\lena_std.ppm";
char *d_ResultPath = "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v6.5\3_Imaging\dxtc\data\lena_std.ppm";
sdkLoadPGM(srcPath, &h_pixels, &width, &height);
int ImageSize = sizeof(unsigned char) * width * height;
h_resultPixels = (unsigned char *)malloc(ImageSize);
cudaMalloc((void**)&d_pixels, ImageSize);
cudaMalloc((void**)&d_resultPixels, ImageSize);
cudaMemcpy(d_pixels, h_pixels, ImageSize, cudaMemcpyHostToDevice);
dim3 block(16, 16);
dim3 grid(width / 16, height / 16);
d_blur << < grid, block >> >(d_pixels, d_resultPixels, width, height);
cudaThreadSynchronize();
cudaMemcpy(h_resultPixels, d_resultPixels, ImageSize, cudaMemcpyDeviceToHost);
sdkSavePGM(d_ResultPath, h_resultPixels, width, height);
printf("Press enter to exit ...\n");
getchar();
}
As you are trying to run this in Visual Studio, you need to update the Intellisense. Also,you can refer the following link for a better Image Convolution Operation in CUDA.
2D Image Convolution in CUDA