I am going to use grabcutNPP from the CUDA samples in order to speed up the image processing. The original sample code is implemented for FIBITMAP, but my input/output type will be cv::Mat.
I have figured out most of the code but am stuck at the cudaMemcpyDeviceToHost step... As a CUDA beginner, I have no idea why it always stops at this step.
Here is part of my code:
void grabcutGPU(Mat& _src, Mat& _dst, Rect _srcRect)
{
    GrabCut *grabcut;
    const size_t width = _src.rows;
    const size_t height = _src.cols;
    size_t image_pitch;
    size_t result_pitch;
    size_t trimap_pitch;
    uchar4 *gpu_src, *gpu_dst;
    unsigned char *d_trimap;
    NppiRect rect;
    // Rect to NppiRect
    rect.x = _srcRect.x;
    rect.y = _srcRect.y;
    rect.width = _srcRect.width;
    rect.height = _srcRect.height;
    // Malloc for src image
    checkCudaErrors(cudaMallocPitch(&gpu_src, &image_pitch, width * sizeof(uchar4), height));
    checkCudaErrors(cudaMemcpy2D(gpu_src, image_pitch, _src.ptr<uchar4>(), width * sizeof(uchar4), width * sizeof(uchar4), height, cudaMemcpyHostToDevice));
    // Malloc for trimap
    checkCudaErrors(cudaMallocPitch(&d_trimap, &trimap_pitch, width, height));
    // Setup GrabCut
    grabcut = new GrabCut(gpu_src, (int)image_pitch, d_trimap, (int)trimap_pitch, width, height);
    // Rect to trimap memory
    checkCudaErrors(TrimapFromRect(d_trimap, (int)trimap_pitch, rect, width, height));
    // GrabCut segmentation
    grabcut->computeSegmentationFromTrimap();
    // Malloc for dst image
    checkCudaErrors(cudaMallocPitch(&gpu_dst, &result_pitch, width * 4, height));
    // GPU process
    checkCudaErrors(ApplyMatte(2, gpu_dst, (int)result_pitch, gpu_src, (int)image_pitch, grabcut->getAlpha(), grabcut->getAlphaPitch(), width, height));
    size_t output_pitch = result_pitch;
    // Send result to dst
    checkCudaErrors(cudaMemcpy2D(_dst.ptr(), output_pitch, gpu_dst, result_pitch, width * 4, height, cudaMemcpyDeviceToHost));
    delete grabcut;
    checkCudaErrors(cudaDeviceSynchronize());
    checkCudaErrors(cudaFree(gpu_src));
    checkCudaErrors(cudaFree(gpu_dst));
    checkCudaErrors(cudaFree(d_trimap));
}
This question is solved.
First of all, the rows and cols were carelessly swapped.
Second, the input Mat had 3 channels, but this function needs a 4-channel Mat to carry the result. That was solved by converting the colour type.
Thanks to Micka, or I might never have noticed the channel problem.
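For reference, a minimal sketch of the two fixes (my summary, reusing the names from the function above; it assumes _dst has been allocated as a 4-channel Mat):

const size_t width  = _src.cols;  // cols is the image width
const size_t height = _src.rows;  // rows is the image height
cv::Mat src4;
cv::cvtColor(_src, src4, cv::COLOR_BGR2BGRA); // 3-channel input -> 4 channels before upload
// ...then upload src4.ptr<uchar4>() instead of _src.ptr<uchar4>()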
I'm using libtiff to read image data into an array. I have the following code:
std::vector<uint32> image;
uint32 width;
uint32 height;
TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width);
TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height);
uint32 npixels = width * height;
uint32* raster;
raster = (uint32*)_TIFFmalloc(npixels * sizeof(uint32));
if (TIFFReadRGBAImageOriented(tif, width, height, raster, ORIENTATION_TOPLEFT, 0) == 1)
{
    std::cout << "success" << std::endl;
}
This code works. However, what I actually want is to reduce my width and height so that only a cropped part of the image is read into the raster. Thus my actual code for npixels is:
uint32 npixels = (width -100) * (height -100);
When I try to run this, I get an exception:
Exception Error at 0x00007FFEC7A2FC4E (tiff.dll): Access violation when trying to write at position 0x00000251B12C7000
In the libtiff documentation it says:
The raster is assumed to be an array of width times height 32-bit entries, where width must be less than or equal to the width of the image (height may be any non-zero size). If the raster dimensions are smaller than the image, the image data is cropped to the raster bounds.
Based on that, I thought reducing npixels would do the trick... How do I cut off the right and lower parts of the image before writing it into my raster?
You only changed the number of elements in the allocated buffer, but you still read the image at its original size, so the buffer overflows and you get an access violation. To get the cropping you should pass the reduced width and height to TIFFReadRGBAImageOriented as well:
uint32 nwidth = width - 100;    // reduced raster width
uint32 nheight = height - 100;  // reduced raster height
uint32 npixels = nwidth * nheight;
raster = (uint32*)_TIFFmalloc(npixels * sizeof(uint32));
if (TIFFReadRGBAImageOriented(tif, nwidth, nheight, raster, ORIENTATION_TOPLEFT, 0) == 1)
{
    std::cout << "success" << std::endl;
}
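One small addition of mine (not part of the original answer): a raster allocated with _TIFFmalloc should be released with _TIFFfree once you are done with it:

_TIFFfree(raster);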
I am reading the raw video data from the read buffer using:
cv::Mat imgbuf(Size(640, 480), CV_8UC3, &mem[0], (640*3));
I pass this variable imgbuf to a face detection algorithm which detects the face and draws a rectangle around it. After that I get output something like:
I tried the code below, where I perform a resize before passing the frame to the face detection algorithm. Using this method it works fine, but without the resize call the output with the rectangle drawn around the face is noticeably wrong.
while(1)
{
    unsigned char *mem = (unsigned char*)mmap(NULL, page_offset + len,
        PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, page_base);
    cv::Mat imgbuf(Size(640, 480), CV_8UC3, &mem[0], (640*3));
    cv::resize(imgbuf, imgbuf, cv::Size(640, 480)); // Dummy resize to get the right output
    auto result = v->facedetection(imgbuf);
    for (const auto &r : result.rects) {
        cv::rectangle(imgbuf, cv::Rect{ cv::Point(r.x * imgbuf.cols, r.y * imgbuf.rows),
                                        cv::Size{ (int)(r.width * imgbuf.cols), (int)(r.height * imgbuf.rows) } }, 0xff);
    }
    imshow("face-detection", imgbuf);
    waitKey(1);
}
Can anybody help me sort out this problem?
Test this method:
unsigned char *mem = (unsigned char*)mmap(NULL, page_offset + len,
    PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, page_base);
cv::Mat imgbuf(480, 640, CV_8UC3, &mem[0]);
cv::Mat img_2, img_3;
cv::resize(imgbuf, img_2, cv::Size(640, 480));
img_2.copyTo(img_3);
auto result = v->facedetection(img_2);
for (const auto &r : result.rects)
{
    cv::Rect myR = cv::Rect(r.x * img_2.cols, r.y * img_2.rows,
                            (int)(r.width * img_2.cols), (int)(r.height * img_2.rows));
    cv::rectangle(img_3, myR, Scalar(0, 0, 255), 1);
}
imshow("Result", img_3);
waitKey(0);
After getting a valid result you can optimize this and use fewer Mats.
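My reading of why this works (an assumption on my part, not stated explicitly above): cv::resize and copyTo write the pixels into freshly allocated Mats, so detection and drawing no longer operate directly on the live mmapped capture buffer. The same decoupling can be had with a single deep copy:

cv::Mat frame = cv::Mat(480, 640, CV_8UC3, &mem[0]).clone(); // deep copy off the mmapped region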
TL;DR:
For anyone arriving here while trying to figure out how to do Gaussian blur or grayscale with OpenCL, the final working code is here. Note that in that repo I'm actually running the whole thing inside Docker with GPU access using Nvidia's Docker wrapper. You can look inside the Dockerfile for the steps that need to be taken to get the code running, or just run it using nvidia-docker if you have that set up and are running on an Nvidia GPU.
Original Question:
Using the following kernel in an OpenCL image filter application, I get the expected result, i.e. a grayscale version of the input image is returned:
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP_TO_EDGE |
                          CLK_FILTER_NEAREST;

__kernel void process(__read_only image2d_t src,
                      __write_only image2d_t dst)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    float4 color;
    color = read_imagef(src, sampler, (int2)(x, y));
    float gray = (color.x + color.y + color.z) / 3;
    write_imagef(dst, (int2)(x, y), (float4)(gray, gray, gray, 0));
}
So far, so good. I then tried to create a kernel that would just copy across the top and left border of the image:
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP_TO_EDGE |
                          CLK_FILTER_NEAREST;

__kernel void process(__read_only image2d_t src,
                      __write_only image2d_t dst)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    float4 color;
    if (x < 10 || y < 10)
    {
        color = read_imagef(src, sampler, (int2)(x, y));
        write_imagef(dst, (int2)(x, y), (float4)(color.x, color.y, color.z, 0));
    }
    else
    {
        write_imagef(dst, (int2)(x, y), (float4)(0, 0, 0, 0));
    }
}
The returned image is not what I expected:
I'm loading the input image this way:
// Load an image using the OpenCV library and create an OpenCL
// image out of it
cl::Image2D LoadImage(cl::Context context, char *fileName, int &width, int &height)
{
cv::Mat image = cv::imread(fileName, CV_LOAD_IMAGE_COLOR);
cv::Mat imageRGBA;
width = image.rows;
height = image.cols;
cv::cvtColor(image, imageRGBA, CV_RGB2RGBA);
char *buffer = reinterpret_cast<char *>(imageRGBA.data);
cl::Image2D clImage(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
cl::ImageFormat(CL_RGBA, CL_UNORM_INT8),
width,
height,
0,
buffer);
return clImage;
}
The output image:
cl::Image2D imageOutput(context,
CL_MEM_WRITE_ONLY,
cl::ImageFormat(CL_RGBA, CL_UNORM_INT8),
width,
height,
0,
NULL);
The kernel launch:
cl::Program program(context, util::loadProgram("border.cl"), true);
cl::make_kernel<cl::Image2D, cl::Image2D> filter(program, "process");
cl::NDRange global(width, height);
filter(cl::EnqueueArgs(queue, global), clImageInput, imageOutput);
Then reading the image back:
cl::size_t<3> origin;
origin[0] = 0; origin[1] = 0; origin[2] = 0;
cl::size_t<3> region;
region[0] = width; region[1] = height; region[2] = 1;
float* oup = new float[width * height];
queue.enqueueReadImage(imageOutput, CL_TRUE, origin, region, 0, 0, oup);
cv::imwrite(filename_out, cv::Mat(width, height, CV_8UC4, oup));
Why is the image being processed the way it is? Only selecting pixels with a y coordinate less than 10 seems to work, but selecting pixels with an x coordinate less than 10 seems to stagger across the image.
If I write a test image using the following line in the kernel:
write_imagef(dst, (int2)(x,y), (float4)((float)x / 512.0f, 0, 0, 0));
I get the following image:
The first strange thing is that the blue channel is being set, not the red. I have no idea why, as I am always loading and saving the image in RGBA order. Secondly, the banding is very unusual; I'm not sure how to interpret it.
If I use the following line in the kernel:
write_imagef(dst, (int2)(x,y), (float4)(0, (float)y / 512.0f, 0, 0));
I get the following image:
This looks the way I would expect.
I can provide more code if necessary, but the grayscale kernel in the exact same harness works perfectly, as does another kernel (not listed here) which simply copies all the pixels across.
I'm running the code on an Nvidia GeForce 980M with OpenCL 1.2.
I'm not seeing anything obvious yet. One strange thing: your image is CL_RGBA, CL_UNORM_INT8, but you're reading it out into an array of floats? How are you displaying it from that? Second, I'm not familiar with your kernel launch technique; what is filter, and is it launching with a dimension of 2? Regarding the issue you're seeing, I'd suggest using a process of elimination to figure out where the problem lies. For example: (1) if you remove the conditional and copy all pixels, do you get the whole image? (2) Instead of writing black where the conditional is false, what if you write a red channel gradient based on X position and a green channel gradient based on Y position; do you get a double gradient? Based on the results, continue to divide the problem until you find the cause. It looks a lot like a row pitch issue, perhaps in the display function?
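For reference, the second diagnostic could look like this inside the kernel (a sketch of the suggestion above, assuming the 512-pixel dimensions used in the tests below):

write_imagef(dst, (int2)(x, y), (float4)((float)x / 512.0f, (float)y / 512.0f, 0, 0));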
OK, so the issue was that the way I was reading height and width was backwards, i.e.
width = image.rows;
height = image.cols;
Should have been
height = image.rows;
width = image.cols;
With this corrected, the rest of the code can stay the same, except the last line, where I save the image to disk; there the values need to be swapped again, i.e.
cv::imwrite(filename_out, cv::Mat(width, height, CV_8UC4, oup));
Needs to change to:
cv::imwrite(filename_out, cv::Mat(height, width, CV_8UC4, oup));
I think this ultimately comes down to the matrix view of an image, where the first coordinate is actually the row number, which is the height, and the second coordinate is the column number, which is the width.
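A tiny illustration of that convention (my addition):

cv::Mat m(480, 640, CV_8UC1); // rows (height) first, then cols (width)
// m.rows == 480, m.cols == 640, and m.at<uchar>(y, x) addresses row y, column x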
The diagnostics @Dithermaster mentioned really helped, as did printing out the assumed width and height, which were ultimately incorrect.
It's interesting that with both of those errors in the code, a pixel-for-pixel copy worked fine, but once you start to perform actions based on the x,y coordinates you get some really funky results.
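One loose end (my note, following up on the comment above rather than part of the accepted fix): the read-back buffer was float[width * height], which only matches the CL_RGBA / CL_UNORM_INT8 image because 4 bytes per RGBA8 pixel happens to equal sizeof(float). A byte buffer states the intent directly:

unsigned char* oup = new unsigned char[width * height * 4]; // 4 bytes per RGBA8 pixel
queue.enqueueReadImage(imageOutput, CL_TRUE, origin, region, 0, 0, oup);
cv::imwrite(filename_out, cv::Mat(height, width, CV_8UC4, oup));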
I know how to pass putText position and font size:
void TextBox( cv::Mat & img, const std::string & text, const cv::Rect & bbox )
{
    cv::Point position;
    double size;
    int face = CV_FONT_HERSHEY_PLAIN;
    Trick( /*in:*/ text, bbox, face, /*out:*/ position, size );
    cv::putText( img, text, position, face, size, cv::Scalar( 100, 255, 100 ) );
}
How to do the trick?
I would like to scale text to fit its bounding box.
(Font face might be an unused input to that.)
It's a bit tricky, but you can play with cv::getTextSize(). There is sample code in the description of that function, but it only renders some text, the tight box surrounding it, and its baseline.
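For simple cases, here is a sketch of the direct approach (my addition, using a hypothetical helper FitFontScale; it assumes Hershey text dimensions scale roughly linearly with fontScale):

// A sketch, not a definitive implementation: measure the text at
// fontScale 1.0, then scale so it fits the box in both dimensions.
double FitFontScale(const std::string& text, const cv::Rect& bbox, int face, int thickness = 1)
{
    int baseline = 0;
    cv::Size ts = cv::getTextSize(text, face, 1.0, thickness, &baseline);
    double sx = bbox.width / (double)ts.width;
    double sy = bbox.height / (double)(ts.height + baseline);
    return sx < sy ? sx : sy; // keep the text inside both dimensions
}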
If you'd like to render the text in an arbitrary ROI of an image, then you first need to render it into another image (which fits the text size), resize that image to the desired ROI, and then put it over the image, as below:
void PutText(cv::Mat& img, const std::string& text, const cv::Rect& roi, const cv::Scalar& color, int fontFace, double fontScale, int thickness = 1, int lineType = 8)
{
    CV_Assert(!img.empty() && (img.type() == CV_8UC3 || img.type() == CV_8UC1));
    CV_Assert(roi.area() > 0);
    CV_Assert(!text.empty());
    int baseline = 0;
    // Calculate the width and height of the text string
    cv::Size textSize = cv::getTextSize(text, fontFace, fontScale, thickness, &baseline);
    // Y-coordinate of the baseline relative to the bottom-most text point
    baseline += thickness;
    // Render the text in here (an image that fits the text size)
    cv::Mat textImg(textSize.height + baseline, textSize.width, img.type());
    if (color == cv::Scalar::all(0)) textImg = cv::Scalar::all(255);
    else textImg = cv::Scalar::all(0);
    // Text origin inside the bounding image
    cv::Point textOrg((textImg.cols - textSize.width) / 2, (textImg.rows + textSize.height - baseline) / 2);
    // TR and BL points of the bounding box
    cv::Point tr(textOrg.x, textOrg.y + baseline);
    cv::Point bl(textOrg.x + textSize.width, textOrg.y - textSize.height);
    cv::putText(textImg, text, textOrg, fontFace, fontScale, color, thickness);
    // Resize according to the ROI
    cv::resize(textImg, textImg, roi.size());
    cv::Mat textImgMask = textImg;
    if (textImgMask.type() == CV_8UC3)
        cv::cvtColor(textImgMask, textImgMask, cv::COLOR_BGR2GRAY);
    // Create the mask
    cv::equalizeHist(textImgMask, textImgMask);
    if (color == cv::Scalar::all(0)) cv::threshold(textImgMask, textImgMask, 1, 255, cv::THRESH_BINARY_INV);
    else cv::threshold(textImgMask, textImgMask, 254, 255, cv::THRESH_BINARY);
    // Copy into the original image through the mask
    cv::Mat destRoi = img(roi);
    textImg.copyTo(destRoi, textImgMask);
}
And call it like:
cv::Mat image = cv::imread("C:/opencv_logo.png");
cv::Rect roi(5, 5, image.cols - 5, image.rows - 5);
cv::Scalar color(255, 0, 0);
int fontFace = cv::FONT_HERSHEY_SCRIPT_SIMPLEX;
double fontScale = 2.5;
int thickness = 2;
PutText(image, "OpenCV", roi, color, fontFace, fontScale, thickness);
As a result, the PutText() function can render any text over an arbitrary ROI of the image, for example:
Hope it helps and works :)
Update #1:
And keep in mind that text rendering (with or without this trick) in OpenCV is very expensive and can affect the runtime of your application. Other libraries may outperform OpenCV's rendering system.
I'm new to the OpenCV library, and I would like to use it to detect circles in a video stream captured from an iPad's back camera. I figured out how to do it, and with OpenCV 2.4.2 it can be done in fewer than 10 lines of code. But it doesn't work for me, and I think I missed something, judging by some weird behaviours I'm getting.
The code is very simple and begins in the Objective-C callback triggered each time a new frame is captured by the camera. Here is what I do in this callback:
- (void)captureOutput:(AVCaptureOutput *)captureOutput
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
       fromConnection:(AVCaptureConnection *)connection
{
    // Convert CMSampleBufferRef to CVImageBufferRef
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    // Lock pixel buffer
    CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
    // Construct VideoFrame struct
    uint8_t *baseAddress = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);
    size_t width = CVPixelBufferGetWidth(imageBuffer);
    size_t height = CVPixelBufferGetHeight(imageBuffer);
    size_t stride = CVPixelBufferGetBytesPerRow(imageBuffer);
    // Copy the pixels while the buffer is still locked
    std::vector<unsigned char> data(baseAddress, baseAddress + (stride * height));
    // Unlock pixel buffer (with the same flags used for locking)
    CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
    // Call C++ function with these arguments => (data, (int)width, (int)height)
}
And here is the C++ function that processes the image with OpenCV:
void proccessImage(std::vector<unsigned char>& imageData, int width, int height)
{
    // Create cv::Mat from std::vector<unsigned char>
    Mat src(width, height, CV_8UC4, const_cast<unsigned char*>(imageData.data()));
    Mat final;
    // Draw a circle at position (300, 200) with a radius of 30
    cv::Point center(300, 200);
    circle(src, center, 30.f, CV_RGB(0, 0, 255), 3, 8, 0);
    // Convert the BGRA image to RGBA
    cvtColor(src, final, CV_BGRA2RGBA);
    // Rebuild the std::vector from the cv::Mat data
    std::vector<unsigned char> array;
    array.assign((unsigned char*)final.datastart, (unsigned char*)final.dataend);
    // Send final image data to GPU and draw it
}
The image retrieved from the iPad's back camera is in BGRA (32-bit) format.
What I expected was the image from the iPad's back camera with a simple circle drawn at position x = 300px, y = 200px with a radius of 30px.
And this is what I got: http://i.stack.imgur.com/bWfwa.jpg
Do you know what is wrong with my code?
Thanks in advance.
Thanks for your help, I finally figured out what happened, and it's entirely my fault...
When you create a new Mat you need to pass the image's height as the first argument, not the width. The circle is drawn properly once I switch the arguments.
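For reference, the corrected constructor in proccessImage above (rows, i.e. the height, come first):

Mat src(height, width, CV_8UC4, const_cast<unsigned char*>(imageData.data()));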