Texture object fetching in CUDA (C++)

I can find many examples online that use CUDA texture references, but not so many that rely on texture objects. I am trying to understand why my code below always fetches 0 rather than my input texture. Am I missing something, or using a wrong setting? I simplified it as much as I could:
#include <cstddef>
#include <cstring>
#include <stdio.h>

__global__ void fetch(cudaTextureObject_t tex, std::size_t width, std::size_t height)
{
    for (int j = 0; j < height; j++) {
        for (int i = 0; i < width; i++) {
            float u = (i + 0.5f) / width;
            float v = (j + 0.5f) / height;
            auto p = tex2D<uchar4>(tex, u, v);
            printf("i=%d, j=%d -> u=%3.2f, v=%3.2f, r=%d, g=%d, b=%d, a=%d\n", i, j, u, v, p.x, p.y, p.z, p.w);
            // -> always returns p = {0, 0, 0, 0}
        }
    }
}

int main() {
    constexpr std::size_t width = 2;
    constexpr std::size_t height = 2;

    // creating a dummy texture
    uchar4 image[width*height];
    for (std::size_t j = 0; j < height; ++j) {
        for (std::size_t i = 0; i < width; ++i)
            image[j*width+i] = make_uchar4(255*j/height, 255*i/width, 55, 255);
    }

    cudaArray_t cuArray;
    auto channelDesc = cudaCreateChannelDesc<uchar4>();
    cudaMallocArray(&cuArray, &channelDesc, width, height);
    cudaMemcpy2DToArray(cuArray, 0, 0, image, width*sizeof(uchar4), width*sizeof(uchar4), height, cudaMemcpyHostToDevice);

    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeBorder;
    texDesc.addressMode[1] = cudaAddressModeBorder;
    texDesc.filterMode = cudaFilterModeLinear;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);

    fetch<<<1, 1>>>(texObj, width, height);
    cudaDeviceSynchronize();

    cudaDestroyTextureObject(texObj);
    cudaFreeArray(cuArray);
    return 0;
}

In your code you specify the texture description as
texDesc.addressMode[0] = cudaAddressModeBorder;
texDesc.addressMode[1] = cudaAddressModeBorder;
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 1;
and the array holding the texture data is defined as
auto channelDesc = cudaCreateChannelDesc<uchar4>();
Quoting the documentation:
Linear Filtering
In this filtering mode, which is only available for floating-point textures, ...
You have a uchar4 texture. You can't use linear filtering on an integer texture. Either change to a floating point texture type or use another read mode.
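A minimal sketch of the two options (untested, keeping the rest of the setup above unchanged):
// Option 1: keep the uchar4 array but read it as normalized floats;
// linear filtering is then allowed and the fetch type becomes float4 in [0, 1].
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
// in the kernel:
auto p = tex2D<float4>(tex, u, v); // multiply by 255.0f to recover byte values

// Option 2: keep cudaReadModeElementType and tex2D<uchar4>, but turn off filtering.
texDesc.filterMode = cudaFilterModePoint;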

Related

how to use GL_RGB9_E5 format?

I am trying to use the GL_RGB9_E5 format with a 3D texture. I created a simple test to understand how the format is used, but I am not getting what I expect. The following is the test program:
GLuint texture;
const unsigned int depth = 1;
const unsigned int width = 7;
const unsigned int height = 3;

glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_3D, texture);

const float color[3] = { 0.0f, 0.5f, 0.0f };
const rgb9e5 uColor = float3_to_rgb9e5(color);

GLuint * t1 = (GLuint*)malloc(width*height*depth* sizeof(GLuint));
GLuint * t2 = (GLuint*)malloc(width*height*depth* sizeof(GLuint));

int index = 0;
for (int i = 0; i < depth; i++)
    for (int j = 0; j < width; j++)
        for (int k = 0; k < height; k++)
        {
            t1[index] = uColor.raw;
            index++;
        }

glTexImage3D(GL_TEXTURE_3D, 0,
             GL_RGB9_E5,
             width, height, depth, 0,
             GL_RGB,
             GL_UNSIGNED_INT,
             (void*)t1);

glGetTexImage(GL_TEXTURE_3D, 0, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, t2);

index = 0;
for (int i = 0; i < depth; i++)
    for (int j = 0; j < width; j++)
        for (int k = 0; k < height; k++)
        {
            rgb9e5 t;
            t.raw = t2[index];
            index++;
        }
What I expect is that whatever uColor.raw I put into t1 comes back as the same raw color in t2.
I have taken the float3_to_rgb9e5 method from the Specification for GL_RGB9_E5; the code can be found at the bottom of that document.
It would be great if someone could give me an example of how to use this correctly, or show the correct way of doing it.
GL_RGB,
GL_UNSIGNED_INT,
This means that you are passing 3-channel pixel data, where each channel is a normalized, 32-bit unsigned integer. But that's not what you're actually doing.
You're actually passing three-channel data packed into a single 32-bit unsigned integer, such that the first 5 bits represent a shared floating-point exponent, followed by three sets of 9 bits, each representing the unsigned mantissa of a float. We spell that:
GL_RGB,
GL_UNSIGNED_INT_5_9_9_9_REV
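So the upload should use the packed type as well. A sketch of the corrected call (untested, assuming t1 still holds the packed rgb9e5 words):
glTexImage3D(GL_TEXTURE_3D, 0,
             GL_RGB9_E5,
             width, height, depth, 0,
             GL_RGB,
             GL_UNSIGNED_INT_5_9_9_9_REV, // packed 32-bit words, one per pixel
             (void*)t1);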

How do I create a dynamic array of arrays (of arrays)?

I'm trying to create a dynamic array of arrays (of arrays). But for some reason the data gets corrupted. I'm using the data to generate a texture in an OpenGL application.
The following code works fine:
unsigned char imageData[64][64][3];
for (int i = 0; i < 64; i++)
{
    for (int j = 0; j < 64; j++)
    {
        unsigned char r = 0, g = 0, b = 0;
        if (i < 32)
        {
            if (j < 32)
                r = 255;
            else
                b = 255;
        }
        else
        {
            if (j < 32)
                g = 255;
        }
        imageData[i][j][0] = r;
        imageData[i][j][1] = g;
        imageData[i][j][2] = b;
    }
    std::cout << std::endl;
}
glTexImage2D(target, 0, GL_RGB, 64, 64, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);
Problem is, I want to be able to create a texture of any size (not just 64*64). So I'm trying this:
unsigned char*** imageData = new unsigned char**[64]();
for (int i = 0; i < 64; i++)
{
    imageData[i] = new unsigned char*[64]();
    for (int j = 0; j < 64; j++)
    {
        imageData[i][j] = new unsigned char[3]();
        unsigned char r = 0, g = 0, b = 0;
        if (i < 32)
        {
            if (j < 32)
                r = 255;
            else
                b = 255;
        }
        else
        {
            if (j < 32)
                g = 255;
        }
        imageData[i][j][0] = r;
        imageData[i][j][1] = g;
        imageData[i][j][2] = b;
    }
    std::cout << std::endl;
}
glTexImage2D(target, 0, GL_RGB, 64, 64, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);
But that doesn't work; the image gets all messed up, so I assume I'm creating the array of arrays (of arrays) incorrectly. What am I doing wrong?
Also, I guess I should be using vectors instead. But how can I cast the data of a vector of vectors of vectors into a (void *)?
This line contains multiple bugs:
unsigned char* pixel = &(imageData[(y * height) + x]);
You should multiply x by height and add y. And there's also the fact that each pixel is actually 3 bytes. Some issues in your code that led to this bug (and will lead to others):
You should be using std::vector. You can call std::vector::data to get a pointer to the underlying data to interface with C APIs.
You should have a class that represents a pixel. It will handle the offsetting correctly, give things names, and make the code clearer.
Whenever you work with a multi-dimensional array that you encode into a single-dimensional one, you should carefully write an access function that takes care of the indexing so you can test it separately.
#include <vector>

struct Pixel {
    unsigned char red;
    unsigned char green;
    unsigned char blue;
};

struct TwoDimPixelArray {
    TwoDimPixelArray(int width, int height)
        : m_width(width), m_height(height)
    {
        m_vector.resize(m_width * m_height);
    }

    Pixel& get(int x, int y) {
        return m_vector[x * m_height + y];
    }

    Pixel* data() { return m_vector.data(); }

private:
    int m_width;
    int m_height;
    std::vector<Pixel> m_vector;
};
int width = 64;
int height = 64;

TwoDimPixelArray imageData(width, height);

for (int x = 0; x != width; ++x) {
    for (int y = 0; y != height; ++y) {
        auto& pixel = imageData.get(x, y);
        // ... pixel.red = something, pixel.blue = something, etc
    }
}

glTexImage2D(target, 0, GL_RGB, 64, 64, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData.data());
You need to use contiguous memory for it to work with OpenGL.
My solution is inspired by the previous answers, with a different indexing system:
unsigned char* imageData = new unsigned char[width * height * 3];
unsigned char r, g, b;  // assumed to be set for each pixel, as in the question

const unsigned int row_size_bytes = width * 3;
for (unsigned int x = 0; x < width; x++) {
    unsigned int current_row_offset_bytes = x * 3;
    for (unsigned int y = 0; y < height; y++) {
        unsigned int one_dim_offset = y * row_size_bytes + current_row_offset_bytes;
        unsigned char* pixel = &(imageData[one_dim_offset]);
        pixel[0] = r;
        pixel[1] = g;
        pixel[2] = b;
    }
}
Unfortunately it's untested, but I'm fairly confident it's right, assuming sizeof(char) is 1.
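As a possible usage example (my own sketch, not from the original answer): fill the buffer with a quadrant pattern similar to the question's 64x64 example and upload it. Setting glPixelStorei(GL_UNPACK_ALIGNMENT, 1) is worth doing for tightly packed RGB data whose row size is not a multiple of 4.
const unsigned int width = 64, height = 64;
unsigned char* imageData = new unsigned char[width * height * 3];
for (unsigned int y = 0; y < height; y++) {
    for (unsigned int x = 0; x < width; x++) {
        unsigned char* pixel = &imageData[(y * width + x) * 3];
        pixel[0] = (x < width / 2 && y < height / 2) ? 255 : 0;   // red: top-left quadrant
        pixel[1] = (x < width / 2 && y >= height / 2) ? 255 : 0;  // green: bottom-left quadrant
        pixel[2] = (x >= width / 2 && y < height / 2) ? 255 : 0;  // blue: top-right quadrant
    }
}
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);  // rows are tightly packed
glTexImage2D(target, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);
delete[] imageData;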

Texture turns gray in OpenGL when alpha channel is added manually

int j, k, t3, t4;
for (int i = 0; i < width; i++)
    for (j = 0; j < height; j++)
    {
        t3 = (i*height + j) * 3;
        t4 = (i*height + j) * 4;
        for (k = 0; k < 3; k++)
            texture[t4 + k] = data[t3 + k];
        texture[t4 + 3] = (data[t3 + 1] == 255 && data[t3 + 2] == 255 && data[t3] == 255) ? 0 : 255;
    }

GLuint textureID;
glGenTextures(1, &textureID);
glBindTexture(GL_TEXTURE_2D, textureID);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_BYTE, texture);
It's a bitmap file, and I loaded it successfully with the original data:
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_BGR, GL_UNSIGNED_BYTE, data);
But when I add the alpha channel manually, the texture loses its color. The picture is attached below.
I can't quite explain the exact symptoms, but the index arithmetic in your code looks off:
for (int i = 0; i < width; i++)
    for (j = 0; j < height; j++)
    {
        t3 = (i*height + j) * 3;
        t4 = (i*height + j) * 4;
Since images are normally laid out in memory row by row, the index that iterates over the pixels in the row should be the one added without a multiplier, while the other index is multiplied by width (not height). This should be:
for (j = 0; j < height; j++)
    for (int i = 0; i < width; i++)
    {
        t3 = (j * width + i) * 3;
        t4 = (j * width + i) * 4;
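Applying that to the full loop from the question, a corrected version might look like this (a sketch, keeping everything else unchanged):
for (j = 0; j < height; j++)
    for (int i = 0; i < width; i++)
    {
        t3 = (j * width + i) * 3;
        t4 = (j * width + i) * 4;
        for (k = 0; k < 3; k++)
            texture[t4 + k] = data[t3 + k];
        // white source pixels become transparent, everything else opaque
        texture[t4 + 3] = (data[t3] == 255 && data[t3 + 1] == 255 && data[t3 + 2] == 255) ? 0 : 255;
    }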

CImg: Image binarization result fails

The problem with my code below is that the result of the image binarization comes out far too dark. (One example image I have even produces a completely black binary image.)
I have been searching for a mistake in my code for a long time and haven't found anything that looks problematic to me.
Below is the image I want to binarize:
Image before binarization (named "hildebrantmed.bmp" in my code)
Below is the resulting binary image:
Image after binarization
Before I show you my source code, here are the 'rules' for the image binarization (it is an assignment I recently received):
I am not allowed to use any libraries other than CImg.
The programming language must be C/C++, nothing else.
Normally, Otsu's method is the choice. However, I may be allowed to use other algorithms if they work better.
Lastly, here is my source code:
#include <iostream>
#include <CImg.h>

using namespace std;
using namespace cimg_library;

/**
 * Generate histogram of the grayscale image
 */
int * generate_histogram(CImg<unsigned char> img)
{
    int histogram[256];

    // initialize default values for histogram
    for (int i = 0; i < 256; i++)
    {
        histogram[i] = 0;
    }

    // increment intensity for histogram
    for (int i = 0; i < img.height(); i++)
    {
        for (int j = 0; j < img.width(); j++)
        {
            int gray_value = img(j, i, 0, 0);
            histogram[gray_value]++;
        }
    }

    return histogram;
}

/**
 * Find threshold value from the grayscale image's histogram
 */
int otsu_threshold(CImg<unsigned char> img)
{
    int * histogram = generate_histogram(img); // image histogram
    int total = img.width() * img.height();    // total pixels
    double sum = 0;
    int i;
    for (i = 0; i < 256; i++)
    {
        sum += i * histogram[i];
    }

    double sumB = 0;
    int wB = 0;
    int wF = 0;

    double var_max = 0;
    int threshold = 0;

    for (i = 0; i < 256; i++)
    {
        wB += histogram[i];
        if (wB == 0) continue;

        wF = total - wB;
        if (wF == 0) continue;

        sumB += (double)(i * histogram[i]);
        double mB = sumB / wB;
        double mF = (sum - sumB) / wF;

        double var_between = (double)wB * (double)wF * (mB - mF) * (mB - mF);
        if (var_between > var_max)
        {
            var_max = var_between;
            threshold = i;
        }
    }

    return threshold;
}

/**
 * Main function
 */
int main(int argc, char * argv[])
{
    // retrieve image from its path
    CImg<unsigned char> img("hildebrantmed.bmp");

    const int width = img.width();
    const int height = img.height();

    // initialize a new image for img's grayscale
    CImg<unsigned char> gray_img(width, height, 1, 1, 0);

    // from RGB divided into three separate channels
    CImg<unsigned char> imgR(width, height, 1, 3, 0);
    CImg<unsigned char> imgG(width, height, 1, 3, 0);
    CImg<unsigned char> imgB(width, height, 1, 3, 0);

    // for all (x, y) pixels in image
    cimg_forXY(img, x, y)
    {
        imgR(x, y, 0, 0) = img(x, y, 0, 0),
        imgG(x, y, 0, 1) = img(x, y, 0, 1),
        imgB(x, y, 0, 2) = img(x, y, 0, 2);

        // separate the channels
        int R = (int)img(x, y, 0, 0);
        int G = (int)img(x, y, 0, 1);
        int B = (int)img(x, y, 0, 2);

        // obtain gray value from different weights of RGB channels
        int gray_value = (int)(0.299 * R + 0.587 * G + 0.114 * B);
        gray_img(x, y, 0, 0) = gray_value;
    }

    // find threshold of grayscale image
    int threshold = otsu_threshold(gray_img);

    // initialize a binary image version of img
    CImg<unsigned char> binary_img(width, height, 1, 1, 0);

    // for every (x, y) pixel in gray_img
    cimg_forXY(img, x, y)
    {
        int gray_value = gray_img(x, y, 0, 0);

        // COMPARE gray_value with threshold
        int binary_value;
        // gray_value > threshold: 255 (white)
        if (gray_value > threshold) binary_value = 255;
        // gray_value < threshold: 0 (black)
        else binary_value = 0;

        // assign binary_value to each of binary_img's pixels
        binary_img(x, y, 0, 0) = binary_value;
    }

    // display the images
    CImgDisplay src_disp(img, "Source image");
    CImgDisplay gray_disp(gray_img, "Grayscale image");
    CImgDisplay binary_disp(binary_img, "Binary image");

    while (!src_disp.is_closed() && !gray_disp.is_closed() && !binary_disp.is_closed())
    {
        src_disp.wait();
        gray_disp.wait();
    }

    return 0;
}
If you find that another algorithm would work better, please provide the algorithm and source code in your answer. Thanks for your attention.
First error: you're returning a pointer to a local array, which is destroyed as soon as the generate_histogram function ends.
To make it work properly, you should supply the pointer to an array from the calling function, something like:
{
    //[....]
    int histogram[256];
    generate_histogram(img, histogram);
    //[....]
}

int * generate_histogram(CImg<unsigned char> img, int* arHistogram)
{
    //[....]
}
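For completeness, a minimal corrected sketch (untested) that fills the caller-supplied buffer and returns nothing:
void generate_histogram(const CImg<unsigned char>& img, int* histogram)
{
    // reset all bins
    for (int i = 0; i < 256; i++)
        histogram[i] = 0;

    // count gray values
    for (int y = 0; y < img.height(); y++)
        for (int x = 0; x < img.width(); x++)
            histogram[img(x, y, 0, 0)]++;
}

// in otsu_threshold:
int histogram[256];
generate_histogram(img, histogram);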

Displaying Kinect streams using OpenCV (C++)

I'm trying to get every frame of the stream produced by the RGB camera of the Kinect (using SDK version 1.8) into an OpenCV (2.4.10) Mat_<Vec3b>. This is my current algorithm, which is not at all fast:
Mat_<Vec3b> mat = Mat::zeros(480, 640, CV_8UC3);
NUI_IMAGE_FRAME imageFrame;
NUI_LOCKED_RECT lockedRect;

if (sensor->NuiImageStreamGetNextFrame(colorStream, 0, &imageFrame) < 0) { return; }

INuiFrameTexture* texture = imageFrame.pFrameTexture;
texture->LockRect(0, &lockedRect, NULL, 0);

if (lockedRect.Pitch != 0)
{
    BYTE* upperLeftCorner = (BYTE*)lockedRect.pBits;
    BYTE* pointerToTheByteBeingRead = upperLeftCorner;

    for (int i = 0; i < 480; i++)
    {
        for (int j = 0; j < 640; j++)
        {
            unsigned char r = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 1;
            unsigned char g = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 1;
            unsigned char b = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 2; // so as to skip the alpha channel
            mat.at<Vec3b>(Point(j, i))[0] = r;
            mat.at<Vec3b>(Point(j, i))[1] = g;
            mat.at<Vec3b>(Point(j, i))[2] = b;
        }
    }
}

texture->UnlockRect(0);
sensor->NuiImageStreamReleaseFrame(colorStream, &imageFrame);
I checked the OpenCV documentation, and I understand I'm supposed to use pointer access to increase efficiency. Are Mat_<Vec3b>s stored in memory the same way as Mats, or should I do some other pointer arithmetic?
Also, I understand that updating every single pixel every time is not the most efficient way of displaying the stream through a Mat. What else could I do?
Finally figured out how to use pointer arithmetic. The code is self-explanatory:
Mat_<Vec3b> mat = Mat::zeros(480, 640, CV_8UC3);
NUI_IMAGE_FRAME imageFrame;
NUI_LOCKED_RECT lockedRect;

if (sensor->NuiImageStreamGetNextFrame(colorStream, 0, &imageFrame) < 0) { return; }

INuiFrameTexture* texture = imageFrame.pFrameTexture;
texture->LockRect(0, &lockedRect, NULL, 0);

if (lockedRect.Pitch != 0)
{
    BYTE* upperLeftCorner = (BYTE*)lockedRect.pBits;
    BYTE* pointerToTheByteBeingRead = upperLeftCorner;

    for (int i = 0; i < 480; i++)
    {
        Vec3b *pointerToRow = mat.ptr<Vec3b>(i);
        for (int j = 0; j < 640; j++)
        {
            unsigned char r = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 1;
            unsigned char g = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 1;
            unsigned char b = *pointerToTheByteBeingRead;
            pointerToTheByteBeingRead += 2; // so as to skip the alpha channel
            pointerToRow[j] = Vec3b(r, g, b);
        }
    }
}

texture->UnlockRect(0);
sensor->NuiImageStreamReleaseFrame(colorStream, &imageFrame);
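If the per-pixel loop is still too slow, one alternative (untested, and it assumes the Kinect color buffer is 32-bit BGRA with lockedRect.Pitch as the row stride in bytes) is to wrap the locked bytes in a Mat header and let OpenCV drop the alpha channel in a single call:
// Wrap the locked buffer without copying, then convert BGRA -> BGR.
Mat bgra(480, 640, CV_8UC4, lockedRect.pBits, lockedRect.Pitch);
Mat_<Vec3b> mat;
cvtColor(bgra, mat, CV_BGRA2BGR); // copies into mat, so it stays valid after UnlockRect
// Depending on the actual byte order of the stream, CV_RGBA2BGR may be the conversion you want instead.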