Halide Jit compilation

Halide Jit compilation - c++

Im trying to compile my halide program to jit to use it later in code few times on different images. But i think i making something wrong, can anyone correct me?
First I create halide function to run:
void m_gammaFunctionTMOGenerate()
{
Halide::ImageParam img(Halide::type_of<float>(), 3);
img.set_stride(0, 4);
img.set_stride(2, 1);
Halide::Var x, y, c;
Halide::Param<float> key, sat, clampMax, clampMin;
Halide::Param<bool> cS;
Halide::Func gamma;
// algorytm
//img.width() , img.height();
if (cS.get())
{
float k1 = 1.6774;
float k2 = 0.9925;
sat.set((1 + k1) * pow(key.get(), k2) / (1 + k1 * pow(key.get(), k2)));
}
Halide::Expr luminance = img(x, y, 0) * 0.072186f + img(x, y, 1) * 0.715158f + img(x, y, 2) * 0.212656f;
Halide::Expr ldr_lum = (luminance - clampMin) / (clampMax - clampMin);
Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, key);
Halide::Expr imLum = img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
Halide::clamp(imLum, 0.f, 1.f);
gamma(x, y, c) = imLum;
// rozkład
gamma.vectorize(x, 16).parallel(y);
// kompilacja
auto & obuff = gamma.output_buffer();
obuff.set_stride(0, 4);
obuff.set_stride(2, 1);
obuff.set_extent(2, 3);
std::vector<Halide::Argument> arguments = { img, key, sat, clampMax, clampMin, cS };
m_gammaFunction = (gammafunction)(gamma.compile_jit());
}
store it in pointer:
typedef int(*gammafunction)(buffer_t*, float, float, float, float, bool, buffer_t*);
gammafunction m_gammaFunction;
then i try to run it:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = m_photoFunction(&buf, params[0], &output_buf);
But it doesn't work...
Error:
Exception thrown at 0x000002974F552DE0 in Viewer.exe: 0xC0000005: Access violation executing location 0x000002974F552DE0.
If there is a handler for this exception, the program may be safely continued.
Edit:
Here is my code for running function:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 3; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = m_gammaFunction(&buf, params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false, &output_buf);
if (error) {
printf("Halide returned an error: %d\n", error);
return -1;
}
memcpy(output, data, size * sizeof(float));
can anyone help me with it?
Edit:
Thanks to #KhouriGiordano I found out what I was doing wrong. Indeed I switched from AOT compiling to this code. So now my code looks like that:
class GammaOperator
{
public:
GammaOperator();
int realize(buffer_t * input, float params[], buffer_t * output, int width);
private:
HalideFloat m_key;
HalideFloat m_sat;
HalideFloat m_clampMax;
HalideFloat m_clampMin;
HalideBool m_cS;
Halide::ImageParam m_img;
Halide::Var x, y, c;
Halide::Func m_gamma;
};
GammaOperator::GammaOperator()
: m_img( Halide::type_of<float>(), 3)
{
Halide::Expr w = (1.f + 1.6774f) * pow(m_key.get(), 0.9925f) / (1.f + 1.6774f * pow(m_key.get(), 0.9925f));
Halide::Expr sat = Halide::select(m_cS, m_sat, w);
Halide::Expr luminance = m_img(x, y, 0) * 0.072186f + m_img(x, y, 1) * 0.715158f + m_img(x, y, 2) * 0.212656f;
Halide::Expr ldr_lum = (luminance - m_clampMin) / (m_clampMax - m_clampMin);
ldr_lum = Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, m_key);
Halide::Expr imLum = m_img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
imLum = Halide::clamp(imLum, 0.f, 1.f);
m_gamma(x, y, c) = imLum;
}
int GammaOperator::realize(buffer_t * input, float params[], buffer_t * output, int width)
{
m_img.set(Halide::Buffer(Halide::type_of<float>(), input));
m_img.set_stride(0, 4);
m_img.set_stride(1, width * 4);
m_img.set_stride(2, 4);
// algorytm
m_gamma.vectorize(x, 16).parallel(y);
//params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false
//{ img, key, sat, clampMax, clampMin, cS };
m_key.set(params[0]);
m_sat.set(params[1]);
m_clampMax.set(params[2]);
m_clampMin.set(params[3]);
m_cS.set(params[4] > 0.5f ? true : false);
//// kompilacja
m_gamma.realize(Halide::Buffer(Halide::type_of<float>(), output));
return 0;
}
and i use it like that:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = s_gamma->realize(&buf, params, &output_buf, width);
but it is still crashing on m_gamma.realize function with info in console:
Error: Constraint violated: f0.stride.0 (4) == 1 (1)

By using Halide::Param::get(), you are extracting the (default of 0) value from the Param object at the time you call get(). If you want to use the parameter value given at the time you call the generated function, just use it without calling get and it should be implicitly converted to an Expr.
Since Param is not convertible to a boolean, the Halide way of doing an if is Halide::select().
You aren't using the clamped return value of Halide::clamp().
I don't see cS being used by the Halide code, only the C code.
Now to your JIT problem. It looks like you started doing AOT compilation and switched to JIT.
You make a std::vector<Halide::Argument> but don't pass it anywhere. How can Halide know what Param you want to use? It looks at the Func and finds references to ImageParam and Param objects.
How can you know what order it expects the Param? You have no control over this. I was able to dump the bitcode by defining HL_GENBITCODE=1 and then view that with llvm-dis to see your function:
int gamma
( buffer_t *img
, float clampMax
, float key
, float clampMin
, float sat
, void *user_context
, buffer_t *result
);
Use gamma.realize(Halide::Buffer(Halide::type_of<float>(), &output_buf)) instead of using gamma.compile_jit() and trying to call the generated function properly.
For one time use:
Use Image instead of ImageParam.
Use Expr instead of Param.
For repeated use with a single JIT compile:
Keep the ImageParam and Param around and set them before realizing the Func.

Related

Image subtraction with CUDA and textures

My goal is to use C++ with CUDA to subtract a dark frame from a raw image. I want to use textures for acceleration. The input of the images is cv::Mat with the type CV_8UC4 (I use the pointer to the data of the cv::Mat). This is the kernel I came up with, but I have no idea how to eventually subtract the textures from each other:
__global__ void DarkFrameSubtractionKernel(unsigned char* outputImage, size_t pitchOutputImage,
cudaTextureObject_t inputImage, cudaTextureObject_t darkImage, int width, int height)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
const float tx = (x + 0.5f);
const float ty = (y + 0.5f);
if (x >= width || y >= height) return;
uchar4 inputImageTemp = tex2D<uchar4>(inputImage, tx, ty);
uchar4 darkImageTemp = tex2D<uchar4>(darkImage, tx, ty);
outputImage[y * pitchOutputImage + x] = inputImageTemp - darkImageTemp; // this line will throw an error
}
This is the function that calls the kernel (you can see that I create the textures from unsigned char):
void subtractDarkImage(unsigned char* inputImage, size_t pitchInputImage, unsigned char* outputImage,
size_t pitchOutputImage, unsigned char* darkImage, size_t pitchDarkImage, int width, int height,
cudaStream_t stream)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.width = width;
resDesc.res.pitch2D.height = height;
resDesc.res.pitch2D.devPtr = inputImage;
resDesc.res.pitch2D.pitchInBytes = pitchInputImage;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
cudaTextureDesc texDesc = {};
texDesc.readMode = cudaReadModeElementType;
texDesc.addressMode[0] = cudaAddressModeBorder;
texDesc.addressMode[1] = cudaAddressModeBorder;
cudaTextureObject_t imageInputTex, imageDarkTex;
CUDA_CHECK(cudaCreateTextureObject(&imageInputTex, &resDesc, &texDesc, 0));
resDesc.res.pitch2D.devPtr = darkImage;
resDesc.res.pitch2D.pitchInBytes = pitchDarkImage;
CUDA_CHECK(cudaCreateTextureObject(&imageDarkTex, &resDesc, &texDesc, 0));
dim3 block(32, 8);
dim3 grid = paddedGrid(block.x, block.y, width, height);
DarkImageSubtractionKernel << <grid, block, 0, stream >> > (reinterpret_cast<uchar4*>(outputImage), pitchOutputImage / sizeof(uchar4),
imageInputTex, imageDarkTex, width, height);
CUDA_CHECK(cudaDestroyTextureObject(imageInputTex));
CUDA_CHECK(cudaDestroyTextureObject(imageDarkTex));
}
The code does not compile as I can not subtract a uchar4 from another one (in the kernel). Is there an easy way of subtraction here?
Help is very much appreciated.

Is there an easy way of subtraction here?
There are no arithmetic operators defined for CUDA built-in vector types. If you replace
outputImage[y * pitchOutputImage + x] = inputImageTemp - darkImageTemp;
with
uchar4 val;
val.x = inputImageTemp.x - darkImageTemp.x;
val.y = inputImageTemp.y - darkImageTemp.y;
val.z = inputImageTemp.z - darkImageTemp.z;
val.w = inputImageTemp.w - darkImageTemp.w;
outputImage[y * pitchOutputImage + x] = val;
things will work. If this offends you, I suggest writing a small library of helper functions to hide the mess.

Why isn't horizontal advance enough to properly format a glyph?

I am making a simple text renderer with vulkan and I am using freetype to format my text.
I read the freetype tutorial and I have come up with the following function:
void Scribe::CreateSingleLineGeometry(const string &text, ScGeomMetaInfo &info,
ScGeomtry &geometry)
{
float texture_length = info.texture_len;
float h_offset = info.h_offset;
float char_l = info.char_len;
float v_anchor = info.v_offset;
auto &vertices = geometry.first;
auto &indices = geometry.second;
auto length = max(ft_face->bbox.xMax - ft_face->bbox.xMin,
ft_face->bbox.yMax - ft_face->bbox.yMin);
for(auto &c: text)
{
auto glyph_index = FT_Get_Char_Index(ft_face, c);
auto error = FT_Load_Glyph(ft_face, glyph_index, FT_LOAD_NO_HINTING);
auto metrics = ft_face->glyph->metrics;
float g_height = metrics.height;
float g_bearing = metrics.horiBearingY;
float correction = 1.0 - (g_height) / float(length) -
float(metrics.horiBearingY - g_height) / float(length);
correction *= char_l;
float bearingX = char_l * float(metrics.horiBearingX) / float(length);
// Insert the vertex positions and uvs into the returned geometry
float h_coords[] = {h_offset + bearingX, h_offset + bearingX + char_l};
float v_coords[] = {v_anchor + correction, v_anchor + correction + char_l};
auto glyph_data = glyph_map[glyph_index];
float tex_h_coords[] = {glyph_data.lt_uv.x, glyph_data.rb_uv.x};
float tex_v_coords[] = {glyph_data.lt_uv.y, glyph_data.rb_uv.y};
for(int x=0; x<2; x++) {
for(int y=0; y<2; y++) {
vertices.insert(end(vertices), {h_coords[x], v_coords[y], 0});
vertices.insert(end(vertices), {tex_h_coords[x], tex_v_coords[y]});
}
}
// Setup the indices of the current quad
// There's 4 vertices in a quad, so we offset each index by (quad_size * num_quads)
uint delta = 4 * info.c_num++;
indices.insert(end(indices), {0+delta, 3+delta, 1+delta, 0+delta, 3+delta, 2+delta});
h_offset += char_l * float(metrics.horiAdvance) / float(length) + 0.03;
}
}
In particular I want to emphasize the line:
h_offset += char_l * float(metrics.horiAdvance) / float(length) + 0.03;
That 0.03 at the end of the line doesn't come from anywhere, I inserted it there to make things look good.
This is the result with that extra offset:
Which I think looks pretty good. However, if I were to remove the extra offset:
h_offset += char_l * float(metrics.horiAdvance) / float(length);
I get something that doesn't look right at all. Why isn't the advance enough to correctly format the font?

CUDA, "illegal memory access was encountered" in Memcpy

I have this cuda file:
#include "cuda.h"
#include "../../HandleError.h"
#include "Sphere.hpp"
#include <stdlib.h>
#include <CImg.h>
#define WIDTH 1280
#define HEIGHT 720
#define rnd(x) (x*rand()/RAND_MAX)
#define SPHERES_COUNT 5
using namespace cimg_library;
__global__
void kernel(unsigned char* bitmap, Sphere* s)
{
// Map threadIdx/blockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
float ox = x - blockDim.x * gridDim.x / 2;
float oy = y - blockDim.y * gridDim.y / 2;
float r = 0.2, g = 0.2, b = 0.5;
float maxz = -INF;
for (int i = 0; i < SPHERES_COUNT; i++) {
float n, t = s[i].hit(ox, oy, &n);
if (t > maxz) {
float fscale = n;
r = s[i].r * fscale;
g = s[i].g * fscale;
b = s[i].b * fscale;
maxz = t;
}
}
bitmap[offset*3] = (int)(r * 255);
bitmap[offset*3 + 1] = (int)(g * 255);
bitmap[offset*3 + 2] = (int)(b * 255);
}
__constant__ Sphere s[SPHERES_COUNT];
int main ()
{
//Capture start time
cudaEvent_t start, stop;
HANDLE_ERROR(cudaEventCreate(&start));
HANDLE_ERROR(cudaEventCreate(&stop));
HANDLE_ERROR(cudaEventRecord(start, 0));
//Create host bitmap
CImg<unsigned char> image(WIDTH, HEIGHT, 1, 3);
image.permute_axes("cxyz");
//Allocate device bitmap data
unsigned char* dev_bitmap;
HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, image.size()*sizeof(unsigned char)));
//Generate spheres and copy them on the GPU one by one
Sphere* temp_s = (Sphere*)malloc(SPHERES_COUNT*sizeof(Sphere));
for (int i=0; i <SPHERES_COUNT; i++) {
temp_s[i].r = rnd(1.0f);
temp_s[i].g = rnd(1.0f);
temp_s[i].b = rnd(1.0f);
temp_s[i].x = rnd(1000.0f) - 500;
temp_s[i].y = rnd(1000.0f) - 500;
temp_s[i].z = rnd(1000.0f) - 500;
temp_s[i].radius = rnd(100.0f) + 20;
}
HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere)*SPHERES_COUNT));
free(temp_s);
//Generate a bitmap from spere data
dim3 grids(WIDTH/16, HEIGHT/16);
dim3 threads(16, 16);
kernel<<<grids, threads>>>(dev_bitmap, s);
//Copy the bitmap back from the GPU for display
HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap,
image.size()*sizeof(unsigned char),
cudaMemcpyDeviceToHost));
cudaFree(dev_bitmap);
image.permute_axes("yzcx");
image.save("render.bmp");
}
It compiles fine, but when executed I get this error:
an illegal memory access was encountered in main.cu at line 82
that is, here:
//Copy the bitmap back from the GPU for display
HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap,
image.size()*sizeof(unsigned char),
cudaMemcpyDeviceToHost));
I cannot understand why...
I know that If remove this:
bitmap[offset*3] = (int)(r * 255);
bitmap[offset*3 + 1] = (int)(g * 255);
bitmap[offset*3 + 2] = (int)(b * 255);
The error is not reported, so I thought It may be an out of index error, reported later, but I have An identical version of this program that makes no use of constant memory, and it works fine with the very same version of the kernel function...

There are two things at issue here. The first is this:
__constant__ Sphere s[SPHERES_COUNT];
int main ()
{
......
kernel<<<grids, threads>>>(dev_bitmap, s);
......
In host code, s is a host memory variable which provides a handle for the CUDA runtime to hook up with the device constant memory symbol. It doesn't contain a valid device pointer and can't be passed to kernel calls. The result is a invalid memory access error.
You could do this:
__constant__ Sphere s[SPHERES_COUNT];
int main ()
{
......
Sphere *d_s;
cudaGetSymbolAddress((void **)&d_s, s);
kernel<<<grids, threads>>>(dev_bitmap, d_s);
......
which would cause a symbol lookup to get the device address of s, and it would be valid to pass that to the kernel. However, the GPU relies on the compiler emitting specific instructions to access memory through the constant cache. The device compiler will only emit these instructions when it can detect that a __constant__ variable is being accessed within a kernel, which is not possible when using a pointer. You can see more about how the compiler will generate code for constant variable access in this Stack Overflow question and answer.

Does videoInput guarantee rgb camera input? (Transfering image from videoInput/dshow -> Java BufferedImage)

I am using videoInput to get a live stream from my webcam, but I've ran into a problem where videoInput's documentation implies that I should always be getting a BGR/RGB, however, the "verbose" output tells me the pixel format is YUY2.
***** VIDEOINPUT LIBRARY - 0.1995 - TFW07 *****
SETUP: Setting up device 0
SETUP: 1.3M WebCam
SETUP: Couldn't find preview pin using SmartTee
SETUP: Default Format is set to 640 by 480
SETUP: trying format RGB24 # 640 by 480
SETUP: trying format RGB32 # 640 by 480
SETUP: trying format RGB555 # 640 by 480
SETUP: trying format RGB565 # 640 by 480
SETUP: trying format YUY2 # 640 by 480
SETUP: Capture callback set
SETUP: Device is setup and ready to capture.
My first thoughts were to try converting to RGB (assuming I was really getting YUY2 data), and I end up getting a blue image that was highly distorted.
Here is my code for converting YUY2 to BGR (Note: This is part of a much larger program, and this is borrowed code - I can get the url at anyone's request):
#define CLAMP_MIN( in, min ) ((in) < (min))?(min):(in)
#define CLAMP_MAX( in, max ) ((in) > (max))?(max):(in)
#define FIXNUM 16
#define FIX(a, b) ((int)((a)*(1<<(b))))
#define UNFIX(a, b) ((a+(1<<(b-1)))>>(b))
#define ICCIRUV(x) (((x)<<8)/224)
#define ICCIRY(x) ((((x)-16)<<8)/219)
#define CLIP(t) CLAMP_MIN( CLAMP_MAX( (t), 255 ), 0 )
#define GET_R_FROM_YUV(y, u, v) UNFIX((FIX(1.0, FIXNUM)*(y) + FIX(1.402, FIXNUM)*(v)), FIXNUM)
#define GET_G_FROM_YUV(y, u, v) UNFIX((FIX(1.0, FIXNUM)*(y) + FIX(-0.344, FIXNUM)*(u) + FIX(-0.714, FIXNUM)*(v)), FIXNUM)
#define GET_B_FROM_YUV(y, u, v) UNFIX((FIX(1.0, FIXNUM)*(y) + FIX(1.772, FIXNUM)*(u)), FIXNUM)
bool yuy2_to_rgb24(int streamid) {
int i;
unsigned char y1, u, y2, v;
int Y1, Y2, U, V;
unsigned char r, g, b;
int size = stream[streamid]->config.g_h * (stream[streamid]->config.g_w / 2);
unsigned long srcIndex = 0;
unsigned long dstIndex = 0;
try {
for(i = 0 ; i < size ; i++) {
y1 = stream[streamid]->vi_buffer[srcIndex];
u = stream[streamid]->vi_buffer[srcIndex+ 1];
y2 = stream[streamid]->vi_buffer[srcIndex+ 2];
v = stream[streamid]->vi_buffer[srcIndex+ 3];
Y1 = ICCIRY(y1);
U = ICCIRUV(u - 128);
Y2 = ICCIRY(y2);
V = ICCIRUV(v - 128);
r = CLIP(GET_R_FROM_YUV(Y1, U, V));
//r = (unsigned char)CLIP( (1.164f * (float(Y1) - 16.0f)) + (1.596f * (float(V) - 128)) );
g = CLIP(GET_G_FROM_YUV(Y1, U, V));
//g = (unsigned char)CLIP( (1.164f * (float(Y1) - 16.0f)) - (0.813f * (float(V) - 128.0f)) - (0.391f * (float(U) - 128.0f)) );
b = CLIP(GET_B_FROM_YUV(Y1, U, V));
//b = (unsigned char)CLIP( (1.164f * (float(Y1) - 16.0f)) + (2.018f * (float(U) - 128.0f)) );
stream[streamid]->rgb_buffer[dstIndex] = b;
stream[streamid]->rgb_buffer[dstIndex + 1] = g;
stream[streamid]->rgb_buffer[dstIndex + 2] = r;
dstIndex += 3;
r = CLIP(GET_R_FROM_YUV(Y2, U, V));
//r = (unsigned char)CLIP( (1.164f * (float(Y2) - 16.0f)) + (1.596f * (float(V) - 128)) );
g = CLIP(GET_G_FROM_YUV(Y2, U, V));
//g = (unsigned char)CLIP( (1.164f * (float(Y2) - 16.0f)) - (0.813f * (float(V) - 128.0f)) - (0.391f * (float(U) - 128.0f)) );
b = CLIP(GET_B_FROM_YUV(Y2, U, V));
//b = (unsigned char)CLIP( (1.164f * (float(Y2) - 16.0f)) + (2.018f * (float(U) - 128.0f)) );
stream[streamid]->rgb_buffer[dstIndex] = b;
stream[streamid]->rgb_buffer[dstIndex + 1] = g;
stream[streamid]->rgb_buffer[dstIndex + 2] = r;
dstIndex += 3;
srcIndex += 4;
}
return true;
} catch(...) {
return false;
}
}
After this wasn't working, I assume either a) my color space conversion function is wrong, or b) videoInput is lying to me.
Well, I wanted to double check that videoInput was indeed telling me the truth, and it turns out there is absolutely no way for me to see the pixel format I'm getting from the videoInput::getPixels() function, outside of the verbose text (unless I'm extremely crazy and just can't see it). This makes me assume that it's possible that videoInput does some sort of color space conversion behind the scenes so you're always getting a consistent image, regardless of the webcam. With this in mind, and following some of the documentation in videoInput.h:96, it sort of appears that it just gives out RGB or BGR images.
The utility I'm using to display the image takes RGB images (Java BufferedImage), so I figured I could just feed it the raw data directly from videoInput, and it should be fine.
Here is how I've got my image setup in Java:
BufferedImage buffer = new BufferedImage(directShow.device_stream_width(stream),directShow.device_stream_height(stream), BufferedImage.TYPE_INT_RGB );
int rgbdata[] = directShow.grab_frame_stream(stream);
if( rgbdata.length > 0 ) {
buffer.setRGB(
0, 0,
directShow.device_stream_width(stream),
directShow.device_stream_height(stream),
rgbdata,
0, directShow.device_stream_width(stream)
);
}
And here is how I send it to Java (C++/JNI):
JNIEXPORT jintArray JNICALL Java_directshowcamera_dsInterface_grab_1frame_1stream(JNIEnv *env, jobject obj, jint streamid)
{
//jclass bbclass = env->FindClass( "java/nio/IntBuffer" );
//jmethodID putMethod = env->GetMethodID(bbclass, "put", "(B)Ljava/nio/IntBuffer;");
int buffer_size;
jintArray ia;
jint *intbuffer = NULL;
unsigned char *buffer = NULL;
append_stream( streamid );
buffer_size = stream_device_rgb24_size(streamid);
ia = env->NewIntArray( buffer_size );
intbuffer = (jint *)calloc( buffer_size, sizeof(jint) );
buffer = stream_device_buffer_rgb( streamid );
if( buffer == NULL ) {
env->DeleteLocalRef( ia );
return env->NewIntArray( 0 );
}
for(int i=0; i < buffer_size; i++ ) {
intbuffer[i] = (jint)buffer[i];
}
env->SetIntArrayRegion( ia, 0, buffer_size, intbuffer );
free( intbuffer );
return ia;
}
This has been driving me absolutely nuts for the past two weeks, and I've tried variations of anything suggested to me as well, with absolutely no sane success.

Flipping the 2D texture on a sphere with Ray-Tracing

I am working on my ray-tracer and I think I've made some significant achievements. I am currently trying to place texture images onto objects. However they don't place quite well. They appear flipped on the sphere. Here is the final image of my current code:
Here are the relevant code:
-Image Class for opening image
class Image
{
public:
Image() {}
void read_bmp_file(char* filename)
{
int i;
FILE* f = fopen(filename, "rb");
unsigned char info[54];
fread(info, sizeof(unsigned char), 54, f); // read the 54-byte header
// extract image height and width from header
width = *(int*)&info[18];
height = *(int*)&info[22];
int size = 3 * width * height;
data = new unsigned char[size]; // allocate 3 bytes per pixel
fread(data, sizeof(unsigned char), size, f); // read the rest of the data at once
fclose(f);
for(i = 0; i < size; i += 3)
{
unsigned char tmp = data[i];
data[i] = data[i+2];
data[i+2] = tmp;
}
/*Now data should contain the (R, G, B) values of the pixels. The color of pixel (i, j) is stored at
data[j * 3* width + 3 * i], data[j * 3 * width + 3 * i + 1] and data[j * 3 * width + 3*i + 2].
In the last part, the swap between every first and third pixel is done because windows stores the
color values as (B, G, R) triples, not (R, G, B).*/
}
public:
int width;
int height;
unsigned char* data;
};
-Texture class
class Texture: public Material
{
public:
Texture(char* filename): Material() {
image_ptr = new Image;
image_ptr->read_bmp_file(filename);
}
virtual ~Texture() {}
virtual void set_mapping(Mapping* mapping)
{ mapping_ptr = mapping;}
virtual Vec get_color(const ShadeRec& sr) {
int row, col;
if(mapping_ptr)
mapping_ptr->get_texel_coordinates(sr.local_hit_point, image_ptr->width, image_ptr->height, row, col);
return Vec (image_ptr->data[row * 3 * image_ptr->width + 3*col ]/255.0,
image_ptr->data[row * 3 * image_ptr->width + 3*col+1]/255.0,
image_ptr->data[row * 3 * image_ptr->width + 3*col+2]/255.0);
}
public:
Image* image_ptr;
Mapping* mapping_ptr;
};
-Mapping class
class SphericalMap: public Mapping
{
public:
SphericalMap(): Mapping() {}
virtual ~SphericalMap() {}
virtual void get_texel_coordinates (const Vec& local_hit_point,
const int hres,
const int vres,
int& row,
int& column) const
{
float theta = acos(local_hit_point.y);
float phi = atan2(local_hit_point.z, local_hit_point.x);
if(phi < 0.0)
phi += 2*PI;
float u = phi/(2*PI);
float v = (PI - theta)/PI;
column = (int)((hres - 1) * u);
row = (int)((vres - 1) * v);
}
};
-Local hit points:
virtual void Sphere::set_local_hit_point(ShadeRec& sr)
{
sr.local_hit_point.x = sr.hit_point.x - c.x;
sr.local_hit_point.y = (sr.hit_point.y - c.y)/R;
sr.local_hit_point.z = sr.hit_point.z -c.z;
}
-This is how I constructed the sphere in main:
Texture* t1 = new Texture("Texture\\earthmap2.bmp");
SphericalMap* sm = new SphericalMap();
t1->set_mapping(sm);
t1->set_ka(0.55);
t1->set_ks(0.0);
Sphere *s1 = new Sphere(Vec(-60,0,50), 149);
s1->set_material(t1);
w.add_object(s1);
Sorry for long codes but if I had any idea where that problem might occur, I'd have posted that part. Finally this is how I call get_color() function from the main:
xShaded += sr.material_ptr->get_color(sr).x * in.x * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.x * pow((max(0.0,sr.normal.dot(h))),1);
yShaded += sr.material_ptr->get_color(sr).y * in.y * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.y * pow((max(0.0,sr.normal.dot(h))),1);
zShaded += sr.material_ptr->get_color(sr).z * in.z * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.z * pow((max(0.0,sr.normal.dot(h))),1);

Shot in the dark: if memory serves, BMPs are stored from the bottom up, while many other image formats are top-down. Could that possibly be the problem? Perhaps your file reader just needs to reverse the rows?

Changing float phi = atan2(local_hit_point.z, local_hit_point.x); to float phi = atan2(local_hit_point.x, local_hit_point.z); solved the problem.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Halide Jit compilation - c++

Related

Image subtraction with CUDA and textures

Why isn't horizontal advance enough to properly format a glyph?

CUDA, "illegal memory access was encountered" in Memcpy

Does videoInput guarantee rgb camera input? (Transfering image from videoInput/dshow -> Java BufferedImage)

Flipping the 2D texture on a sphere with Ray-Tracing

Categories

Resources