Allegro 5 Play Multiple Samples at Once - c++

I've come across an issue with playing samples in Allegro 5. When I play a sample, I can't play that sample again until it has finished playing. Sometimes it will also not play the sample if another, different sample is playing.
Is there any way to get around this?
I play audio with a "Sound" class, which only has a play function. The rest are constructors and member variables, all of which are used in the play function.
void Sound::play()
{
al_play_sample(
pSample, // ALLEGRO_SAMPLE
mGain, // float
mPan, // float
mSpeed, // float
getPlaymode(mPlaymode), // I use my own non-AL playmode enums. This is a private function that returns the AL version.
NULL); // ALLEGRO_SAMPLE_ID
}
The whole class:
Sound.h
class ContentManager;
// Project-side play modes. Values are spelled out explicitly; they match the
// implicit numbering (0..4) the enumerators would otherwise receive.
enum Playmode
{
    BiDir        = 0,
    Loop         = 1,
    Once         = 2,
    StreamOnce   = 3,
    StreamOneDir = 4
};
class Sound : public Trackable
{
private:
/* Variables
* * * * * * * * * * * * */
ALLEGRO_SAMPLE* pSample;
float
mGain,
mPan,
mSpeed;
Playmode mPlaymode;
std::string
mAssetPath,
mAssetName;
/* Private Functions
* * * * * * * * * * * * */
ALLEGRO_PLAYMODE getPlaymode(Playmode playmode)
{
switch (playmode)
{
case BiDir:
return ALLEGRO_PLAYMODE::ALLEGRO_PLAYMODE_BIDIR;
case Loop:
return ALLEGRO_PLAYMODE::ALLEGRO_PLAYMODE_LOOP;
case Once:
return ALLEGRO_PLAYMODE::ALLEGRO_PLAYMODE_ONCE;
case StreamOnce:
return ALLEGRO_PLAYMODE::_ALLEGRO_PLAYMODE_STREAM_ONCE;
case StreamOneDir:
return ALLEGRO_PLAYMODE::_ALLEGRO_PLAYMODE_STREAM_ONEDIR;
// Default to once
default:
return ALLEGRO_PLAYMODE::ALLEGRO_PLAYMODE_ONCE;
}
}
public:
/* Constructors/Destructor
* * * * * * * * * * * * */
Sound();
Sound(
// assetPath, assetName, gain, pan, speed, playmode
std::string assetPath,
std::string assetName,
float gain = 1.0f,
float pan = 0.0f,
float speed = 1.0f,
Playmode playmode = Once);
Sound(const Sound& other);
~Sound();
friend class ContentManager; // My content system.
void play();
};
Sound.cpp
#include "Sound.h"
/* Constructors/Destructor
* * * * * * * * * * * * */
// Default constructor: empty asset path and name, gain 1, pan 0, speed 1,
// Once play mode, and no sample loaded (pSample is NULL).
Sound::Sound()
    : pSample(NULL),
      mGain(1.0f),
      mPan(0.0f),
      mSpeed(1.0f),
      mPlaymode(Once),
      mAssetPath(""),
      mAssetName("")
{
}
// Stores the playback settings and loads the sample found at
// assetPath + assetName. pSample receives whatever al_load_sample() returns.
Sound::Sound(std::string assetPath, std::string assetName, float gain, float pan, float speed, Playmode playmode)
    : mGain(gain),
      mPan(pan),
      mSpeed(speed),
      mPlaymode(playmode),
      mAssetPath(assetPath),
      mAssetName(assetName)
{
    const std::string fullPath = assetPath + assetName;
    pSample = al_load_sample(fullPath.c_str());
}
// Copy constructor: copies the settings from `other` and loads a separate
// sample from the same path + name instead of sharing other's pointer.
Sound::Sound(const Sound& other)
    : mGain(other.mGain),
      mPan(other.mPan),
      mSpeed(other.mSpeed),
      mPlaymode(other.mPlaymode),
      mAssetPath(other.mAssetPath),
      mAssetName(other.mAssetName)
{
    const std::string fullPath = mAssetPath + mAssetName;
    pSample = al_load_sample(fullPath.c_str());
}
// Destructor: hands the sample held by this Sound to al_destroy_sample().
Sound::~Sound()
{
al_destroy_sample(pSample);
}
// Plays the sample with the stored gain, pan, speed and play mode.
// Does nothing when no sample is held: the default constructor leaves pSample
// NULL, and the loading constructors store whatever al_load_sample() returned.
void Sound::play()
{
    if (pSample == NULL)
        return;

    al_play_sample(
        pSample,
        mGain,
        mPan,
        mSpeed,
        getPlaymode(mPlaymode),
        NULL);  // no ALLEGRO_SAMPLE_ID requested
}
I call the play function through the rest of my system, which would look something like this:
// Game->ContentManager->Sound->play()
Game::instance()->content()->getSound("somesound.wav")->play();
Content manager contains maps of my assets.
This is part of a larger project I'm working on for a class, but no, this part isn't homework. My professor disallowed us from having any public/top level AL codes (e.g. no public AL returns, etc).
Let me know if I need to clarify anything. Any help is always appreciated.

I might be wrong, but it sounds like you have to reserve more samples using al_reserve_samples(number_of_samples);

Based off of ppsz's answer, I did some digging and did the following based on what I found.
int numSamples = /*Some int*/
int reservedSamples = 0;
int i = (numSamples >= 1 ? numSamples : 1);
bool success = false;
do
{
success = al_reserve_samples(i);
i -= 1;
}
while (success == false || i > 0);
Source

Related

I am learning to make QRCode in esp32-2432s028

I am learning to make a QR code on the esp32-2432s028 (this picture) from https://www.youtube.com/watch?v=Ss3zBO-V9kI and I have a problem.
From code
/*
QR Code Maker (ESP32+LVGL8)
For More Information: https://youtu.be/Ss3zBO-V9kI
Created by Eric N. (ThatProject)
*/
/////////////////////////////////////////////////////////////////
#include <lvgl.h>
#include "MyDisplay.h"
static const uint32_t screenWidth = 320;
static const uint32_t screenHeight = 480;
static lv_disp_draw_buf_t draw_buf;
static lv_color_t buf[ screenWidth * 10 ];
lv_obj_t * mainScreen;
lv_obj_t * titleImage;
lv_obj_t * qrCode;
LV_IMG_DECLARE(ui_logo_img_obj);
// Display flush callback (installed as disp_drv.flush_cb in setup()): writes
// the pixels for `area` to the tft object and then reports completion to LVGL.
void my_disp_flush( lv_disp_drv_t *disp, const lv_area_t *area, lv_color_t *color_p )
{
    // Area bounds are inclusive, hence the +1.
    const uint32_t areaWidth  = area->x2 - area->x1 + 1;
    const uint32_t areaHeight = area->y2 - area->y1 + 1;
    lgfx::rgb565_t *pixels = (lgfx::rgb565_t *)&color_p->full;

    tft.startWrite();
    tft.setAddrWindow( area->x1, area->y1, areaWidth, areaHeight );
    tft.writePixels( pixels, areaWidth * areaHeight );
    tft.endWrite();

    lv_disp_flush_ready( disp );
}
// Input read callback (installed as indev_drv.read_cb in setup()): reports
// "released" when the touch object sees no touch, otherwise "pressed" together
// with the point returned by ts.getPoint().
void my_touchpad_read( lv_indev_drv_t * indev_driver, lv_indev_data_t * data )
{
    if (!ts.touched()) {
        data->state = LV_INDEV_STATE_REL;
        return;
    }

    data->state = LV_INDEV_STATE_PR;
    const TS_Point touch = ts.getPoint();
    data->point.x = touch.x;
    data->point.y = touch.y;
}
// Arduino entry point: starts the serial port, the display and the touch
// controller, initialises LVGL with one draw buffer, registers the display
// and pointer-input drivers, then builds the UI.
void setup()
{
Serial.begin(115200);
// Display: start the driver, rotation 0, brightness 255.
tft.begin();
tft.setRotation(0);
tft.setBrightness(255);
// A touch controller that fails to start is only reported on the serial
// port; setup carries on regardless.
if (!ts.begin(40, SDA_FT6236, SCL_FT6236)) {
Serial.println("Unable to start the capacitive touch Screen.");
}
lv_init();
// One draw buffer of screenWidth * 10 colour entries; no second buffer (NULL).
lv_disp_draw_buf_init( &draw_buf, buf, NULL, screenWidth * 10 );
// Display driver: resolution, flush callback and draw buffer.
static lv_disp_drv_t disp_drv;
lv_disp_drv_init(&disp_drv);
disp_drv.hor_res = screenWidth;
disp_drv.ver_res = screenHeight;
disp_drv.flush_cb = my_disp_flush;
disp_drv.draw_buf = &draw_buf;
lv_disp_drv_register(&disp_drv);
// Input driver: pointer-type device read through my_touchpad_read.
static lv_indev_drv_t indev_drv;
lv_indev_drv_init(&indev_drv);
indev_drv.type = LV_INDEV_TYPE_POINTER;
indev_drv.read_cb = my_touchpad_read;
lv_indev_drv_register(&indev_drv);
ui_init();
}
// Arduino main loop: runs the LVGL timer handler, then waits 5 ms.
void loop() {
lv_timer_handler();
delay( 5 );
}
// Builds the UI: static background first (this creates mainScreen), then the
// interactive widgets that are parented to it, and finally loads the screen.
void ui_init() {
ui_background();
ui_dynamic_obj();
lv_disp_load_scr(mainScreen);
}
// Creates mainScreen and its static decoration: the logo image at the top
// and the "QR Code Maker" title label below it.
void ui_background() {
// Root screen object (no parent), not scrollable.
mainScreen = lv_obj_create(NULL);
lv_obj_clear_flag(mainScreen, LV_OBJ_FLAG_SCROLLABLE);
// Logo image: 320x117, aligned to the top middle of the screen.
titleImage = lv_img_create(mainScreen);
lv_img_set_src(titleImage, &ui_logo_img_obj);
lv_obj_set_size(titleImage, 320, 117);
lv_obj_set_pos(titleImage, 0, 0);
lv_obj_set_align(titleImage, LV_ALIGN_TOP_MID);
lv_obj_add_flag(titleImage, LV_OBJ_FLAG_ADV_HITTEST);
lv_obj_clear_flag(titleImage, LV_OBJ_FLAG_SCROLLABLE);
// Title label: top-middle alignment with a y offset of 120, not clickable,
// drawn with the Montserrat 34 font.
lv_obj_t * titleLabel = lv_label_create(mainScreen);
lv_obj_set_pos(titleLabel, 0, 120);
lv_obj_set_align(titleLabel, LV_ALIGN_TOP_MID);
lv_label_set_text(titleLabel, "QR Code Maker");
lv_obj_clear_flag(titleLabel, LV_OBJ_FLAG_CLICKABLE);
lv_obj_set_style_text_font(titleLabel, &lv_font_montserrat_34, LV_PART_MAIN | LV_STATE_DEFAULT);
}
// Event handler for the text area; the keyboard object arrives as user data.
//  - READY: hide the keyboard and, if any text was entered, encode it into
//    the QR code widget and show that widget.
//  - CLICKED / FOCUSED: attach the keyboard to the text area, show the
//    keyboard and hide the QR code.
//  - DEFOCUSED: detach and hide the keyboard.
static void ta_event_cb(lv_event_t * e) {
    const lv_event_code_t eventCode = lv_event_get_code(e);
    lv_obj_t * textArea = lv_event_get_target(e);
    lv_obj_t * keyboard = (lv_obj_t*)lv_event_get_user_data(e);

    switch (eventCode) {
    case LV_EVENT_READY: {
        lv_obj_add_flag(keyboard, LV_OBJ_FLAG_HIDDEN);
        const char * entered = lv_textarea_get_text(textArea);
        if (strlen(entered) != 0) {
            lv_qrcode_update(qrCode, entered, strlen(entered));
            lv_obj_clear_flag(qrCode, LV_OBJ_FLAG_HIDDEN);
        }
        break;
    }
    case LV_EVENT_CLICKED:
    case LV_EVENT_FOCUSED:
        lv_keyboard_set_textarea(keyboard, textArea);
        lv_obj_clear_flag(keyboard, LV_OBJ_FLAG_HIDDEN);
        lv_obj_add_flag(qrCode, LV_OBJ_FLAG_HIDDEN);
        break;
    case LV_EVENT_DEFOCUSED:
        lv_keyboard_set_textarea(keyboard, NULL);
        lv_obj_add_flag(keyboard, LV_OBJ_FLAG_HIDDEN);
        break;
    default:
        break;
    }
}
// Creates the interactive widgets on mainScreen: the on-screen keyboard, the
// text area wired to ta_event_cb, and the (initially hidden) QR code widget.
void ui_dynamic_obj(void) {
lv_obj_t * kb = lv_keyboard_create(mainScreen);
lv_obj_t * ta = lv_textarea_create(mainScreen);
lv_obj_align(ta, LV_ALIGN_CENTER, 0, -40);
// All text-area events go to ta_event_cb, with the keyboard as user data.
lv_obj_add_event_cb(ta, ta_event_cb, LV_EVENT_ALL, kb);
lv_obj_set_size(ta, screenWidth - 40, 60);
lv_keyboard_set_textarea(kb, ta);
// NOTE(review): this four-argument call (parent, size, dark colour, light
// colour) only compiles against an LVGL header that declares that signature.
// A header declaring lv_qrcode_create(lv_obj_t * parent) rejects it with
// "too many arguments" -- check which LVGL version is installed.
qrCode = lv_qrcode_create(mainScreen, 200, lv_color_hex3(0x000), lv_color_hex3(0xeef));
lv_obj_set_pos(qrCode, 0, -20);
lv_obj_set_align(qrCode, LV_ALIGN_BOTTOM_MID);
lv_obj_add_flag(qrCode, LV_OBJ_FLAG_HIDDEN);
}
I run in arduino ide 2.0.1
The error shown is:
C:\Users\Supakee\Documents\Arduino\QRCODE\QRCODE.ino: In function 'void ui_dynamic_obj()':
C:\Users\Supakee\Documents\Arduino\QRCODE\QRCODE.ino:141:88: error: too many arguments to function 'lv_obj_t* lv_qrcode_create(lv_obj_t*)'
qrCode = lv_qrcode_create(mainScreen, 200, lv_color_hex3(0x000), lv_color_hex3(0xeef));
^
In file included from c:\users\supakee\documents\arduino\libraries\lvgl-d17450a55fbd8603e57b64d23feb36a63832b195\lvgl.h:91,
from c:\Users\Supakee\Documents\Arduino\libraries\lvgl-d17450a55fbd8603e57b64d23feb36a63832b195\src/lvgl.h:17,
from C:\Users\Supakee\Documents\Arduino\QRCODE\QRCODE.ino:7:
c:\users\supakee\documents\arduino\libraries\lvgl-d17450a55fbd8603e57b64d23feb36a63832b195\src/libs/qrcode/lv_qrcode.h:45:12: note: declared here
lv_obj_t * lv_qrcode_create(lv_obj_t * parent);
^~~~~~~~~~~~~~~~
Multiple libraries were found for "lvgl.h"
Used: C:\Users\Supakee\Documents\Arduino\libraries\lvgl-d17450a55fbd8603e57b64d23feb36a63832b195
Not used: C:\Users\Supakee\Documents\Arduino\libraries\src
exit status 1
Compilation error: too many arguments to function 'lv_obj_t* lv_qrcode_create(lv_obj_t*)'
I tried to follow instruction in LVGL Document.
I want to make this esp32-2432s028 to display QR code so how can i do it?
I use library from https://github.com/lvgl/lvgl/tree/6948eee9b12e13ea7db9287b96385b05a8a6cc9a
I use code from https://github.com/0015/ThatProject/tree/master/ESP32_LVGL/LVGL8/4_QR_Code_Maker
The compilation error:
Compilation error: too many arguments to function 'lv_obj_t* lv_qrcode_create(lv_obj_t*)'
Suggests that you should only send a single argument to the function
qrCode = lv_qrcode_create(mainScreen, 200, lv_color_hex3(0x000), lv_color_hex3(0xeef));
Does the documentation you have read differ from the implementation you are using?

C++ Kinect v2 & freenect2: how to convert depth data to real world coordinates

I am trying to compute real world xyz coordinates using a Kinect v2 camera (in Linux), but my computation give me wrong results.
Here is the code:
cv::Point3f xyzWorld={0.0f};
xyzWorld.z = pointDepth;
xyzWorld.x = (float) ((float)x -(depthcx)) * xyzWorld.z / depthfx;
xyzWorld.y = (float) ((float)y - (depthcy)) * xyzWorld.z / depthfy;
xyzWorld.z = pointDepth;
return xyzWorld;
I think the problem is due to the depth value of fx, fy, cx and cy.
Can someone help me?
I am using freenect2.
Why not just use the OpenNi implementation
// Converts a depth-image position (depthX, depthY, depthZ) into world
// coordinates using the cached resolution and xz/yz factors.
// Returns ONI_STATUS_NOT_SUPPORTED (after logging) when the stream's sensor
// is not a depth sensor, ONI_STATUS_OK otherwise. All three outputs are
// divided by 10 when the pixel format is ONI_PIXEL_FORMAT_DEPTH_100_UM.
OniStatus VideoStream::convertDepthToWorldCoordinates(float depthX, float depthY, float depthZ, float* pWorldX, float* pWorldY, float* pWorldZ)
{
    const bool isDepthStream = (m_pSensorInfo->sensorType == ONI_SENSOR_DEPTH);
    if (!isDepthStream)
    {
        m_errorLogger.Append("convertDepthToWorldCoordinates: Stream is not from DEPTH\n");
        return ONI_STATUS_NOT_SUPPORTED;
    }

    // Pixel position relative to the image centre, as a fraction of the
    // resolution; Y is flipped.
    const float normalizedX = depthX / m_worldConvertCache.resolutionX - .5f;
    const float normalizedY = .5f - depthY / m_worldConvertCache.resolutionY;

    OniVideoMode videoMode;
    int size = sizeof(videoMode);
    getProperty(ONI_STREAM_PROPERTY_VIDEO_MODE, &videoMode, &size);

    float convertToMillimeters = 1.f;
    if (videoMode.pixelFormat == ONI_PIXEL_FORMAT_DEPTH_100_UM)
    {
        convertToMillimeters = 10.f;
    }

    *pWorldX = (normalizedX * depthZ * m_worldConvertCache.xzFactor) / convertToMillimeters;
    *pWorldY = (normalizedY * depthZ * m_worldConvertCache.yzFactor) / convertToMillimeters;
    *pWorldZ = depthZ / convertToMillimeters;
    return ONI_STATUS_OK;
}
and
// Projects a world-space point back to depth-image coordinates using the
// cached coefficients and half resolution. Returns ONI_STATUS_NOT_SUPPORTED
// (after logging) when the stream's sensor is not a depth sensor.
OniStatus VideoStream::convertWorldToDepthCoordinates(float worldX, float worldY, float worldZ, float* pDepthX, float* pDepthY, float* pDepthZ)
{
    const bool isDepthStream = (m_pSensorInfo->sensorType == ONI_SENSOR_DEPTH);
    if (!isDepthStream)
    {
        m_errorLogger.Append("convertWorldToDepthCoordinates: Stream is not from DEPTH\n");
        return ONI_STATUS_NOT_SUPPORTED;
    }

    // Offset from the image centre, in pixels.
    const float offsetX = m_worldConvertCache.coeffX * worldX / worldZ;
    const float offsetY = m_worldConvertCache.coeffY * worldY / worldZ;

    *pDepthX = offsetX + m_worldConvertCache.halfResX;
    *pDepthY = m_worldConvertCache.halfResY - offsetY;  // Y is flipped
    *pDepthZ = worldZ;
    return ONI_STATUS_OK;
}
and the world conversion cache :
// Recomputes m_worldConvertCache from the stream's current video mode and
// horizontal/vertical field-of-view properties. Does nothing for streams
// whose sensor is not a depth sensor.
void VideoStream::refreshWorldConversionCache()
{
    if (m_pSensorInfo->sensorType != ONI_SENSOR_DEPTH)
    {
        return;
    }

    OniVideoMode videoMode;
    int size = sizeof(videoMode);
    getProperty(ONI_STREAM_PROPERTY_VIDEO_MODE, &videoMode, &size);

    float horizontalFov;
    float verticalFov;
    size = sizeof(float);
    getProperty(ONI_STREAM_PROPERTY_HORIZONTAL_FOV, &horizontalFov, &size);
    getProperty(ONI_STREAM_PROPERTY_VERTICAL_FOV, &verticalFov, &size);

    // Factors: 2 * tan(fov / 2) for each axis.
    m_worldConvertCache.xzFactor = tan(horizontalFov / 2) * 2;
    m_worldConvertCache.yzFactor = tan(verticalFov / 2) * 2;

    // Resolution and its (integer) half.
    m_worldConvertCache.resolutionX = videoMode.resolutionX;
    m_worldConvertCache.halfResX    = m_worldConvertCache.resolutionX / 2;
    m_worldConvertCache.resolutionY = videoMode.resolutionY;
    m_worldConvertCache.halfResY    = m_worldConvertCache.resolutionY / 2;

    // Coefficients: resolution divided by the matching factor.
    m_worldConvertCache.coeffX = m_worldConvertCache.resolutionX / m_worldConvertCache.xzFactor;
    m_worldConvertCache.coeffY = m_worldConvertCache.resolutionY / m_worldConvertCache.yzFactor;
}
// Values precomputed by refreshWorldConversionCache() and read by the
// depth <-> world conversion functions.
struct WorldConversionCache
{
// tan(horizontalFov / 2) * 2 and tan(verticalFov / 2) * 2
float xzFactor;
float yzFactor;
// resolutionX / xzFactor and resolutionY / yzFactor
float coeffX;
float coeffY;
// Resolution taken from the stream's video mode
int resolutionX;
int resolutionY;
// resolution / 2 (integer division)
int halfResX;
int halfResY;
} m_worldConvertCache;
all taken from
OpenNI GitHub repository
The horizontal and vertical fov you can just get directly from the from the description of each frame.

Halide Jit compilation

I'm trying to JIT-compile my Halide program so that I can use it later in the code several times on different images. But I think I am doing something wrong; can anyone correct me?
First I create halide function to run:
// Builds the gamma tone-mapping pipeline over a 3-D float image, schedules
// it, and stores the result of compile_jit() in m_gammaFunction.
void m_gammaFunctionTMOGenerate()
{
// Input image: dimension 0 has stride 4 and dimension 2 has stride 1.
Halide::ImageParam img(Halide::type_of<float>(), 3);
img.set_stride(0, 4);
img.set_stride(2, 1);
Halide::Var x, y, c;
Halide::Param<float> key, sat, clampMax, clampMin;
Halide::Param<bool> cS;
Halide::Func gamma;
// algorithm
//img.width() , img.height();
// NOTE(review): cS.get() and key.get() read the parameters' values right
// now, while the pipeline is being defined, not when the compiled function
// is called -- so this branch is decided once, here.
if (cS.get())
{
float k1 = 1.6774;
float k2 = 0.9925;
sat.set((1 + k1) * pow(key.get(), k2) / (1 + k1 * pow(key.get(), k2)));
}
Halide::Expr luminance = img(x, y, 0) * 0.072186f + img(x, y, 1) * 0.715158f + img(x, y, 2) * 0.212656f;
Halide::Expr ldr_lum = (luminance - clampMin) / (clampMax - clampMin);
// NOTE(review): the value returned by clamp() is discarded here, so ldr_lum
// is not actually clamped.
Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, key);
Halide::Expr imLum = img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
// NOTE(review): same as above -- the clamped value is not assigned to imLum.
Halide::clamp(imLum, 0.f, 1.f);
gamma(x, y, c) = imLum;
// schedule
gamma.vectorize(x, 16).parallel(y);
// compilation
// Output buffer: strides 4 and 1 in dimensions 0 and 2, 3 channels in dimension 2.
auto & obuff = gamma.output_buffer();
obuff.set_stride(0, 4);
obuff.set_stride(2, 1);
obuff.set_extent(2, 3);
// NOTE(review): `arguments` is built but never passed to anything below.
std::vector<Halide::Argument> arguments = { img, key, sat, clampMax, clampMin, cS };
// NOTE(review): the compile_jit() result is cast to the hand-written
// gammafunction pointer type; nothing here establishes that the generated
// function takes its arguments in that order -- confirm before calling it.
m_gammaFunction = (gammafunction)(gamma.compile_jit());
}
store it in pointer:
typedef int(*gammafunction)(buffer_t*, float, float, float, float, bool, buffer_t*);
gammafunction m_gammaFunction;
then i try to run it:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = m_photoFunction(&buf, params[0], &output_buf);
But it doesn't work...
Error:
Exception thrown at 0x000002974F552DE0 in Viewer.exe: 0xC0000005: Access violation executing location 0x000002974F552DE0.
If there is a handler for this exception, the program may be safely continued.
Edit:
Here is my code for running function:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 3; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = m_gammaFunction(&buf, params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false, &output_buf);
if (error) {
printf("Halide returned an error: %d\n", error);
return -1;
}
memcpy(output, data, size * sizeof(float));
can anyone help me with it?
Edit:
Thanks to @KhouriGiordano I found out what I was doing wrong. Indeed, I had switched from AOT compilation to this code. So now my code looks like this:
// Gamma tone-mapping operator: the constructor defines the Halide pipeline
// once, and realize() sets the inputs and runs it.
class GammaOperator
{
public:
GammaOperator();
// Binds `input`, sets the parameters from params[0..4] (key, sat, clampMax,
// clampMin, cS) and realizes the pipeline into `output`. Returns 0.
int realize(buffer_t * input, float params[], buffer_t * output, int width);
private:
// Pipeline parameters, set on every realize() call.
// NOTE(review): HalideFloat / HalideBool are declared elsewhere --
// presumably aliases for Halide::Param<float> / Halide::Param<bool>; confirm.
HalideFloat m_key;
HalideFloat m_sat;
HalideFloat m_clampMax;
HalideFloat m_clampMin;
HalideBool m_cS;
// Input image: constructed as a 3-dimensional float ImageParam.
Halide::ImageParam m_img;
Halide::Var x, y, c;
// The pipeline's output function.
Halide::Func m_gamma;
};
// Defines the gamma tone-mapping pipeline m_gamma(x, y, c) over the
// 3-dimensional float input m_img. Every parameter is used symbolically, so
// the values set before each realize are the ones the pipeline sees.
GammaOperator::GammaOperator()
    : m_img( Halide::type_of<float>(), 3)
{
    // Use the parameters as expressions. Calling get() here would bake in
    // whatever value they hold while the pipeline is being defined.
    Halide::Expr key = m_key;
    Halide::Expr userSat = m_sat;

    // Saturation derived from the key:
    //   (1 + k1) * key^k2 / (1 + k1 * key^k2), with k1 = 1.6774, k2 = 0.9925.
    Halide::Expr keyPow = Halide::pow(key, 0.9925f);
    Halide::Expr derivedSat = (1.f + 1.6774f) * keyPow / (1.f + 1.6774f * keyPow);

    // When m_cS is set the derived saturation replaces the supplied one.
    // NOTE(review): this polarity follows the if (cS) { sat = formula } form
    // of the pipeline; the previous select(m_cS, m_sat, w) was the other way
    // round -- confirm.
    Halide::Expr sat = Halide::select(m_cS, derivedSat, userSat);

    Halide::Expr luminance = m_img(x, y, 0) * 0.072186f + m_img(x, y, 1) * 0.715158f + m_img(x, y, 2) * 0.212656f;

    // Luminance rescaled from [clampMin, clampMax] to [0, 1], then raised to key.
    Halide::Expr ldr_lum = (luminance - m_clampMin) / (m_clampMax - m_clampMin);
    ldr_lum = Halide::clamp(ldr_lum, 0.f, 1.f);
    ldr_lum = Halide::pow(ldr_lum, m_key);

    // Per-channel ratio to luminance, raised to sat and scaled by the mapped luminance.
    Halide::Expr imLum = m_img(x, y, c) / luminance;
    imLum = Halide::pow(imLum, sat) * ldr_lum;
    imLum = Halide::clamp(imLum, 0.f, 1.f);

    m_gamma(x, y, c) = imLum;
}
// Runs the pipeline on `input` and writes the result to `output`.
//   input / output : interleaved float buffers (element stride 4 in x,
//                    width * 4 in y, 1 in the channel dimension)
//   params         : [0] key, [1] sat, [2] clampMax, [3] clampMin,
//                    [4] cS (treated as true when > 0.5)
//   width          : image width in pixels, used for the row stride
// Always returns 0.
int GammaOperator::realize(buffer_t * input, float params[], buffer_t * output, int width)
{
    m_img.set(Halide::Buffer(Halide::type_of<float>(), input));

    // Input layout. The channel stride is 1 (it was set to 4, which does not
    // describe an interleaved buffer).
    m_img.set_stride(0, 4);
    m_img.set_stride(1, width * 4);
    m_img.set_stride(2, 1);

    // The output buffer is interleaved as well. Without these constraints the
    // output is expected to have stride 1 in dimension 0, which is what
    // "Constraint violated: f0.stride.0 (4) == 1 (1)" reports.
    m_gamma.output_buffer().set_stride(0, 4);
    m_gamma.output_buffer().set_stride(2, 1);

    // schedule
    // NOTE(review): this is re-applied on every call; consider moving it to
    // the constructor -- confirm repeated scheduling calls are harmless.
    m_gamma.vectorize(x, 16).parallel(y);

    // params order: { key, sat, clampMax, clampMin, cS }
    m_key.set(params[0]);
    m_sat.set(params[1]);
    m_clampMax.set(params[2]);
    m_clampMin.set(params[3]);
    m_cS.set(params[4] > 0.5f ? true : false);

    // compile (on first use) and run
    m_gamma.realize(Halide::Buffer(Halide::type_of<float>(), output));
    return 0;
}
and i use it like that:
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);
// Run the pipeline
int error = s_gamma->realize(&buf, params, &output_buf, width);
but it is still crashing on m_gamma.realize function with info in console:
Error: Constraint violated: f0.stride.0 (4) == 1 (1)
By using Halide::Param::get(), you are extracting the (default of 0) value from the Param object at the time you call get(). If you want to use the parameter value given at the time you call the generated function, just use it without calling get and it should be implicitly converted to an Expr.
Since Param is not convertible to a boolean, the Halide way of doing an if is Halide::select().
You aren't using the clamped return value of Halide::clamp().
I don't see cS being used by the Halide code, only the C code.
Now to your JIT problem. It looks like you started doing AOT compilation and switched to JIT.
You make a std::vector<Halide::Argument> but don't pass it anywhere. How can Halide know what Param you want to use? It looks at the Func and finds references to ImageParam and Param objects.
How can you know what order it expects the Param? You have no control over this. I was able to dump the bitcode by defining HL_GENBITCODE=1 and then view that with llvm-dis to see your function:
int gamma
( buffer_t *img
, float clampMax
, float key
, float clampMin
, float sat
, void *user_context
, buffer_t *result
);
Use gamma.realize(Halide::Buffer(Halide::type_of<float>(), &output_buf)) instead of using gamma.compile_jit() and trying to call the generated function properly.
For one time use:
Use Image instead of ImageParam.
Use Expr instead of Param.
For repeated use with a single JIT compile:
Keep the ImageParam and Param around and set them before realizing the Func.

Flipping the 2D texture on a sphere with Ray-Tracing

I am working on my ray-tracer and I think I've made some significant achievements. I am currently trying to place texture images onto objects. However they don't place quite well. They appear flipped on the sphere. Here is the final image of my current code:
Here is the relevant code:
-Image Class for opening image
// Minimal loader for uncompressed 24-bit BMP files.
// After a successful read_bmp_file(), `data` holds width * height tightly
// packed (R, G, B) triples in file row order: the colour of pixel (i, j) is at
// data[j * 3 * width + 3 * i + {0, 1, 2}].
class Image
{
public:
    // Starts out empty so a failed load can be detected (data == 0).
    Image() : width(0), height(0), data(0) {}

    // Reads `filename` into width / height / data.
    // The image is left unchanged when the file cannot be opened, is
    // truncated, or reports a non-positive width or height.
    // NOTE: a buffer from an earlier successful call is not freed here,
    // because `data` is public and callers may still hold that pointer.
    void read_bmp_file(char* filename)
    {
        FILE* f = fopen(filename, "rb");
        if (f == 0)
            return;

        unsigned char info[54];
        if (fread(info, sizeof(unsigned char), 54, f) != 54) // read the 54-byte header
        {
            fclose(f);
            return;
        }

        // extract image width and height from the header (little-endian fields)
        const int w = read_le32(&info[18]);
        const int h = read_le32(&info[22]);
        if (w <= 0 || h <= 0)
        {
            fclose(f);
            return;
        }

        // Each row in the file is padded to a multiple of 4 bytes; the padding
        // is skipped so that `data` stays tightly packed.
        const size_t rowBytes = (size_t)3 * (size_t)w;
        const long padding = (long)((4 - rowBytes % 4) % 4);
        const size_t size = rowBytes * (size_t)h;

        unsigned char* pixels = new unsigned char[size]; // 3 bytes per pixel
        for (int row = 0; row < h; ++row)
        {
            if (fread(pixels + (size_t)row * rowBytes, sizeof(unsigned char), rowBytes, f) != rowBytes)
            {
                delete[] pixels;
                fclose(f);
                return;
            }
            if (padding > 0)
                fseek(f, padding, SEEK_CUR);
        }
        fclose(f);

        // The file stores colours as (B, G, R); swap the first and third byte
        // of every pixel to get (R, G, B).
        for (size_t i = 0; i < size; i += 3)
        {
            const unsigned char tmp = pixels[i];
            pixels[i] = pixels[i + 2];
            pixels[i + 2] = tmp;
        }

        width = w;
        height = h;
        data = pixels;
    }

private:
    // Assembles a 32-bit little-endian integer from four header bytes (avoids
    // reading the byte array through a possibly misaligned int pointer).
    static int read_le32(const unsigned char* p)
    {
        return (int)((unsigned int)p[0]
                     | ((unsigned int)p[1] << 8)
                     | ((unsigned int)p[2] << 16)
                     | ((unsigned int)p[3] << 24));
    }

public:
    int width;
    int height;
    unsigned char* data;
};
-Texture class
class Texture: public Material
{
public:
Texture(char* filename): Material() {
image_ptr = new Image;
image_ptr->read_bmp_file(filename);
}
virtual ~Texture() {}
virtual void set_mapping(Mapping* mapping)
{ mapping_ptr = mapping;}
virtual Vec get_color(const ShadeRec& sr) {
int row, col;
if(mapping_ptr)
mapping_ptr->get_texel_coordinates(sr.local_hit_point, image_ptr->width, image_ptr->height, row, col);
return Vec (image_ptr->data[row * 3 * image_ptr->width + 3*col ]/255.0,
image_ptr->data[row * 3 * image_ptr->width + 3*col+1]/255.0,
image_ptr->data[row * 3 * image_ptr->width + 3*col+2]/255.0);
}
public:
Image* image_ptr;
Mapping* mapping_ptr;
};
-Mapping class
// Maps a point on a sphere to texel coordinates of an hres x vres image.
class SphericalMap: public Mapping
{
public:
    SphericalMap(): Mapping() {}
    virtual ~SphericalMap() {}

    // local_hit_point : hit point relative to the sphere centre; y is used as
    //                   cos(theta), while x and z are only used through their
    //                   ratio, so their scale does not matter
    // hres, vres      : image width and height in texels
    // row, column     : receive the texel coordinates
    virtual void get_texel_coordinates (const Vec& local_hit_point,
                                        const int hres,
                                        const int vres,
                                        int& row,
                                        int& column) const
    {
        // Polar angle. Rounding can push y marginally outside [-1, 1], where
        // acos() would return NaN, so the end points are handled explicitly.
        float theta;
        if (local_hit_point.y >= 1.0)
            theta = 0.0f;
        else if (local_hit_point.y <= -1.0)
            theta = PI;
        else
            theta = acos(local_hit_point.y);

        // Azimuth measured as atan2(x, z). With the arguments the other way
        // round the texture comes out mirrored on the sphere.
        float phi = atan2(local_hit_point.x, local_hit_point.z);
        if(phi < 0.0)
            phi += 2*PI;

        // (u, v) in [0, 1] x [0, 1]
        float u = phi/(2*PI);
        float v = (PI - theta)/PI;

        column = (int)((hres - 1) * u);
        row = (int)((vres - 1) * v);
    }
};
-Local hit points:
// Stores the hit point relative to the sphere centre c, scaled by 1/R in all
// three components. Previously only y was divided by R, leaving
// local_hit_point a mix of normalised and world-scale coordinates.
virtual void Sphere::set_local_hit_point(ShadeRec& sr)
{
    sr.local_hit_point.x = (sr.hit_point.x - c.x)/R;
    sr.local_hit_point.y = (sr.hit_point.y - c.y)/R;
    sr.local_hit_point.z = (sr.hit_point.z - c.z)/R;
}
-This is how I constructed the sphere in main:
Texture* t1 = new Texture("Texture\\earthmap2.bmp");
SphericalMap* sm = new SphericalMap();
t1->set_mapping(sm);
t1->set_ka(0.55);
t1->set_ks(0.0);
Sphere *s1 = new Sphere(Vec(-60,0,50), 149);
s1->set_material(t1);
w.add_object(s1);
Sorry for long codes but if I had any idea where that problem might occur, I'd have posted that part. Finally this is how I call get_color() function from the main:
xShaded += sr.material_ptr->get_color(sr).x * in.x * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.x * pow((max(0.0,sr.normal.dot(h))),1);
yShaded += sr.material_ptr->get_color(sr).y * in.y * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.y * pow((max(0.0,sr.normal.dot(h))),1);
zShaded += sr.material_ptr->get_color(sr).z * in.z * max(0.0, sr.normal.dot(l)) +
sr.material_ptr->ks * in.z * pow((max(0.0,sr.normal.dot(h))),1);
Shot in the dark: if memory serves, BMPs are stored from the bottom up, while many other image formats are top-down. Could that possibly be the problem? Perhaps your file reader just needs to reverse the rows?
Changing float phi = atan2(local_hit_point.z, local_hit_point.x); to float phi = atan2(local_hit_point.x, local_hit_point.z); solved the problem.

Second iteration crash - order irrelevant

To save on global memory transfers, and because all of the steps of the code work individually, I have tried to combine all of the kernels into a single kernel, with the first 2 (of 3) steps being done as device calls rather than global calls.
This is failing in the second half of the first step.
There is a function that I need to call twice, to calculate the 2 halves of an image. Regardless of the order the image is calculated in, it crashes on the second iteration.
After examining the code as well as I could, and running it multiple times with different return points, I have found what makes it crash.
// Intersects `ray` with the truncated cone described by `param` and writes
// the hit distance and intensity for this thread.
//   ModDistance / ModIntensity : output arrays, written at index threadID
//   ray                        : ray direction
//   threadID                   : index of this thread's output slot
//   param                      : cone position, direction, length and end radii r1, r2
// Both outputs are 0 when the ray misses or the surface faces away from it.
// NOTE(review): r1 == r2 makes `tran` a division by zero, and c2 == 0 does
// the same for the root -- confirm callers never pass such a cone/ray.
__device__
void IntersectCone( float* ModDistance,
                    float* ModIntensity,
                    float3 ray,
                    int threadID,
                    modParam param )
{
    bool ignore = false;
    float normDist = 0.0f;
    float intensity = 0.0f;

    // Early reject. fabsf keeps this in float; abs() may resolve to the
    // integer overload and truncate.
    float check = fabsf( Dot(param.position, Cross(param.direction,ray) ) );
    if(check > param.r1 && check > param.r2)
        ignore = true;

    // Extend the frustum to a full cone: `tran` is the axial distance from
    // the apex to the r1 end, `length` the distance from the apex to the r2 end.
    float tran = param.length / (param.r2/param.r1 - 1);
    float length = tran + param.length;
    float Lsq = length * length;
    float cosSqr = Lsq / (Lsq + param.r2 * param.r2);

    // Apex of the cone.
    float3 position = param.position - tran * param.direction;

    // Quadratic coefficients of the ray / cone intersection.
    float aDd = Dot(param.direction, ray);
    float3 e = position * -1.0f;
    float aDe = Dot(param.direction, e);
    float dDe = Dot(ray, e);
    float eDe = Dot(e, e);
    float c2 = aDd * aDd - cosSqr;
    float c1 = aDd * aDe - cosSqr * dDe;
    float c0 = aDe * aDe - cosSqr * eDe;
    float discr = c1 * c1 - c0 * c2;
    if(discr <= 0.0f)
        ignore = true;

    if(!ignore)
    {
        float root = sqrt(discr);
        float sign;
        if(c1 > 0.0f)
            sign = 1.0f;
        else
            sign = -1.0f;

        //Try opposite sign....?
        float3 result = (-c1 + sign * root) * ray / c2;
        e = result - position;
        float dot = Dot(e, param.direction);
        float3 s1 = Cross(e, param.direction);
        float3 normal = Cross(e, s1);

        // The hit must lie between the two end caps. The previous `||` was
        // true for every hit whenever length > tran.
        if( (dot > tran) && (dot < length) )
        {
            // Only surfaces facing the ray contribute.
            if(Dot(normal,ray) <= 0)
            {
                normal = Norm(normal);
                normDist = Magnitude(result);
                intensity = -IntensAt1m * Dot(ray, normal) / (normDist * normDist);
            }
        }
    }

    ModDistance[threadID] = normDist;
    ModIntensity[threadID] = intensity;
}
There are two things I can do to make this not crash, both of which negate the point of the function: not writing to ModDistance[] and ModIntensity[], or not writing to normDist and intensity.
First-chance exceptions are thrown by the code above, but not if either of the blocks is commented out.
Also, The program only crashes the second time this routine is called.
Have been trying to figure this out all day, any help would be fantastic.
The code that calls it is:
int subrow = threadIdx.y + Mod_Height/2;
int threadID = subrow * (Mod_Width+1) + threadIdx.x;
int obsY = windowY + subrow;
float3 ray = CalculateRay(obsX,obsY);
if( !IntersectSphere(ModDistance, ModIntensity, ray, threadID, param) )
{
IntersectCone(ModDistance, ModIntensity, ray, threadID, param);
}
subrow = threadIdx.y;
threadID = subrow * (Mod_Width+1) + threadIdx.x;
obsY = windowY + subrow;
ray = CalculateRay(obsX,obsY);
if( !IntersectSphere(ModDistance, ModIntensity, ray, threadID, param) )
{
IntersectCone(ModDistance, ModIntensity, ray, threadID, param);
}
The kernel is running out of resources. As posted in the comments, it was giving the error CudaErrorLaunchOutOfResources.
To avoid this, you should use a __launch_bounds__ specifier to specify the block dimensions you want for your kernel. This will force the compiler to ensure there are enough resources. See the CUDA programming guide for details on __launch_bounds__.