For a little project I've been trying to build an application using OpenCL in combination with OpenGL through the respective Python bindings (PyOpenGL and PyOpenCL). To be more precise, it's supposed to be a generalized raytracer.
After installing both the OpenCL Intel SDK and the NVIDIA CUDA toolkit and using the IDE by intel to write the program I've encountered a problem:
Whenever I try to build the program either the intel compiler "stopped working" or the NVIDIA one gives me a "invalid value - " message. If I however only compile the program everything goes well and nothing bad happens.
If I deliberately place a mistake into the code, both compilers return proper compiler errors with line numbers and all that.
I was able to narrow down the problem to the __kernel function, but I really can't understand why it would compile and yet fail to build. Is there something I'm missing?
(please excuse the probably bad opencl code. First time I'm writing in anything similar to C)
The Kernel:
// Kernel entry point. Copies the flat __constant input buffers into Mesh,
// Material and Object records and then passes the Object array to trace().
//
// NOTE(review): the loop bounds written as sizeof(ptr)/sizeof(T) are applied to
// pointer parameters, so they yield pointer-size / element-size rather than the
// number of elements in the buffer.
// NOTE(review): meshData, materialData and objectData are declared as pointers
// and are indexed without ever being assigned any storage.
__kernel void raytrace(__write_only image2d_t renderTexture,
int2 pos, __constant float projectionsMatrix[16],
//Mesh Data
__constant int* meshIndecies, __constant float3* meshVertices,
__constant float3* meshNormals, __constant float2* meshUV,
__constant int* meshTriangleIndecies, __constant int* meshTriangles,
//Material Data
__constant float4* emissions, __constant float4* ambients,
__constant float4* diffuses, __constant float4* speculars,
__constant float* shininesses,
//Object Data
__constant float* matricies, __constant int* meshes,
__constant int* materials) {
// Destination for the per-mesh records (never pointed at storage in this kernel).
Mesh* meshData;
// Offsets at which the previous mesh ended in the vertex and triangle buffers.
int pcMeshv = 0;
int pcMesht = 0;
for (int cMesh = 0; cMesh < sizeof(meshIndecies)/sizeof(int); cMesh++) {
// Mesh's members are pointers; they are indexed below without being set first.
Mesh data;
//assign data
// meshIndecies[cMesh] is used as the end offset of this mesh's vertices.
for (int index = 0; index < meshIndecies[cMesh] - pcMeshv; index++) {
data.vertices[index] = meshVertices[pcMeshv + index];
data.normals[index] = meshNormals[pcMeshv + index];
data.uv[index] = meshUV[pcMeshv + index];
}
pcMeshv = meshIndecies[cMesh];
// meshTriangleIndecies[cMesh] is used as the end offset of this mesh's triangle entries.
for (int index = 0; index < meshTriangleIndecies[cMesh] - pcMesht; index++) {
data.triangles[index] = meshTriangles[pcMesht + index];
}
pcMesht = meshTriangleIndecies[cMesh];
//add data to array
meshData[cMesh] = data;
}
// One Material per entry of the five parallel material buffers.
Material* materialData;
for (int index = 0; index < sizeof(shininesses)/sizeof(float); index++) {
Material data;
data.emission = emissions[index];
data.ambient = ambients[index];
data.diffuse = diffuses[index];
data.specular = speculars[index];
data.shininess = shininesses[index];
materialData[index] = data;
}
// One Object per entry of meshes/materials; each takes 16 consecutive floats of matricies.
Object* objectData;
for (int index = 0; index < sizeof(meshes)/sizeof(int); index++) {
Object data;
// meshes[index] and materials[index] select the records built above.
data.mesh = &meshData[meshes[index]];
data.material = &materialData[materials[index]];
for (int i = 0; i < 16; i++) {
data.matrix[i] = matricies[index*16 + i];
}
objectData[index] = data;
}
//begin trace
// trace() is not defined in the visible source.
trace(renderTexture, pos, projectionsMatrix,
objectData, 0);
}
The Structs:
// Per-mesh geometry, held as pointers to arrays that the raytrace kernel
// indexes per vertex (vertices, normals, uv) and per triangle entry (triangles).
// The pointers carry no address-space qualifier.
typedef struct {
float3* vertices;
float3* normals;
float2* uv;
uint* triangles;
} Mesh;
// Material record. The raytrace kernel fills one of these per index from the
// emissions, ambients, diffuses, speculars and shininesses buffers.
typedef struct {
float shininess;
float4 specular;
float4 emission;
float4 ambient;
float4 diffuse;
} Material;
// Scene object: pointers to the Mesh and Material it uses, plus 16 matrix
// floats that the raytrace kernel copies from the matricies buffer.
typedef struct {
Mesh* mesh;
Material* material;
float matrix[16];
} Object;
// Parameters of one light source.
//
// The members deliberately carry no address-space qualifier: OpenCL C does not
// allow __constant (or any other address space) on individual struct fields.
// The address space belongs to the Light object or pointer instead, e.g.
// `__constant Light* lights`.
typedef struct {
    float4 ambient;
    float4 specular;
    float4 diffuse;
    float4 position;
    float3 spotDirection;
    float spotExponent;
    float spotCutoff;
    float constantAttenuation;
    float linearAttenuation;
    float quadraticAttenuation;
} Light;
// A ray described by an origin point and a direction vector.
typedef struct {
float3 origin;
float3 direction;
} Ray;
// Record describing a ray hit. It is not used by the visible kernel code;
// presumably it is filled in by the tracing routine — TODO confirm.
typedef struct {
float3 point;
float3 normal;
// presumably the barycentric coordinates of the hit — verify against the producer
float3 bary;
float dist;
uint triangle_index;
// Object the hit belongs to.
Object* object;
} RaycastHit;
Thanks for your help,
Benproductions1
Related
so, I am writing a couple of functions so to run GLSL fragment shaders on CPU. I implemented all the basic mathematical functions to do so. However I can't even get the simplest stuff to execute correctly. I was able to trace it down to either of these functions:
/// Returns @p vs scaled to unit length.
///
/// @tparam vsize number of components.
/// @param vs vector to normalise.
/// @return each component of @p vs divided by the vector's magnitude.
/// @throws std::logic_error if the magnitude of @p vs is exactly zero.
template<unsigned vsize>
_SHADERH_INLINE
vec<float, vsize> normalize(const vec<float, vsize>& vs) {
    float mod = 0.0f;
    for (unsigned i = 0; i < vsize; ++i) {
        mod += vs[i] * vs[i];
    }
    const float mag = glsl::sqrt(mod);
    if (mag == 0) {
        // The exception object must actually be thrown; merely constructing it
        // is a no-op and would let the division below produce NaNs.
        throw std::logic_error("In normalize the input vector is a zero vector");
    }
    vec<float, vsize> fres;
    for (unsigned i = 0; i < vsize; ++i) {
        fres[i] = vs[i] / mag;
    }
    return fres;
}
/// Computes the square root of the sum of the squared components of @p vs.
///
/// NOTE(review): the declared return type is vec<float, vsize> although the
/// value returned is the result of glsl::sqrt; this appears to rely on an
/// implicit scalar-to-vec conversion — confirm that is intended.
template<unsigned vsize>
_SHADERH_INLINE
vec<float, vsize> length(const vec<float, vsize>& vs) {
    float sumOfSquares = 0;
    unsigned idx = 0;
    while (idx != vsize) {
        sumOfSquares += vs[idx] * vs[idx];
        idx++;
    }
    return glsl::sqrt(sumOfSquares);
}
/// Dot product of @p vs1 and @p vs2.
///
/// @tparam vsize number of components in each operand.
/// @return the sum of the component-wise products.
///
/// NOTE(review): the declared return type is vec<float, vsize> although a
/// scalar is returned; this appears to rely on an implicit scalar-to-vec
/// conversion — confirm that is intended.
template<unsigned vsize>
_SHADERH_INLINE
vec<float, vsize> dot(const vec<float, vsize>& vs1, const vec<float, vsize>& vs2) {
    // std::inner_product accumulates in the type of its initial value, so the
    // seed must be a float; an int seed would truncate every partial sum.
    return std::inner_product(vs1.begin(), vs1.end(), vs2.begin(), 0.0f);
}
But it could still be something in my vec implementation (pretty certain it's not).
So, does anyone see anything wrong with the code above or something that does not align with glsl behavior? If nobody is able to find anything wrong there, I will make a follow-up question with my vec implementation.
There are few things that may be the origin of the problems.
Failing dot function
After a bit of testing, it turned out that your dot function fails because you pass the integer literal 0 as the initial value to std::inner_product. The accumulator has the type of that initial value, so the sum is kept in an int and every partial result is truncated, which gives wrong results. See this post for the reason: Zero inner product when using std::inner_product
Simply write 0.0f to ensure a float as initial value for the accumulator :
// Dot product via std::inner_product, seeded with a float so that the
// accumulation is carried out in float.
vec<float, vsize> dot(const vec<float, vsize>& vs1, const vec<float, vsize>& vs2) {
    const float seed = 0.0f;
    return std::inner_product(vs1.begin(), vs1.end(), vs2.begin(), seed);
}
More generally, I recommend you to always write any hardcoded value with clear indication of its type.
You can also manually write your dot product as well, as you did in normalize and length :
// Dot product written as an explicit loop over the vsize components.
vec<float, vsize> dot(const vec<float, vsize>& vs1, const vec<float, vsize>& vs2) {
    float acc = 0.0f;
    unsigned k = 0;
    while (k < vsize) {
        acc += vs1[k] * vs2[k];
        ++k;
    }
    return acc;
}
No exception thrown when normalizing a null vector
In the normalize function, you are not actually throwing any error when mag == 0. When passing a null vector to normalize, it returns (-nan, -nan, -nan) instead of throwing the error you want. std::logic_error does not throw any error, it creates an object to be thrown (see : https://en.cppreference.com/w/cpp/error/logic_error).
Thus instead of writing :
if (mag == 0) {
// Doesn't do anything...
std::logic_error("In normalize the input vector is a zero vector");
}
You must write :
if (mag == 0.0f) {
// Actually throws a `logic_error` exception
throw std::logic_error("In normalize the input vector is a zero vector");
}
Handling of floats as vec types
This depends on your implementation of vec. length and dot return scalar values (float) and not vectors. Since (to my understanding) you didn't mention compilation errors, I assume your vec type can handle floats as 1-component vectors. Be sure that this is indeed working.
Here is the code I used to test your functions. I quickly implemented a simple vec class and adapted your functions to this type :
#include <iostream>
#include <vector>
#include <math.h>
#include <numeric>
#include <stdexcept>
// Minimal runtime-sized float vector used to exercise the functions below.
class vec {
public:
    // Number of components requested at construction.
    int vsize;
    // Component storage; holds vsize zeros after construction.
    std::vector<float> vals;

    // Creates a vector of s components, all set to 0.0f.
    vec(int s) : vsize(s), vals(s, 0.0f) {}
};
// Writes every component of v followed by a space, then ends (and flushes) the line.
void print_vec(vec& v){
    unsigned idx = 0;
    while (idx < v.vsize) {
        std::cout << v.vals[idx] << " ";
        idx++;
    }
    std::cout << std::endl;
}
// Returns vs divided by its magnitude.
// Throws std::logic_error when the magnitude is exactly zero.
vec normalize(const vec& vs) {
    vec unit(vs.vsize);

    // Sum of squared components.
    float sumSq = 0.0f;
    unsigned k = 0;
    while (k < vs.vsize) {
        sumSq += vs.vals[k] * vs.vals[k];
        ++k;
    }

    const float norm = sqrt(sumSq);
    if (norm == 0.0f) {
        throw std::logic_error("In normalize the input vector is a zero vector");
    }

    k = 0;
    while (k < vs.vsize) {
        unit.vals[k] = vs.vals[k] / norm;
        ++k;
    }
    return unit;
}
// Returns the square root of the sum of the squared components of vs.
float length(const vec& vs) {
    float sumSq = 0;
    unsigned k = 0;
    while (k != vs.vsize) {
        sumSq += vs.vals[k] * vs.vals[k];
        k++;
    }
    return sqrt(sumSq);
}
// Dot product of the two component arrays, accumulated in float.
// The range is driven by vs1; vs2 is read from its first component onwards.
float dot(const vec& vs1, const vec& vs2) {
    const std::vector<float>& lhs = vs1.vals;
    const std::vector<float>& rhs = vs2.vals;
    return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
}
In main I simply tested some hardcoded vectors and printed the results of normalize, length and dot. I ran the code on https://www.onlinegdb.com/online_c++_compiler
I am working on an implementation of a Moog VCF filter that creates a fourth order filter by placing four first order filters in series. These first order filters all share the same values for their coefficients. One way of approaching this was to use aggregation and create a filter class whose member variables were reference types:
// First-order filter stage whose coefficients are bound by reference, so that
// changing the referenced floats changes the coefficients seen by every stage
// bound to them.
//
// Because the coefficients are reference members, instances can be copy- and
// move-constructed but not assigned.
class FirstOrderFilter {
public:
    // Binds the four coefficients; the referenced floats must outlive this object.
    FirstOrderFilter(float& a1, float& b0, float& b1, float& g) : a1(a1), b0(b0), b1(b1), g(g) {}

    // Processes one input sample and returns the output sample (defined elsewhere).
    float process(float in);

private:
    // Coefficients
    float& a1;
    float& b0;
    float& b1;
    float& g;

    // The filter state starts at zero. Without these initialisers the first
    // call to process() would read indeterminate values.
    // x[n-1]
    float previousInput = 0.0f;
    // y[n-1]
    float previousOutput = 0.0f;
};
The idea was that I could create an array or vector of filters at the start of the program and any change to the coefficients would cause the member variables for each class to be updated without the need to call a setter function.
This does work but I ran into a few issues. The only two options for initialising the containers that work are:
std::vector<FirstOrderFilter> filters (4, FirstOrderFilter{gA1, gB0, gB1, gCutoff});
std::array<FirstOrderFilter, 4> = {
FirstOrderFilter{gA1, gB0, gB1, gCutoff},
FirstOrderFilter{gA1, gB0, gB1, gCutoff},
FirstOrderFilter{gA1, gB0, gB1, gCutoff},
FirstOrderFilter{gA1, gB0, gB1, gCutoff}
};
If I do the following:
std::vector<FirstOrderFilter> filters;
for (int i = 0; i < 4; i++)
filters.push_back({gA1, gB0, gB1, gCutoff});
The program crashes because the output becomes incredibly large (although there is no runtime error produced). If I use emplace_back the program still runs but the filter does not display the correct behaviour.
I looked into using std::reference_wrapper but I am not sure of the correct way to use it. For example, the following does not improve matters:
// Coefficients
std::reference_wrapper<float> a1;
std::reference_wrapper<float> b0;
std::reference_wrapper<float> b1;
std::reference_wrapper<float> g;
I understand this is caused by the fact that references cannot be copied or assigned, however, I thought emplace_back might help get round this. If I was to use an array instead of a vector, is there another way of initialising the array instead of the cumbersome method above?
Can anyone explain what is happening here?
Here is a minimal reproducible example:
#include <iostream>
#include <vector>
// Coefficient values; main() binds every Filter to these by reference.
float gA1 = 0.0f;
float gB0 = 0.0f;
float gB1 = 0.0f;
float gCutoff = 0.0f;

// Assigns the fixed coefficient set used by this example.
void calculate_coefficients()
{
    gCutoff = 1.0f;
    gB1 = 0.3f / 1.3f;
    gB0 = 1.0f / 1.3f;
    gA1 = 1.0f;
}
// Single filter stage whose four coefficients are bound by reference, so it
// always sees the current values of the referenced floats.
class Filter {
public:
    // Binds the coefficients; the referenced floats must outlive this object.
    Filter(float& a1, float& b0, float& b1, float& g)
        : a1(a1), b0(b0), b1(b1), g(g) {}

    // Processes one sample and updates the stored previous input/output.
    float process(float in);

private:
    // Coefficients
    float& a1;
    float& b0;
    float& b1;
    float& g;

    // x[n-1]
    float previousInput = 0.0f;
    // y[n-1]
    float previousOutput = 0.0f;
};

// out = ((b0*in + b1*x[n-1] - a1*y[n-1]) * g) + a1*y[n-1]
float Filter::process(float in)
{
    const float feedback = a1 * previousOutput;
    const float feedforward = b0 * in + b1 * previousInput;
    const float out = ((feedforward - feedback) * g) + feedback;

    previousInput = in;
    previousOutput = out;
    return out;
}
std::vector<Filter> filters;
// This produces the expected result
// std::vector<Filter> filters (4, Filter(gA1, gB0, gB1, gCutoff));
int main() {
// Comment out this loop if using fill constructor
for (unsigned int i = 0; i < 4; i++)
filters.push_back(Filter(gA1, gB0, gB1, gCutoff));
calculate_coefficients();
float out = 1.0f;
for (unsigned int i = 0; i < filters.size(); i++)
out = filters[i].process(out);
std::cout << std::to_string(out) << std::endl;
}
The output produced by running the above code is a very large negative number, for example -66466164548257686390571008.000000.
Update: This error was caused by declaring the reference types on the same line and not initialising previousInput and previousOutput correctly.
I've been trying this for several hours now. I cannot find a way to pass a fixed-size array to an operator. I found some material here on Stack Overflow and tried it that way, as you can see in my code, but it won't work at all. The requirement is that the code must not compile if the array is not of size 3: if the array is of size 2 or size 4, I should get a compile error. Can someone tell me how to implement this? Thanks in advance! :)
// Three-component integer vector with read-only accessors.
class Vec3 {
public:
    // Stores the three components as given.
    Vec3 (int x, int y, int z) : x(x), y(y), z(z) {}

    int getX () const { return x; }
    int getY () const { return y; }
    int getZ () const { return z; }

private:
    int x;
    int y;
    int z;
};
// Component-wise sum of a Vec3 and an int[3] passed by pointer-to-array.
//
// The parameter type int (*)[3] fixes the array length at compile time. Call it
// with the address of the array (`v + &arr`); a plain array argument decays to
// int* and does not match. arr must not be null.
//
// Both operands are taken as const so that const objects and temporaries
// (e.g. the result of a previous +) are accepted as well.
Vec3 operator+(const Vec3 &vec, const int (*arr)[3]) {
    return Vec3(vec.getX() + (*arr)[0],
                vec.getY() + (*arr)[1],
                vec.getZ() + (*arr)[2]);
}
// Demo driver: adds a plain int array to a Vec3 and prints the result.
// NOTE(review): cout and endl are used unqualified, and no #include or
// using-directive is visible in this snippet — confirm they exist in the full file.
int main () {
Vec3 v1 (1,2,3);
// The size is deduced from the initializer, so v3 has type int[3].
int v3 [] = {2,4,6};
// NOTE(review): v3 decays to int*, which does not match the pointer-to-array
// parameter of the operator+ shown with this snippet; also, no operator<< for
// Vec3 is visible here.
cout << "v1 + v3 = " << v1 + v3 << endl;
return 0;
}
You got the syntax slightly wrong. Instead of
Vec3 operator+(Vec3 &vec, int (*arr)[3])
it must be
Vec3 operator+(Vec3 &vec, int (&arr)[3])
to pass the array by reference. You can then drop the dereference operator (*) in front of each array access, so you end up with
// Component-wise sum of a Vec3 and an int[3] passed by reference.
//
// Binding to a reference-to-array keeps the length in the type, so only arrays
// of exactly three ints are accepted; any other size is a compile error.
//
// Both operands are taken as const so that const objects, const arrays and
// temporaries (e.g. the result of a previous +) are accepted as well.
Vec3 operator+(const Vec3 &vec, const int (&arr)[3]) {
    return Vec3(vec.getX() + arr[0],
                vec.getY() + arr[1],
                vec.getZ() + arr[2]);
}
Use template to do it:
template<size_t N>
Vec3 operator+(Vec3 &vec, int (&arr)[N]) {
static_assert(N==3,"wrong size of array");
// the rest of the code , small fix: arr[0] etc
The static_assert fires, and compilation fails, whenever N is not equal to 3.
Demo
I have this structure:
// One vertex: a position, a normal and a texture coordinate.
struct Vertex {
// Builds the three members from eight scalar components. This is the only
// constructor declared, so Vertex has no default constructor.
Vertex(float px, float py, float pz,
float nx, float ny, float nz,
float tx, float ty) : position(px, py, pz),
normals(nx, ny, nz),
texCoords(tx, ty) {}
XMFLOAT3 position;
XMFLOAT3 normals;
XMFLOAT2 texCoords;
};
and I need to fill an array of that with some vectors:
std::vector<XMFLOAT3> positions;
std::vector<XMFLOAT3> normals;
std::vector<XMFLOAT2> texCoords;
The length of the array is given by
int numVertices;
I'd like to fill the array of struct Vertex from the given vectors. How can I do that?
I tried to initialize the array in this way:
Vertex points[numVertices];
but that does not compile, because numVertices is not a compile-time constant.
Thank you for your help.
std::vector is the best option to create a dynamic array.
It takes care of memory management for you.
You can access the contents of the array using std::vector::data, std::vector::operator[], std::vector::iterator.
You can process each element of a std::vector using a range-for loop.
Instead of
Vertex points[numVertices];
use
std::vector<Vertex> points(numVertices);
If you must use raw arrays, you can try this. Assuming XMFLOAT3, XMFLOAT2 is something like follows:
// Stand-in for a three-float value type, used only for this illustration.
struct XMFLOAT3 {
    float _x;
    float _y;
    float _z;

    // Stores the three components as given.
    XMFLOAT3(float x, float y, float z) : _x(x), _y(y), _z(z) {}
};
// Stand-in for a two-float value type, used only for this illustration.
struct XMFLOAT2 {
    float _x;
    float _y;

    // Stores the two components as given.
    XMFLOAT2(float x, float y) : _x(x), _y(y) {}
};
define an init function to initialize Vertex array by dynamically allocating and initializing the elements as:
// Builds an array of numVertices individually allocated Vertex objects from the
// positions, normals and texCoords vectors.
//
// numVertices must not exceed the size of any of those three vectors (they are
// indexed without bounds checks).
//
// Ownership: the caller must `delete` every element and then `delete[]` the
// returned array.
Vertex **
initVertex(int numVertices)
{
    Vertex **points = new Vertex *[numVertices];
    for (int i = 0; i < numVertices; ++i) {
        // The third position argument is the z component (_z), not _x again.
        points[i] = new Vertex(positions[i]._x, positions[i]._y, positions[i]._z,
                               normals[i]._x, normals[i]._y, normals[i]._z,
                               texCoords[i]._x, texCoords[i]._y);
    }
    return points;
}
You can use Vertex **points = initVertex(numVertices) and dereference each element.
If you must have Vertex *points then you can use this function to create the initialized array of Vertices:
// Builds a contiguous array of numVertices Vertex objects from the positions,
// normals and texCoords vectors, constructing each element in place inside a
// raw char buffer (Vertex has no default constructor, so `new Vertex[n]` is
// not available).
//
// numVertices must not exceed the size of any of those three vectors (they are
// indexed without bounds checks).
//
// Ownership: the storage was allocated with `new char[]`, so it must be
// released as a char array: `delete[] reinterpret_cast<char *>(points)`.
Vertex *
initVertex2(int numVertices)
{
    char *points_buf = new char[sizeof(Vertex) * numVertices];
    Vertex *points = reinterpret_cast<Vertex *>(points_buf);
    for (int i = 0; i < numVertices; ++i) {
        // Placement-new into slot i; the third position argument is the z
        // component (_z), not _x again.
        new (points_buf + i * sizeof(Vertex))
            Vertex(positions[i]._x, positions[i]._y, positions[i]._z,
                   normals[i]._x, normals[i]._y, normals[i]._z,
                   texCoords[i]._x, texCoords[i]._y);
    }
    return points;
}
And call it Vertex *points = initVertex2(numVertices) and use array indexing to access each element.
I have the following code...
/* Emits the polygon's vertices with glVertex2f: the center first when mode is
 * GL_TRIANGLE_FAN, then every point in order, then points[0] once more.
 * NOTE(review): no glBegin/glEnd appears here — presumably the caller brackets
 * this call; verify. */
void draw_polygon(struct vector2d* center, int num_points,
                  struct vector2d* points, int mode)
{
    int idx = 0;

    if (GL_TRIANGLE_FAN == mode) {
        glVertex2f(center->x, center->y);
    }

    while (idx < num_points) {
        glVertex2f(points[idx].x, points[idx].y);
        idx++;
    }

    /* Repeat the first point after the last one. */
    glVertex2f(points[0].x, points[0].y);
}
and I am trying to convert it so it is OpenGLES 1.1 compat. From other posts I thought it looked like I would need something similar to this...
void draw_polygon(struct vector2d* center, int num_points,
struct vector2d* points, int mode)
{
int i;
int offset=0;
GL_FLOAT *arr;
if(mode == GL_TRIANGLE_FAN)i{
*arr = (GL_FLOAT *)malloc(sizeof(GL_FLOAT) * ((num_points+1)*2));
arr[0]=center->x;
arr[1]=center->y;
offset = 2;
}
else{
*arr = (int *)malloc(sizeof(GL_FLOAT) * (num_points*2)) ;
}
for(i = 0; i < num_points; i++){
int g = i+offset;
arr[g]=points[i].x;
arr[g+1]=points[i].y;
i++;
}
}
But of course that doesn't compile. Can someone help me understand the proper way to handle this in GLES?
First, about the code not compiling:
/* 2D point; x and y are its only members, both GLfloat. */
struct vector2d {
GLfloat x, y;
};
/* Packs the polygon into a newly allocated flat x,y GLfloat array: the center
 * first when mode is GL_TRIANGLE_FAN, then every point in order.
 *
 * NOTE: at this stage arr is only filled. It still has to be submitted
 * (glVertexPointer + glDrawArrays) and released with free(arr) before this
 * function returns. */
void draw_polygon(struct vector2d* center, int num_points,
                  struct vector2d* points, int mode)
{ //1st GL_FLOAT is not a type, use GLfloat
    int i;
    int offset=0;
    GLfloat *arr;
    if(mode == GL_TRIANGLE_FAN) { //what was that "i" doing here
        arr = (GLfloat *)malloc(sizeof(GLfloat) * ((num_points+1)*2)); //no "*" before arr
        if(arr == NULL)
            return;
        arr[0]=center->x;
        arr[1]=center->y;
        offset = 2;
    }
    else{
        arr = (GLfloat *)malloc(sizeof(GLfloat) * (num_points*2)) ; //no "*" before arr; Why do you typcast it to "(int *)"?
        if(arr == NULL)
            return;
    }
    for(i = 0; i < num_points; i++){
        int g = (i*2)+offset;//i*2
        arr[g]=points[i].x;
        arr[g+1]=points[i].y;
        //no extra "i++" here: the for statement already advances i, a second
        //increment would skip every other point
    }
}
As for doing the whole code. GLES does not work with begin/end calls but you have to call the draw call. At this point with the code above what you did was create an array of vertex data, now you need to push it to GPU (or set the pointer for the data) and call glDrawArrays.
So you need to add:
glEnableClientState(GL_VERTEX_ARRAY); //this is usually set only once unless you want to disable it at some point for some very unlikelly reason
glVertexPointer(2, GL_FLOAT, 0, arr); //set the pointer (alternative is to use the VBO and copy the data directly to the GPU)
glDrawArrays(mode, 0, num_points+offset/2);
free(arr); //you need to free the allocated memory
in the same function.
To make your method look a bit less complicated you can try this:
void draw_polygon2(struct vector2d* center, int num_points, struct vector2d* points, int mode) {
if(mode == GL_TRIANGLE_FAN) {
vector2d *newBuffer = (vector2d *)malloc(sizeof(vector2d)*(num_points+1));//create a new buffer just to append center to beginning
newBuffer[0] = *center; //copy center
memcpy(newBuffer+1, points, sizeof(vector2d)*num_points); //copy all other points after center
glEnableClientState(GL_VERTEX_ARRAY);
glVertexPointer(2, GL_FLOAT, sizeof(vector2d), newBuffer);
glDrawArrays(mode, 0, num_points+1);
free(newBuffer);
}
else {
glEnableClientState(GL_VERTEX_ARRAY);
glVertexPointer(2, GL_FLOAT, sizeof(vector2d), points);
glDrawArrays(mode, 0, num_points);
}
}
Note that because this function passes the vector2d structures directly to OpenGL, it assumes that x and y are the first two members of vector2d. Because the stride is sizeof(vector2d), the code should still work even if you add a 'z' as a third member.