Optimizing a raytracing shader in GLSL - c++

I have coded a voxelization based raytracer which is working as expected but is very slow.
Currently the raytracer code is as follows:
#version 430
//normalized position from (-1, -1) to (1, 1)
in vec2 f_coord;
out vec4 fragment_color;
struct Voxel
{
vec4 position;
vec4 normal;
vec4 color;
};
struct Node
{
//children of the current node
int children[8];
};
layout(std430, binding = 0) buffer voxel_buffer
{
//last layer of the tree, the leaves
Voxel voxels[];
};
layout(std430, binding = 1) buffer buffer_index
{
uint index;
};
layout(std430, binding = 2) buffer tree_buffer
{
//tree structure
Node tree[];
};
layout(std430, binding = 3) buffer tree_index
{
uint t_index;
};
uniform vec3 camera_pos; //position of the camera
uniform float aspect_ratio; // aspect ratio of the window
uniform float cube_dim; //Dimensions of the voxelization cube
uniform int voxel_resolution; //Side length of the cube in voxels
#define EPSILON 0.01
// Detect whether a position is inside of the voxel with size size located at corner
bool inBoxBounds(vec3 corner, float size, vec3 position)
{
bool inside = true;
position-=corner;//coordinate of the position relative to the box coordinate system
//Test that all coordinates are inside the box; if any is outside, the point is outside the box
for(int i=0; i<3; i++)
{
inside = inside && (position[i] > -EPSILON);
inside = inside && (position[i] < size+EPSILON);
}
return inside;
}
//Get the distance to a box or infinity if the box cannot be hit
float boxIntersection(vec3 origin, vec3 dir, vec3 corner0, float size)
{
dir = normalize(dir);
vec3 corner1 = corner0 + vec3(size,size,size);//Opposite corner of the box
float coeffs[6];
//Calculate the intersection coefficients with the 6 bounding planes
coeffs[0] = (corner0.x - origin.x)/(dir.x);
coeffs[1] = (corner0.y - origin.y)/(dir.y);
coeffs[2] = (corner0.z - origin.z)/(dir.z);
coeffs[3] = (corner1.x - origin.x)/(dir.x);
coeffs[4] = (corner1.y - origin.y)/(dir.y);
coeffs[5] = (corner1.z - origin.z)/(dir.z);
//by default the distance to the box is infinity
float t = 1.f/0.f;
for(uint i=0; i<6; i++){
//if the distance to a box is negative, we set it to infinity as we cannot travel in the negative direction
coeffs[i] = coeffs[i] < 0 ? 1.f/0.f : coeffs[i];
//The distance is the minimum of the previously calculated distance and the current distance
t = inBoxBounds(corner0,size,origin+dir*coeffs[i]) ? min(coeffs[i],t) : t;
}
return t;
}
#define MAX_TREE_HEIGHT 11
int nodes[MAX_TREE_HEIGHT];
int levels[MAX_TREE_HEIGHT];
vec3 positions[MAX_TREE_HEIGHT];
int sp=0;
void push(int node, int level, vec3 corner)
{
nodes[sp] = node;
levels[sp] = level;
positions[sp] = corner;
sp++;
}
void main()
{
int count = 0; //count the iterations of the algorithm
vec3 r = vec3(f_coord.x, f_coord.y, 1.f/tan(radians(40))); //direction of the ray
r.y/=aspect_ratio; //modify the direction based on the window's aspect ratio
vec3 dir = r;
r += vec3(0,0,-1.f/tan(radians(40))) + camera_pos; //put the ray at the camera position
fragment_color = vec4(0);
int max_level = int(log2(voxel_resolution));//height of the tree
push(0,0,vec3(-cube_dim));//initialize the stack with the root node
float tc = 1.f; //initial color value, to be decreased whenever a voxel is hit
//tree variables
int level=0;
int node=0;
vec3 corner;
do
{
//pop from stack
sp--;
node = nodes[sp];
level = levels[sp];
corner = positions[sp];
//set the size of the current voxel
float size = cube_dim / pow(2,level);
//set the corners of the children
vec3 corners[] =
{corner, corner+vec3(0,0,size),
corner+vec3(0, size,0), corner+vec3(0,size,size),
corner+vec3(size,0,0), corner+vec3(size,0,size),
corner+vec3(size,size,0), corner+vec3(size,size,size)};
float coeffs[8];
for(int child=0; child<8; child++)
{
//Test non-zero children; zero children are empty and thus should be discarded
coeffs[child] = tree[node].children[child]>0?
//Get the distance to your child if it's not empty or infinity if it's empty
boxIntersection(r, dir, corners[child], size) : 1.f/0.f;
}
int indices[8] = {0,1,2,3,4,5,6,7};
//sort the children from closest to farthest
for(uint i=0; i<8; i++)
{
for(uint j=i; j<8; j++)
{
if((coeffs[j] < coeffs[i]))
{
float swap = coeffs[i];
coeffs[i] = coeffs[j];
coeffs[j] = swap;
int iSwap = indices[i];
indices[i] = indices[j];
indices[j] = iSwap;
vec3 vSwap = corners[i];
corners[i] = corners[j];
corners[j] = vSwap;
}
}
}
//push to stack
//note: the index must be signed; with uint, i>=0 is always true and the loop never terminates
for(int i=7; i>=0; i--)
{
if(!isinf(coeffs[i]))
{
push(tree[node].children[indices[i]],
level+1, corners[i]);
}
}
count++;
}while(level < (max_level-1) && sp>0);
//set color
fragment_color = vec4(count)/100;
}
As it may not be fully clear what this does, let me explain.
We check ray-box intersections starting with a big cube. If we hit it we test intersection with the 8 cubes that compose it.
If we hit any of those we check intersections with the 8 cubes that make up that cube.
In 2D this would look as follows:
In this case we have 4 layers: we check the big box first, then the ones colored in red, then the ones colored in green, and finally the ones colored in blue.
Printing out the number of times the raytracing step executed as a color (which is what the code snippet I have provided does)
results in the following image:
As you can see, most of the time the shader doesn't do more than 100 iterations.
However, this shader takes 200,000 microseconds on average to execute on a GTX 1070.
Since the issue is not the number of iterations, the problem is likely in thread execution.
Does anyone know how I could optimize this code?
The biggest bottleneck seems to be the use of a stack.
If I run the same code without pushing to the stack (generating wrong output), there is a 10-fold improvement in runtime.

It seems you test most of the voxels in each level of the octree for intersection with the ray, and you also sort them (by some distance) at each level.
I propose another approach.
If the ray intersects the bounding box (level 0 of the octree), it does so at two faces of the box, or at a corner or an edge; those are "corner" cases.
Finding the 3D ray-plane intersection can be done like here. Finding if the intersection is inside the face (quad) can be done by testing if the point is inside of one of the two triangles of the face, like here.
Get the farthest intersection, I0, from the camera. Also let r be a unit vector of the ray in the direction from I0 toward the camera.
Find the deepest voxel for I0 coordinates. This is the farthest voxel from the camera.
Now we want the exit coordinates I0e for the ray in that voxel, through another face. While you could again do the calculations for all 6 faces, if your voxels are X, Y, Z aligned and you define the ray in the same coordinate system as the octree, the calculations simplify a lot.
Apply a little displacement (e.g. 1/1000 of the smallest voxel size) to I0e along the r unit vector of the ray: I1 = I0e + r/1000. Find the voxel containing I1. This is the next voxel in the sorted list of voxel-ray intersections.
Repeat finding I1e, then I2, then I2e, then I3, etc., until the bounding box is exited. The list of crossed voxels is sorted.
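As a minimal sketch of that exit-face step (assuming axis-aligned voxels; the function and parameter names are illustrative, not taken from the question's code), for each axis you pick the face plane the ray is heading toward and keep the nearest of the three plane hits:
//distance along unit direction r, from a point p inside the voxel with minimum
//corner minCorner and side size, to the face through which the ray exits;
//axes the ray does not move along are ignored
float exitDistance(vec3 p, vec3 r, vec3 minCorner, float size)
{
    float tx = abs(r.x) > 1e-8 ? (minCorner.x + (r.x > 0.0 ? size : 0.0) - p.x) / r.x : 1e30;
    float ty = abs(r.y) > 1e-8 ? (minCorner.y + (r.y > 0.0 ? size : 0.0) - p.y) / r.y : 1e30;
    float tz = abs(r.z) > 1e-8 ? (minCorner.z + (r.z > 0.0 ? size : 0.0) - p.z) / r.z : 1e30;
    return min(tx, min(ty, tz));
}
The exit point is then I0e = p + r*exitDistance(p, r, minCorner, size), and I1 = I0e + r*(size/1000.0) lands inside the next voxel along the ray.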
Working with the octree can be optimized depending on how you store its info: all possible nodes or just the used ones, nodes with data or just "pointers" to another container with the data. But that is a matter for another question.

The first thing that stands out is your box intersection function. Have a look at Inigo Quilez' procedural box function for a much faster version. Since your box size is uniform in all axes and you don't need outNormal, you can get an even lighter version. In essence, use maths instead of the brute force approach that tests each box plane.
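For illustration, here is a minimal slab-test sketch with the same signature as the question's boxIntersection (in the spirit of iq's routine, not his exact code); it assumes dir is normalized:
float boxIntersectionFast(vec3 origin, vec3 dir, vec3 corner, float size)
{
    //intersect all three pairs of axis-aligned slab planes at once
    vec3 invDir = 1.0 / dir; //a zero component yields +/-inf, which the min/max mostly absorb
    vec3 t0 = (corner - origin) * invDir;
    vec3 t1 = (corner + vec3(size) - origin) * invDir;
    vec3 tNear = min(t0, t1);
    vec3 tFar = max(t0, t1);
    float tN = max(max(tNear.x, tNear.y), tNear.z); //entry distance
    float tF = min(min(tFar.x, tFar.y), tFar.z); //exit distance
    return (tN > tF || tF < 0.0) ? 1.f/0.f : max(tN, 0.0); //infinity on a miss, like the original
}
This replaces the six per-plane divisions plus six inBoxBounds calls with one fused test and no loop.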
Also, try to avoid temporary storage where possible. For example, the corners array could be computed on demand for each octree box. Of course, with the above suggestion, these will be changed to box centers.
Since nodes, levels and positions are always accessed together, try co-locating them in a single new struct and accessing them as a single unit.
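For instance, a sketch of that change against the question's stack (same capacity, one struct per entry):
#define MAX_TREE_HEIGHT 11
struct StackEntry
{
    int node; //octree node index
    int level; //depth in the tree
    vec3 corner; //min corner of the node's cube
};
StackEntry stack[MAX_TREE_HEIGHT];
int sp = 0;
void push(int node, int level, vec3 corner)
{
    stack[sp++] = StackEntry(node, level, corner);
}
Pops then read one contiguous entry instead of gathering from three separate arrays.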
Will look more later...

Thread execution on a GPU may be massively parallel, but that doesn't mean all threads run independently of one another. Groups of threads execute exactly the same instructions; the only difference is the input data. That means branches, and therefore loops, can't make a single thread diverge in execution, and so can't let it terminate early either.
Your example shows the most extreme edge case of this: a high likelihood that, in a group of threads, all the work being done is relevant to only one thread.
To alleviate this, you should try to reduce the difference in execution length (iterations in your case) for threads in a group (or in total). This can be done by setting a limit on the number of iterations per shader pass and rescheduling only those threads/pixels that need more iterations.
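As a hedged sketch of that idea (every name below is hypothetical; this is not the questioner's shader): each pass runs a bounded number of traversal steps, and pixels that still have work left append themselves to an SSBO queue that a follow-up pass consumes.
#version 430
#define MAX_ITERS_PER_PASS 64
layout(std430, binding = 4) buffer pending_pixels { uvec2 pending[]; };
layout(std430, binding = 5) buffer pending_counter { uint n_pending; };
out vec4 fragment_color;
// Stand-in for one iteration of the octree walk; returns true while
// traversal still has work to do. Replace with the real step.
bool traversalStep() { return false; }
void main()
{
    bool unfinished = true;
    for (int i = 0; i < MAX_ITERS_PER_PASS && unfinished; i++)
        unfinished = traversalStep();
    if (unfinished)
    {
        // Reschedule: enqueue this pixel for the next pass.
        uint slot = atomicAdd(n_pending, 1u);
        pending[slot] = uvec2(gl_FragCoord.xy);
    }
    fragment_color = vec4(0.0); // the pass that finishes writes the real colour
}
Divergence cost then falls because every thread in a group does at most MAX_ITERS_PER_PASS iterations per pass, regardless of how deep a single pixel's traversal goes.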

Related

Vulkan compute shader outputs black lines on frames when on macOs if workgroup size is at maximum of 1024 invocations

I'm currently working on a small SDF based graphics engine that uses compute shaders to calculate the pixel colours from data provided via storage buffers.
I've encountered a weird problem where the frames rendered by my compute shader render black lines, like so (the green pixels are correct, showing green to see the black lines better):
I've isolated a minimal example of GLSL code that produces this issue, commented for clarity. The issue occurs during traversal of my spatial acceleration data structure, a bounding interval hierarchy. The only part relevant to the issue is that I'm extracting booleans from flags set in the first bits of a bounding interval's link integer.
// relevant struct definition of a bounding interval
struct bounding_interval
{
uint link;
uint target;
float coordinate;
};
// compute work group dimensions
layout(local_size_x = 32, local_size_y = 32) in;
// push constants provide window size and scene bounding box
layout(push_constant) uniform constants
{
uvec2 window_size;
vec2 scene_lower_bound;
vec2 scene_upper_bound;
};
// relevant input buffer
...
layout(set = 0, binding = 3) buffer _tlbih { bounding_interval tlbih[]; };
// output image
layout(set = 1, binding = 0, rgba8_snorm) uniform writeonly image2D output_image;
// checks if the query point is within the scene bounding box
// the scene bounding box is provided by push constants
bool point_is_within_scene (vec2 point)
{
return point.x > scene_lower_bound.x && point.x < scene_upper_bound.x && point.y > scene_lower_bound.y && point.y < scene_upper_bound.y;
}
// checks if the query point is to the left of the bounding interval border
bool point_is_left_in_tlbi (vec2 point, uint index)
{
return point[ (tlbih[index].link & 0x40000000) >> 30 ] < tlbih[index].coordinate;
}
// checks if the query point is directly on the bounding interval border
bool point_is_center_in_tlbi (vec2 point, uint index)
{
return point[ (tlbih[index].link & 0x40000000) >> 30 ] == tlbih[index].coordinate;
}
// checks if the current bounding interval is "left" or "right"
bool current_tlbi_is_right (uint index)
{
return bool(tlbih[index].link & 0x20000000);
}
// checks if a query point intersects a bounding interval
bool point_intersects_tlbi (vec2 point, uint index)
{
return ((point_is_left_in_tlbi(point, index) || point_is_center_in_tlbi(point, index)) && !current_tlbi_is_right(index)) || ((!point_is_left_in_tlbi(point, index) || point_is_center_in_tlbi(point, index)) && current_tlbi_is_right(index));
}
// checks if the current bounding interval is a "leaf" and therefore points to a primitive shape
bool current_tlbi_is_leaf (uint index)
{
return bool(tlbih[index].link & 0x80000000);
}
// supposed to iterate over bounding interval hierarchy and output a colour
// removed most irrelevant code
vec4 determine_pixel_colour (vec2 point)
{
vec3 colour = vec3(0.0);
uint current_tlbi = 0;
if ( !point_is_within_scene( point )) return vec4(colour, 1.0);
while (true) {
colour.x = 0.0;
if ( point_intersects_tlbi( point, current_tlbi )) {
// the issue only appears once this nested if statement is added
if ( current_tlbi_is_leaf( current_tlbi )) {
colour.x = 1.0;
}
return vec4(colour, 1.0);
}
else {
return vec4(colour, 1.0);
}
}
}
// calculates the pixel coordinates based on the compute thread ids
ivec2 determine_pixel_coordinates ()
{
return ivec2( clamp(gl_LocalInvocationID.xy + gl_WorkGroupID.xy * 32, ivec2(0), window_size) );
}
void main()
{
ivec2 coordinates = determine_pixel_coordinates();
// flips y coordinate so y+ is up, not down
ivec2 y_inverted_coordinates = ivec2( coordinates.x, window_size.y - coordinates.y );
vec2 point = (2.0 * y_inverted_coordinates - window_size) / float(window_size.y);
vec4 pixel_colour = determine_pixel_colour(point);
// adds green colour to make black lines visible
pixel_colour += vec4(0.0, 0.5, 0.0, 1.0);
imageStore(output_image, coordinates, pixel_colour);
}
What I've determined so far:
The very same code does NOT create the same black lines on my desktop Windows PC (NVIDIA GTX 1080 Ti) --> the issue occurs on my MacBook Pro (running macOS, therefore requiring MoltenVK to run Vulkan)
The issue does NOT occur when that nested if statement (marked by a comment) in determine_pixel_colour is not present
The issue does NOT occur when the current_tlbi_is_leaf function is called in the outer if statement
The number of black lines is equal to the number of work groups on the vertical axis. So each compute work group (32 x 32 threads) returns the correct colour for the first half, then returns black for the second half
The issue does NOT occur when the boolean is determined based on hardcoded values e.g. bool(2147483648 & 0x80000000) works fine --> is the buffer access the problem?
Question:
Is this a MoltenVK issue (since it only happens on my Mac)?
Is this a buffer access issue (since the boolean is not a problem when using a hardcoded number)?
Could this be a present mode issue (the macOS version uses FIFO, the Windows version uses Mailbox)?
---------- Update 1 ----------
I discovered that this issue does not occur when I set the local workgroup size to 16 * 16 invocations. While this would technically fix the issue, I still do not understand why 32 * 32 invocations fail to render half of themselves. The maximum work group invocation count of my MacBook is 1024, so my invocation count does not exceed this maximum.
---------- Update 2 ----------
When choosing 16 * 16 * 4 invocations as the local size in my compute shader, and therefore the same invocation count as 32 * 32, the same issue occurs. It seems to be an issue that only occurs when my local invocation count equals the maximum specified by the device.

Mesh color is messed up probably due to bad vertex normal computation

I have vertex positions and indices and I want vertex normals:
// input
vector<Vec3f> points = ... // position
vector<Vec3i> facets = ... // index (triangles)
// output
vector<Vec3f> norms; // normal
Method 1
I compute normal like this:
norms.resize(points.size()); // for each vertex there is a normal
for (Vec3i f : facets) {
int i0 = f.x();
int i1 = f.y(); // index
int i2 = f.z();
Vec3d pos0 = points.at(i0);
Vec3d pos1 = points.at(i1); // position
Vec3d pos2 = points.at(i2);
Vec3d N = triangleNormal(pos0, pos1, pos2); // face/triangle normal
norms[i0] = N;
norms[i1] = N; // Use the same normal for all 3 vertices
norms[i2] = N;
}
Then, the output mesh is rendered like this with a Phong material:
Method 1 with reversed normal
When I reverse normal direction in method 1:
norms[i0] = -N;
norms[i1] = -N;
norms[i2] = -N;
The dark and light regions are swapped:
The same happens by swapping position 0 with position 1 by:
// Vec3d N = triangleNormal(pos0, pos1, pos2);
Vec3d N = triangleNormal(pos1, pos0, pos2); // Swap pos0 with pos1
Method 2
I compute the normal by this method:
// Count how many faces/triangles a vertex is shared by
vector<int> counters;
counters.resize(points.size());
norms.resize(points.size());
for (Vec3i f : facets) {
int i0 = f.x();
int i1 = f.y(); // index
int i2 = f.z();
Vec3d pos0 = points.at(i0);
Vec3d pos1 = points.at(i1); // position
Vec3d pos2 = points.at(i2);
Vec3d N = triangleNormal(pos0, pos1, pos2);
// Must be normalized
// https://stackoverflow.com/a/21930058/3405291
N.normalize();
norms[i0] += N;
norms[i1] += N; // add normal to all vertices used in face
norms[i2] += N;
counters[i0]++;
counters[i1]++; // increment count for all vertices used in face
counters[i2]++;
}
// https://stackoverflow.com/a/21930058/3405291
for (int i = 0; i < static_cast<int>(norms.size()); ++i) {
if (counters[i] > 0)
norms[i] /= counters[i];
else
norms[i].normalize();
}
This method yields a totally dark final render by a Phong material:
I also tried methods suggested here and there, which are similar to method 2. They all result in a final render that looks like that of method 2, i.e. all dark regions without any light ones.
Method 2 with reversed normal
I used method 2, but at the end, I reversed the normal direction by:
for (Vec3d & n : norms) {
n = -n;
}
To my surprise, the final render is all dark:
Also in method 2, I tried swapping position 0 with position 1:
// Vec3d N = triangleNormal(pos0, pos1, pos2);
Vec3d N = triangleNormal(pos1, pos0, pos2); // swap pos0 with pos1
The final render is all dark regions without any light ones.
How?
Any idea how I can get my final render to be all light without any dark region?
That looks like your mesh does not have a consistent winding rule. So some triangles/faces are defined in CW order and others in CCW order of vertices, causing some of your normals to face in the opposite direction. There are a few things you can do to remedy this:
use double sided normals lighting
this is the easiest... somewhere in the fragment shader, or wherever you are computing the shading, you have something like this:
out_color = face_color*(ambient_light+diffuse_light*max(0.0,dot(face_normal,light_direction)));
when the normal is in the wrong direction, the result of the dot is negative, leading to a dark color, so just use the abs value instead:
out_color = face_color*(ambient_light+diffuse_light*abs(dot(face_normal,light_direction)));
In the fixed-function pipeline there is even a switch for this, IIRC:
glLightModeli(GL_LIGHT_MODEL_TWO_SIDE, GL_TRUE);
repair mesh winding
there must be 3D tools to do this (Blender, 3DS, ...), or if your mesh is generated on the fly you could update your code to create consistent winding on your own.
Correct winding enables the use of GL_CULL_FACE, which speeds up rendering considerably. It also enables more advanced stuff like this:
OpenGL - How to create Order Independent transparency?
repair normals
In some cases there are ways to detect if a normal is pointing outwards or inwards from the mesh, for example like this:
Determing the direction of face normals consistently?
So just negate the wrong ones during computation of the normals and that is it. However, if your mesh is too complicated (too far from convex), this is not so easily done, as you need to use local "centers" of the mesh or even inside-polygon tests, which are expensive.
The averaging method of generating normals gives you dark colors for both directions of the normals, which means you computed them wrongly and they are most likely zero (face normals with opposite winding cancel each other out when summed). For more info about such an approach see:
How to achieve smooth tangent space normals?
Anyway, to debug problems like this it's best to render your normals as lines going from the vertices of your mesh (use wireframe). Then you would see directly which normals are good and which are bad. Here is an example:

How to handle incorrect index calculation for discretized ray tracing?

The situation is as follows. I am trying to implement a linear voxel search in a GLSL shader for efficient voxel ray tracing. In other words, I have a 3D texture and I am ray tracing on it, but I am trying to ray trace such that I only ever check voxels intersected by the ray once.
To this effect I have written a program with the following results:
Not efficient but correct:
The above image was obtained by advancing the ray by a small epsilon multiple times and sampling from the texture on each iteration. This produces the correct results, but it's very inefficient.
That would look like:
loop{
start += direction*0.01;
sample(start);
}
To make it efficient I decided to instead implement the following lookup function:
float bound(float val)
{
if(val >= 0)
return voxel_size;
return 0;
}
float planeIntersection(vec3 ray, vec3 origin, vec3 n, vec3 q)
{
n = normalize(n);
if(dot(ray,n)!=0)
return (dot(q,n)-dot(n,origin))/dot(ray,n);
return -1;
}
vec3 get_voxel(vec3 start, vec3 direction)
{
direction = normalize(direction);
vec3 discretized_pos = ivec3((start*1.f/(voxel_size))) * voxel_size;
vec3 n_x = vec3(sign(direction.x), 0,0);
vec3 n_y = vec3(0, sign(direction.y),0);
vec3 n_z = vec3(0, 0,sign(direction.z));
float bound_x, bound_y, bound_z;
bound_x = bound(direction.x);
bound_y = bound(direction.y);
bound_z = bound(direction.z);
float t_x, t_y, t_z;
t_x = planeIntersection(direction, start, n_x,
discretized_pos+vec3(bound_x,0,0));
t_y = planeIntersection(direction, start, n_y,
discretized_pos+vec3(0,bound_y,0));
t_z = planeIntersection(direction, start, n_z,
discretized_pos+vec3(0,0,bound_z));
if(t_x < 0)
t_x = 1.f/0.f;
if(t_y < 0)
t_y = 1.f/0.f;
if(t_z < 0)
t_z = 1.f/0.f;
float t = min(t_x, t_y);
t = min(t, t_z);
return start + direction*t;
}
Which produces the following result:
Notice the triangle aliasing on the left side of some surfaces.
It seems this aliasing occurs because some coordinates are not being set to their correct voxel.
For example modifying the truncation part as follows:
vec3 discretized_pos = ivec3((start*1.f/(voxel_size)) - vec3(0.1)) * voxel_size;
Creates:
So it has fixed the issue for some surfaces and caused it for others.
I wanted to know if there is a way in which I can correct this truncation so that this error does not happen.
Update:
I have narrowed down the issue a bit. Observe the following image:
The numbers represent the order in which I expect the boxes to be visited.
As you can see, for some of the points the sampling of the fifth box seems to be omitted.
The following is the sampling code:
vec4 grabVoxel(vec3 pos)
{
pos *= 1.f/base_voxel_size;
pos.x /= (width-1);
pos.y /= (depth-1);
pos.z /= (height-1);
vec4 voxelVal = texture(voxel_map, pos);
return voxelVal;
}
Yep, that was the +/- rounding I was talking about in my comments somewhere in your previous questions related to this. What you need to do is have the step equal to the grid size in one of the axes (and test 3 times: once for |dx|=1, then for |dy|=1, and lastly for |dz|=1).
Also you should create a debug draw of a 2D slice through your map to actually see where the hits for a single specific test ray occurred. Then, based on the direction of the ray in each axis, you set the rounding rules separately. Without this you are just blindly patching one case and corrupting the other two ...
Now actually look at this (I linked it for you before but you clearly did not look):
Wolf and Doom ray casting techniques
especially pay attention to:
On the right it shows you how to compute the ray step (your epsilon). You simply scale the ray direction so one of the coordinates is +/-1. For simplicity, start with a 2D slice through your map. The red dot is the ray start position. Green is the ray step vector for vertical grid line hits and red is for horizontal grid line hits (z will be analogous).
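In GLSL that scaling might look like this (a sketch; the helper name is made up, and a unit grid cell is assumed, so multiply by the cell size otherwise):
//step vector for the pass that walks grid lines perpendicular to `axis`
//(0 = x, 1 = y, 2 = z): the ray advances exactly one cell on that axis
//per iteration, so no crossing on that axis can be skipped
//(only meaningful when dir[axis] != 0)
vec3 gridStep(vec3 dir, int axis)
{
    return dir / abs(dir[axis]);
}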
Now you should add the 2D overview of your map through some height slice that is visible (like in the image on the left), adding a dot or marker to each intersection detected, but distinguishing between x, y and z hits by color. Do this for a single ray only (I use the center-of-view ray). First handle views looking in the X+ direction, then X-, and when done move on to Y and Z ...
In my GLSL volumetric 3D back raytracer, which I also linked you before, look at these lines:
if (dir.x<0.0) { p+=dir*(((floor(p.x*n)-_zero)*_n)-ray_pos.x)/dir.x; nnor=vec3(+1.0,0.0,0.0); }
if (dir.x>0.0) { p+=dir*((( ceil(p.x*n)+_zero)*_n)-ray_pos.x)/dir.x; nnor=vec3(-1.0,0.0,0.0); }
if (dir.y<0.0) { p+=dir*(((floor(p.y*n)-_zero)*_n)-ray_pos.y)/dir.y; nnor=vec3(0.0,+1.0,0.0); }
if (dir.y>0.0) { p+=dir*((( ceil(p.y*n)+_zero)*_n)-ray_pos.y)/dir.y; nnor=vec3(0.0,-1.0,0.0); }
if (dir.z<0.0) { p+=dir*(((floor(p.z*n)-_zero)*_n)-ray_pos.z)/dir.z; nnor=vec3(0.0,0.0,+1.0); }
if (dir.z>0.0) { p+=dir*((( ceil(p.z*n)+_zero)*_n)-ray_pos.z)/dir.z; nnor=vec3(0.0,0.0,-1.0); }
they are how I did this. As you can see, I use a different rounding/flooring rule for each of the 6 cases. This way you handle each case without corrupting the others. The rounding rule depends on a lot of stuff, like how your coordinate system is offset relative to (0,0,0), and more, so it might be different in your code, but the if conditions should be the same. Also, as you can see, I am handling this by offsetting the ray start position a bit instead of having these conditions inside the ray traversal loop castray.
That macro casts the ray and looks for intersections with the grid, and on top of that it z-sorts the intersections and uses the first valid one (that is what l and ll are for; no other conditions or combinations of ray results are needed). So my way of dealing with this is to cast a ray for each type of intersection (x, y, z), starting at the first intersection with the grid on the same axis. You need to take the starting offset into account so that l and ll represent the intersection distance to the real start of the ray, not to the offset one ...
Also, a good idea is to do this on the CPU side first, and only when it is 100% working port it to GLSL, as things like this are very hard to debug in GLSL.

Why is detail lost when computing shadow and reflections in my ray tracer

I am building a ray tracer and I am able to correctly render the diffuse and specular parts of my sphere. When I come to calculating shadows and reflections, however, I end up with a very pixelated result, as shown in the image below:
I can see that the shadow is cast in the correct place, and if you zoom in, the reflection is also visible, but again pixelated. I call this method to determine whether a pixel is in shade, and it is also called recursively by my reflect-ray method to determine the reflected colours.
RGBColour Scene::illumination(Ray incidentRay, Shape *closestShape, RGBColour shapeColour, Ray ray)
{
RGBColour diffuseLight = _backgroundColour;
RGBColour specularLight = _backgroundColour;
double projectionNormalToSource = 0.0;
for (int i = 0; i < _lightSources.size(); i++)
{
Ray shadowRay(incidentRay.Direction(), (_lightSources.at(i).GetPosition() - incidentRay.Direction()).UnitVector());
Vector surfaceNormal = closestShape->SurfaceNormal(incidentRay);
//lambertian shading.
projectionNormalToSource = surfaceNormal.ScalarProduct(shadowRay.Direction());
if (projectionNormalToSource > 0)
{
bool isShadow = false;
std::vector<double> shadowIntersections;
Ray temp(incidentRay.Direction(), (_lightSources.at(i).GetPosition() - incidentRay.Direction()));
for (int j = 0; j < _sceneObjects.size(); j++)
{
shadowIntersections.push_back(_sceneObjects.at(j)->Intersection(temp));
}
//Test each point to see if it is in shadow.
for (int j = 0; j < shadowIntersections.size(); j++)
{
if (shadowIntersections.at(j) != -1)
{
if (shadowIntersections.at(j) > _epsilon && shadowIntersections.at(j) <= temp.Direction().Magnitude() && closestShape != _sceneObjects.at(j))
{
isShadow = true;
}
break;
}
}
if (!isShadow)
{
diffuseLight = diffuseLight + (closestShape->Colour() * projectionNormalToSource * closestShape->DiffuseCoefficient() * _lightSources.at(i).DiffuseIntensity());
specularLight = specularLight + specularReflection(_lightSources.at(i), projectionNormalToSource, closestShape, incidentRay, temp, ray);
}
}
}
return diffuseLight + specularLight;
}
As I am able to correctly render the spheres apart from these aspects, I am convinced the problem must lie within this particular method, so I have not posted the others. What I think is happening is that where pixel values retain their initial colour instead of being shaded, I must be incorrectly calculating very small values; the other option is that the calculated ray did not intersect. However, I do not think the latter option is valid, since otherwise the same intersection method would return incorrect results elsewhere in the program, yet the spheres render correctly (excluding the shading and reflection).
So typically what causes results like this and can you spot any obvious logic errors in my method?
Edit: I have moved my light source to the front and I can now see that the shadow appears to be correctly cast for the green sphere, while the blue one becomes pixelated. So I think something must not be updating correctly on subsequent shape iterations.
Edit 2: The first issue has been fixed and the shadows are no longer pixelated; the resolution was to move the break statement into the if statement directly preceding it. The reflections, however, are still pixelated.
Pixelation like this can occur due to numerical instability. An example: suppose you calculate an intersection point that lies on a curved surface. You then use that point as the origin of a ray (a shadow ray, for example). You would assume that the ray wouldn't intersect that curved surface, but in practice it sometimes can. You could check for this by discarding such self-intersections, but that could cause problems if you decide to implement concave shapes. Another approach is to move the origin of the generated ray along its direction vector by some very small amount, so that no unwanted self-intersection occurs.
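A minimal sketch of that offset (written in GLSL for consistency with the rest of this page; the names and epsilon value are illustrative):
//nudge a secondary ray's origin off the surface it starts on, so the
//surface cannot immediately re-intersect itself ("shadow acne")
vec3 offsetRayOrigin(vec3 hitPoint, vec3 rayDir, float eps)
{
    return hitPoint + rayDir * eps; //e.g. eps around 1e-4 times the scene scale
}
The same nudge works along the surface normal instead of the ray direction; either way the goal is that the occlusion test never starts exactly on the surface being shaded.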

CPU Ray Casting

I'm attempting to ray cast an octree on the CPU (I know the GPU is better, but I'm unable to get that working at this time; I believe my octree texture is created incorrectly).
I understand what needs to be done, and so far I cast a ray for each pixel and check if that ray intersects any nodes within the octree. If it does and the node is not a leaf node, I check if the ray intersects its child nodes. I keep doing this until a leaf node is hit. Once a leaf node is hit, I get the colour for that node.
My question is, what is the best way to draw this to the screen? Currently I'm storing the colours in an array and drawing them with glDrawPixels, but this does not produce correct results; there are gaps in the renderings, and the projection is wrong (I am using glRasterPos3fv).
Edit: Here is some code so far; it needs cleaning up, sorry. I have omitted the octree ray casting code as I'm not sure it's needed, but I will post it if it'll help :)
void Draw(Vector cameraPosition, Vector cameraLookAt)
{
// Calculate the right Vector
Vector rightVector = Cross(cameraLookAt, Vector(0, 1, 0));
// Set up the screen plane starting X & Y positions
float screenPlaneX, screenPlaneY;
screenPlaneX = cameraPosition.x() - ( ( WINDOWWIDTH / 2) * rightVector.x());
screenPlaneY = cameraPosition.y() + ( (float)WINDOWHEIGHT / 2);
float deltaX, deltaY;
deltaX = 1;
deltaY = 1;
int currentX, currentY, index = 0;
Vector origin, direction;
origin = cameraPosition;
vector<Vector4<int>> colours(WINDOWWIDTH * WINDOWHEIGHT);
currentY = screenPlaneY;
Vector4<int> colour;
for (int y = 0; y < WINDOWHEIGHT; y++)
{
// Set the current pixel along x to be the left most pixel
// on the image plane
currentX = screenPlaneX;
for (int x = 0; x < WINDOWWIDTH; x++)
{
// default colour is black
colour = Vector4<int>(0, 0, 0, 0);
// Cast the ray into the current pixel. Set the length of the ray to be 200
direction = Vector(currentX, currentY, cameraPosition.z() + ( cameraLookAt.z() * 200 ) ) - origin;
direction.normalize();
// Cast the ray against the octree and store the resultant colour in the array
colours[index] = RayCast(origin, direction, rootNode, colour);
// Move to next pixel in the plane
currentX += deltaX;
// increase colour array index position
index++;
}
// Move to next row in the image plane
currentY -= deltaY;
}
// Set the colours for the array
SetFinalImage(colours);
// Fill the array with 0 0 0 to set the raster position to (0, 0, 0)
GLfloat *v = new GLfloat[3];
v[0] = 0.0f;
v[1] = 0.0f;
v[2] = 0.0f;
// Set the raster position and pass the array of colours to drawPixels
glRasterPos3fv(v);
glDrawPixels(WINDOWWIDTH, WINDOWHEIGHT, GL_RGBA, GL_FLOAT, finalImage);
}
void SetFinalImage(vector<Vector4<int>> colours)
{
// The array is a 2D array, with the first dimension
// set to the size of the window (WINDOW_WIDTH * WINDOW_HEIGHT)
// Second dimension stores the rgba values for each pixel
for (int i = 0; i < colours.size(); i++)
{
finalImage[i][0] = (float)colours[i].r;
finalImage[i][1] = (float)colours[i].g;
finalImage[i][2] = (float)colours[i].b;
finalImage[i][3] = (float)colours[i].a;
}
}
Your pixel drawing code looks okay. But I'm not sure that your RayCasting routines are correct. When I wrote my raytracer, I had a bug that caused horizontal artifacts on the screen, but it was related to rounding errors in the render code.
I would try this... create a result set of vector<Vector4<int>> where the colors are all red. Now render that to the screen. If it looks correct, then the OpenGL routines are correct. Divide and conquer is always a good debugging method.
Here's a question though....why are you using Vector4 when later on you write the image as GL_FLOAT? I'm not seeing any int->float conversion here....
Your problem may be in your 3DDDA (octree raycaster), specifically with adaptive termination. It results from the quantisation of rays into grid-cell form, which causes certain octree nodes that lie slightly behind foreground nodes (i.e. at a higher z depth), and which should thus be partly visible and partly occluded, not to be rendered at all. The smaller your voxels are, the less noticeable this will be.
There is a very easy way to test whether this is the problem -- comment out the adaptive termination line(s) in your 3DDDA and see if you still get the same gap artifacts.