oct tree generation going wrong at the last step - c++

NOTE: THIS QUESTION HAS BEEN DRASTICALLY EDITED FROM ITS ORIGINAL FORM
I am attempting to create a logarithmic raytracer by implementing an oct tree data structure combined with voxelization to achieve fast ray tracing.
Currently I am having issues with the ray collision detection.
The expected output should be the voxelized stanford dragon with its normal map.
Currrently the issue is that some regions are transparent:
The full dragon:
Transparent regions:
From these images it should be clear that the geometry is correct, but the collision checks are wrong.
There are 2 fragment shaders involved in this process:
The voxelizer fragment shader:
#version 430
in vec3 f_pos;
in vec3 f_norm;
in vec2 f_uv;
out vec4 f_color;
struct Voxel
{
vec4 position;
vec4 normal;
vec4 color;
};
struct Node
{
int children[8];
};
layout(std430, binding = 0) buffer voxel_buffer
{
Voxel voxels[];
};
layout(std430, binding = 1) buffer buffer_index
{
uint index;
};
layout(std430, binding = 2) buffer tree_buffer
{
Node tree[];
};
layout(std430, binding = 3) buffer tree_index
{
uint t_index;
};
out vec4 fragment_color;
uniform int voxel_resolution;
uniform int cube_dim;
int getVIndex(vec3 position, int level)
{
float size = cube_dim / pow(2,level);
int bit2 = int(position.x > size);
int bit1 = int(position.y > size);
int bit0 = int(position.z > size);
return 4*bit2 + 2*bit1 + bit0;
}
void main()
{
uint m_index = atomicAdd(index, 1);
voxels[m_index].position = vec4(f_pos*cube_dim,1);
voxels[m_index].normal = vec4(f_norm,1);
voxels[m_index].color = vec4(f_norm,1);
int max_level = int(log2(voxel_resolution));
int node = 0;
vec3 corner = vec3(-cube_dim);
int child;
for(int level=0; level<max_level-1; level++)
{
float size = cube_dim / pow(2,level);
vec3 corners[] =
{corner, corner+vec3(0,0,size),
corner+vec3(0,size,0), corner+vec3(0,size,size),
corner+vec3(size,0,0), corner+vec3(size,0,size),
corner+vec3(size,size,0), corner+vec3(size,size,size)};
vec3 offsetPos = (vec3(voxels[m_index].position));
child = getVIndex(offsetPos-corner, level);
int mrun = 500;
while ((tree[node].children[child] <= 0) && (mrun > 0)){
mrun--;
if( (atomicCompSwap( tree[node].children[child] , 0 , -1) == 0 ))
{
tree[node].children[child] = int(atomicAdd(t_index, 1));
}
}
if(mrun < 1)
discard;
if(level==max_level-2)
break;
node = tree[node].children[child];
corner = corners[child];
}
tree[node].children[child] = int(m_index);
}
I understand the logic may not be clear so let me explain:
We start with a 3D psoition voxels[m_index].position = vec4(f_pos*cube_dim,1); And we know there is a cube with dimensions (-cube_dim,-cube_dim,-cube_dim) to (cube_dim,cube_dim,cube_dim)
So a cube whose diagonals intersect at the origin with side length of 2*cube_dim. That has been divided into multiple little cubes with side length 2*cube_dim/voxel_resolution. Basically this is just a cube subdivided n times to make a cartesian grid.
Using this coordinate we start at the big cube, subdividing it into 8 equal sized subsapaces and detecting which of these subspaces contians the coordinate.
We do this until we find the smallest box containing the position.
The raytracer
#version 430
in vec2 f_coord;
out vec4 fragment_color;
struct Voxel
{
vec4 position;
vec4 normal;
vec4 color;
};
struct Node
{
int children[8];
};
layout(std430, binding = 0) buffer voxel_buffer
{
Voxel voxels[];
};
layout(std430, binding = 1) buffer buffer_index
{
uint index;
};
layout(std430, binding = 2) buffer tree_buffer
{
Node tree[];
};
layout(std430, binding = 3) buffer tree_index
{
uint t_index;
};
uniform vec3 camera_pos;
uniform float aspect_ratio;
uniform float cube_dim;
uniform int voxel_resolution;
float planeIntersection(vec3 origin, vec3 ray, vec3 pNormal, vec3 pPoint)
{
pNormal = normalize(pNormal);
return (dot(pPoint,pNormal)-dot(pNormal,origin))/dot(ray,pNormal);
}
#define EPSILON 0.001
bool inBoxBounds(vec3 corner, float size, vec3 position)
{
bool inside = true;
position-=corner;
for(int i=0; i<3; i++)
{
inside = inside && (position[i] > -EPSILON);
inside = inside && (position[i] < size+EPSILON);
}
return inside;
}
float boxIntersection(vec3 origin, vec3 dir, vec3 corner0, float size)
{
dir = normalize(dir);
vec3 corner1 = corner0 + vec3(size,size,size);
vec3 normals[6] =
{ vec3(-1,0,0), vec3(0,-1,0), vec3(0,0,-1), vec3(1,0,0), vec3(0,1,0), vec3(0,0,1) };
float coeffs[6];
for(uint i=0; i<3; i++)
coeffs[i] = planeIntersection(origin, dir, normals[i], corner0);
for(uint i=3; i<6; i++)
coeffs[i] = planeIntersection(origin, dir, normals[i], corner1);
float t = 1.f/0.f;
for(uint i=0; i<6; i++){
coeffs[i] = coeffs[i] < 0 ? 1.f/0.f : coeffs[i];
t = inBoxBounds(corner0,size,origin+dir*coeffs[i]) ? min(coeffs[i],t) : t;
}
return t;
}
void sort(float elements[8], int indices[8], vec3 vectors[8])
{
for(uint i=0; i<8; i++)
{
for(uint j=i; j<8; j++)
{
if(elements[j] < elements[i])
{
float swap = elements[i];
elements[i] = elements[j];
elements[j] = swap;
int iSwap = indices[i];
indices[i] = indices[j];
indices[j] = iSwap;
vec3 vSwap = vectors[i];
vectors[i] = vectors[j];
vectors[j] = vSwap;
}
}
}
}
int getVIndex(vec3 position, int level)
{
float size = cube_dim / pow(2,level);
int bit2 = int(position.x > size);
int bit1 = int(position.y > size);
int bit0 = int(position.z > size);
return 4*bit2 + 2*bit1 + bit0;
}
#define MAX_TREE_HEIGHT 11
int nodes[8*MAX_TREE_HEIGHT];
int levels[8*MAX_TREE_HEIGHT];
vec3 positions[8*MAX_TREE_HEIGHT];
int sp=0;
void push(int node, int level, vec3 corner)
{
nodes[sp] = node;
levels[sp] = level;
positions[sp] = corner;
sp++;
}
void main()
{
vec3 r = vec3(f_coord.x, f_coord.y, 1.f/tan(radians(40)));
r.y/=aspect_ratio;
vec3 dir = r;
r += vec3(0,0,-1.f/tan(radians(40))) + camera_pos;
fragment_color = vec4(0);
//int level = 0;
int max_level = int(log2(voxel_resolution));
push(0,0,vec3(-cube_dim));
float tc = 1.f;
int level=0;
int node=0;
do
{
sp--;
node = nodes[sp];
level = levels[sp];
vec3 corner = positions[sp];
float size = cube_dim / pow(2,level);
vec3 corners[] =
{corner, corner+vec3(0,0,size),
corner+vec3(0, size,0), corner+vec3(0,size,size),
corner+vec3(size,0,0), corner+vec3(size,0,size),
corner+vec3(size,size,0), corner+vec3(size,size,size)};
float t = boxIntersection(r, dir, corner, size*2);
if(!isinf(t))
tc *= 0.9f;
float coeffs[8];
for(int child=0; child<8; child++)
{
if(tree[node].children[child]>0)
coeffs[child] = boxIntersection(r, dir, corners[child], size);
else
coeffs[child] = 1.f/0.f;
}
int indices[8] = {0,1,2,3,4,5,6,7};
sort(coeffs, indices, corners);
for(uint i=7; i>=0; i--)
{
if(!isinf(coeffs[i]))
{
push(tree[node].children[indices[i]],
level+1, corners[i]);
}
}
}while(level < (max_level-1) && sp>0);
if(level==max_level-1)
{
fragment_color = abs(voxels[node].normal);
}
else
{
fragment_color=vec4(tc);
}
}
}
In here, we start at the biggest cube, testing intersections with each set of 8 children (the 8 cubes resulting from subdividing a cube). Each time we successfully detect a collision, we move down the tree, until we reach the lowest level which describes the actual geometry and we color the scene based on that.
Debugging and Problem
The important part is that there are 2 buffers, one to store the tree except the leafs, and one to store the leafs.
So in both the voxelization and the ray tracing, the last layer needs to be treated differently.
The issues I have noticed about the transparency are as follows:
It happens only on planes aligned with the cartesian grid
It seems it happens when the ray moves in a negative direction (down
or to the left). (At least that's my imperssion but it's not 100%
certain)
I am not sure what I am doing wrong.
EDIT:
The original issue seems to have been fixed, however the raytracer is still bugged. I have edited the question to refelct the current state of the problem.

The error comes from the sorting function as someone in the comments mentioned although not for the same reasons.
What has happened is that, I thought the sort function would modify the arrays passed to it, but it seems to be copying the data, so it does not return anything.
In other words:
void sort(float elements[8], int indices[8], vec3 vectors[8])
{
for(uint i=0; i<8; i++)
{
for(uint j=i; j<8; j++)
{
if((elements[j] < elements[i]))
{
float swap = elements[i];
elements[i] = elements[j];
elements[j] = swap;
int iSwap = indices[i];
indices[i] = indices[j];
indices[j] = iSwap;
vec3 vSwap = vectors[i];
vectors[i] = vectors[j];
vectors[j] = vSwap;
}
}
}
}
Does not return the correct values inside of elements, indices and vectors, so calling this function does nothing but waste computation cycles.

Related

Slower read/write to shared memory in CUDA than in Compute Shader [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 months ago.
Improve this question
I am currently comparing the implementation of a n-body simulation in the GPU using CUDA and OpenGL (Compute Shaders) for a project, but I run into a problem using shared memory.
First I implemented the version with no shared memory as follows:
CUDA
#include "helper_math.h"
//...
__device__ float dist2(float3 A, float3 B)
{
float3 C = A - B;
return dot(C, C);
}
__global__ void n_body_vel_calc(float3* positions, float3* velocities,
unsigned numParticles, float mass, float deltaTime)
{
unsigned i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= numParticles)
return;
const float G = 6.6743e-11f;
float3 cur_position = positions[i];
float3 force = make_float3(0.0f, 0.0f, 0.0f);
for (unsigned j = 0; j < numParticles; ++j)
{
if (i == j)
continue;
float3 neighbor_position = positions[j];
float inv_distance2 = 1.0f / dist2(cur_position, neighbor_position);
float3 direction = normalize(neighbor_position - cur_position);
force += G * mass * mass * inv_distance2 * direction;
}
float3 acceleration = force / mass;
velocities[i] += acceleration * deltaTime;
}
OpenGL
// glBufferStorage(GL_SHADER_STORAGE_BUFFER, ..., ..., ...);
#version 460
layout(local_size_x=128) in;
layout(location = 0) uniform int numParticles;
layout(location = 1) uniform float mass;
layout(location = 2) uniform float dt;
layout(std430, binding=0) buffer pblock { vec3 positions[]; };
layout(std430, binding=1) buffer vblock { vec3 velocities[]; };
float dist2(vec3 A, vec3 B)
{
vec3 C = A - B;
return dot( C, C );
}
void main()
{
int i = int(gl_GlobalInvocationID);
if (i >= numParticles)
return;
const float G = 6.6743e-11f;
vec3 cur_position = positions[i];
vec3 force = vec3(0.0);
for (uint j = 0; j < numParticles; ++j)
{
if (i == j)
continue;
vec3 neighbor_position = positions[j];
float inv_distance2 = 1.0 / dist2(cur_position, neighbor_position);
vec3 direction = normalize(neighbor_position - cur_position);
force += G * mass * mass * inv_distance2 * direction;
}
vec3 acceleration = force / mass;
velocities[i] += acceleration * dt;
}
With the same number of threads per group, number of particles and the same number of times executing the kernel, the CUDA version takes 82 ms and OpengGL takes 70 ms. Weird thing that there speed is much different, but I can attribute that to GLSL having geometric operations optimized somehow.
My problem comes next, when I write the versions with shared memory, which should increase the performance by not reading from global memory multiple times.
CUDA
__global__ void n_body_vel_calc(float3* positions, float3 * velocities, unsigned workgroupSize,
unsigned numParticles, float mass, float deltaTime)
{
// size of array == workgroupSize
extern __shared__ float3 temp_tile[];
unsigned i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= numParticles)
return;
const float G = 6.6743e-11f;
float3 cur_position = positions[i];
float3 force = make_float3(0.0f, 0.0f, 0.0f);
for (unsigned tile = 0; tile < numParticles; tile += workgroupSize)
{
temp_tile[threadIdx.x] = positions[tile + threadIdx.x];
__syncthreads();
for (unsigned j = 0; j < workgroupSize; ++j)
{
if (i == j || ((tile + j) >= numParticles))
continue;
float3 neighbor_position = temp_tile[j];
float inv_distance2 = 1.0f / dist2(cur_position, neighbor_position);
float3 direction = normalize(neighbor_position - cur_position);
force += G * mass * mass * inv_distance2 * direction;
}
__syncthreads();
}
float3 acceleration = force / mass;
velocities[i] += acceleration * deltaTime;
}
OpenGL
#version 460
layout(local_size_x=128) in;
layout(location = 0) uniform int numParticles;
layout(location = 1) uniform float mass;
layout(location = 2) uniform float dt;
layout(std430, binding=0) buffer pblock { vec3 positions[]; };
layout(std430, binding=1) buffer vblock { vec3 velocities[]; };
// Shared variables
shared vec3 temp_tile[gl_WorkGroupSize.x];
void main()
{
int i = int(gl_GlobalInvocationID);
if (i >= numParticles)
return;
const float G = 6.6743e-11f;
vec3 cur_position = positions[i];
vec3 force = vec3(0.0);
for (uint tile = 0; tile < numParticles; tile += gl_WorkGroupSize.x)
{
temp_tile[gl_LocalInvocationIndex] = positions[tile + gl_LocalInvocationIndex];
groupMemoryBarrier();
barrier();
for (uint j = 0; j < gl_WorkGroupSize.x; ++j)
{
if (i == j || (tile + j) >= numParticles)
continue;
vec3 neighbor_position = temp_tile[j];
float inv_distance2 = 1.0 / dist2(cur_position, neighbor_position);
vec3 direction = normalize(neighbor_position - cur_position);
force += G * mass * mass * inv_distance2 * direction;
}
groupMemoryBarrier();
barrier();
}
vec3 acceleration = force / mass;
velocities[i] += acceleration * dt;
}
My principal problem comes next. With the same parameters as above, the CUDA version increases its execution time to 128 ms (greatly diminishing its performance), and the OpenGL one took 68 (a small improvement over the other version).
I have compiled the CUDA version with the toolkit version 11.7 and 10.0 with MSVC V143 and V142 and the results are more or less the same.
Why the OpenGL implementation is faster with shared memory, but the CUDA one its not? Am I missing something?

Why are my Ray march fragment shader refelction texture lookups slowing my frame rate?

I’ve written a Fragment shader in GLSL, using shader toy.
Link : https://www.shadertoy.com/view/wtGSzy
most of it works, but when I enable texture lookups in the reflection function, the performance drops from 60FPS to 5~FPS.
The code in question is on lines 173 - 176
if(SDFObjectToDraw.texChannelID == 0)
col = texture(iChannel0, uv);
if(SDFObjectToDraw.texChannelID == 1)
col = texture(iChannel1, uv);
This same code can bee seen in my rayMarch function (lines 274-277) and works fine for colouring my objects. It only causes issues in the reflection function.
My question is, why are my texture lookups, in the reflection code, dropping my performance this much and what can I do to improve it?
/**
* Return the normalized direction to march in from the eye point for a single pixel.
*
* fieldOfView: vertical field of view in degrees
* size: resolution of the output image
* fragCoord: the x,y coordinate of the pixel in the output image
*/
vec3 rayDirection(float fieldOfView, vec2 size, vec2 fragCoord) {
vec2 xy = fragCoord - size / 2.0;
float z = size.y / tan(radians(fieldOfView) / 2.0);
return normalize(vec3(xy, -z));
}
float start = 0.0;
vec3 eye = vec3(0,0,5);
int MAX_MARCHING_STEPS = 255;
float EPSILON = 0.00001;
float end = 10.0;
const uint Shpere = 1u;
const uint Box = 2u;
const uint Plane = 4u;
vec3 lightPos = vec3(-10,0,5);
#define M_PI 3.1415926535897932384626433832795
const int SDF_OBJECT_COUNT = 4;
struct SDFObject
{
uint Shape;
vec3 Position;
float Radius;
int texChannelID;
float Ambiant;
float Spec;
float Diff;
vec3 BoxSize;
bool isMirror; //quick hack to get refletions working
};
SDFObject SDFObjects[SDF_OBJECT_COUNT] = SDFObject[SDF_OBJECT_COUNT](
SDFObject(Shpere, vec3(2,0,-3),1.0,0,0.2,0.2,0.8, vec3(0,0,0),true)
,SDFObject(Shpere, vec3(-2,0,-3),1.0,0,0.1,1.0,1.0, vec3(0,0,0),false)
,SDFObject(Box, vec3(0,0,-6),0.2,1,0.2,0.2,0.8, vec3(1.0,0.5,0.5),false)
,SDFObject(Plane, vec3(0,0,0),1.0,1,0.2,0.2,0.8, vec3(0.0,1.0,0.0),false)
);
float shereSDF(vec3 p, SDFObject o)
{
return length(p-o.Position)-o.Radius;
}
float boxSDF(vec3 pointToTest, vec3 boxBoundery, float radius, vec3 boxPos)
{
vec3 q = abs(pointToTest - boxPos) - boxBoundery;
return length(max(q,0.0)) + min(max(q.x, max(q.y,q.z)) ,0.0) -radius;
}
float planeSDF(vec3 p, vec4 n, vec3 Pos)
{
return dot(p-Pos, n.xyz) + n.w;
}
bool IsShadow(vec3 LightPos, vec3 HitPos)
{
bool isShadow = false;
vec3 viewRayDirection = normalize(lightPos- HitPos) ;
float depth = start;
vec3 hitpoint;
for(int i=0; i<MAX_MARCHING_STEPS; i++)
{
hitpoint = (HitPos+ depth * viewRayDirection);
float dist = end;
for(int j =0; j<SDF_OBJECT_COUNT; j++)
{
float distToObjectBeingConsidered;
if(SDFObjects[j].Shape == Shpere)
distToObjectBeingConsidered = shereSDF(hitpoint, SDFObjects[j]);
if(SDFObjects[j].Shape == Box)
distToObjectBeingConsidered = boxSDF(hitpoint, SDFObjects[j].BoxSize , SDFObjects[j].Radius, SDFObjects[j].Position);
if(SDFObjects[j].Shape == Plane)
distToObjectBeingConsidered= planeSDF(hitpoint, vec4(SDFObjects[j].BoxSize, SDFObjects[j].Radius), SDFObjects[j].Position);
if( distToObjectBeingConsidered < dist)
{
dist = distToObjectBeingConsidered;
}
}
if(dist < EPSILON)
{
isShadow = true;
}
depth += dist;
if(depth >= end)
{
isShadow = false;
}
}
return isShadow;
}
vec3 MirrorReflection(vec3 inComingRay, vec3 surfNormal, vec3 HitPos, int objectIndexToIgnore)
{
vec3 returnCol;
vec3 reflectedRay = reflect(inComingRay, surfNormal);
vec3 RayDirection = normalize(reflectedRay) ;
float depth = start;
vec3 hitpoint;
int i;
for(i=0; i<MAX_MARCHING_STEPS; i++)
{
hitpoint = (HitPos+ depth * RayDirection);
SDFObject SDFObjectToDraw;
float dist = end;
for(int j =0; j<SDF_OBJECT_COUNT; j++)
{
float distToObjectBeingConsidered;
if(SDFObjects[j].Shape == Shpere)
distToObjectBeingConsidered = shereSDF(hitpoint, SDFObjects[j]);
if(SDFObjects[j].Shape == Box)
distToObjectBeingConsidered = boxSDF(hitpoint, SDFObjects[j].BoxSize , SDFObjects[j].Radius, SDFObjects[j].Position);
if(SDFObjects[j].Shape == Plane)
distToObjectBeingConsidered= planeSDF(hitpoint, vec4(SDFObjects[j].BoxSize, SDFObjects[j].Radius), SDFObjects[j].Position);
if( distToObjectBeingConsidered < dist && j!= objectIndexToIgnore )// D > 0.0)
{
dist = distToObjectBeingConsidered;
SDFObjectToDraw = SDFObjects[j];
}
}
if(dist < EPSILON)
{
vec3 normal =normalize(hitpoint-SDFObjectToDraw.Position);
float u = 0.5+ (atan(normal.z, normal.x)/(2.0*M_PI));
float v = 0.5+ (asin(normal.y)/(M_PI));
vec2 uv =vec2(u,v);
vec4 col = vec4(0,0.5,0.5,0);
///>>>>>>>>>>>> THESE LINES ARE broken, WHY?
//if(SDFObjectToDraw.texChannelID == 0)
//col = texture(iChannel0, uv);
//if(SDFObjectToDraw.texChannelID == 1)
//col = texture(iChannel1, uv);
vec3 NormalizedDirToLight = normalize(lightPos-SDFObjectToDraw.Position);
float theta = dot(normal,NormalizedDirToLight);
vec3 reflectionOfLight = reflect(NormalizedDirToLight, normal);
vec3 viewDir = normalize(SDFObjectToDraw.Position);
float Spec = dot(reflectionOfLight, viewDir);
if(IsShadow(lightPos, hitpoint))
{
returnCol= (col.xyz*SDFObjectToDraw.Ambiant);
}
else
{
returnCol= (col.xyz*SDFObjectToDraw.Ambiant)
+(col.xyz * max(theta *SDFObjectToDraw.Diff, SDFObjectToDraw.Ambiant));
}
break;
}
depth += dist;
if(depth >= end)
{
//should look up bg texture here but cant be assed right now
returnCol = vec3(1.0,0.0,0.0);
break;
}
}
return returnCol;//*= (vec3(i+1)/vec3(MAX_MARCHING_STEPS));
}
vec3 rayMarch(vec2 fragCoord)
{
vec3 viewRayDirection = rayDirection(45.0, iResolution.xy, fragCoord);
float depth = start;
vec3 hitpoint;
vec3 ReturnColour = vec3(0,0,0);
for(int i=0; i<MAX_MARCHING_STEPS; i++)
{
hitpoint = (eye+ depth * viewRayDirection);
float dist = end;
SDFObject SDFObjectToDraw;
int objectInDexToIgnore=-1;
//find closest objecct to current point
for(int j =0; j<SDF_OBJECT_COUNT; j++)
{
float distToObjectBeingConsidered;
if(SDFObjects[j].Shape == Shpere)
distToObjectBeingConsidered = shereSDF(hitpoint, SDFObjects[j]);
if(SDFObjects[j].Shape == Box)
distToObjectBeingConsidered = boxSDF(hitpoint, SDFObjects[j].BoxSize , SDFObjects[j].Radius, SDFObjects[j].Position);
if(SDFObjects[j].Shape == Plane)
distToObjectBeingConsidered= planeSDF(hitpoint, vec4(SDFObjects[j].BoxSize, SDFObjects[j].Radius), SDFObjects[j].Position);
if( distToObjectBeingConsidered < dist)
{
dist = distToObjectBeingConsidered;
SDFObjectToDraw = SDFObjects[j];
objectInDexToIgnore = j;
}
}
//if we are close enough to an objectoto hit it.
if(dist < EPSILON)
{
vec3 normal =normalize(hitpoint-SDFObjectToDraw.Position);
if(SDFObjectToDraw.isMirror)
{
ReturnColour = MirrorReflection( viewRayDirection, normal, hitpoint, objectInDexToIgnore);
}
else
{
float u = 0.5+ (atan(normal.z, normal.x)/(2.0*M_PI));
float v = 0.5+ (asin(normal.y)/(M_PI));
vec2 uv =vec2(u,v);
vec4 col;
if(SDFObjectToDraw.texChannelID == 0)
col = texture(iChannel0, uv);
if(SDFObjectToDraw.texChannelID == 1)
col = texture(iChannel1, uv);
vec3 NormalizedDirToLight = normalize(lightPos-SDFObjectToDraw.Position);
float theta = dot(normal,NormalizedDirToLight);
vec3 reflectionOfLight = reflect(NormalizedDirToLight, normal);
vec3 viewDir = normalize(SDFObjectToDraw.Position);
float Spec = dot(reflectionOfLight, viewDir);
if(IsShadow(lightPos, hitpoint))
{
ReturnColour= (col.xyz*SDFObjectToDraw.Ambiant);
}
else
{
ReturnColour= (col.xyz*SDFObjectToDraw.Ambiant)
+(col.xyz * max(theta *SDFObjectToDraw.Diff, SDFObjectToDraw.Ambiant));
//+(col.xyz* Spec * SDFObjectToDraw.Spec);
}
}
return ReturnColour;
}
depth += dist;
if(depth >= end)
{
float u = fragCoord.x/ iResolution.x;
float v = fragCoord.y/ iResolution.y;
vec4 col = texture(iChannel2, vec2(u,v));
ReturnColour =col.xyz;
}
}
return ReturnColour;
}
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
// Normalized pixel coordinates (from 0 to 1)
//vec2 uv = fragCoord/iResolution.xy;
// Time varying pixel color
//vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
// Output to screen
lightPos *= cos(iTime+vec3(1.5,2,2));
//lightPos= vec3(cos(iTime)*2.0,0,0);
vec3 SDFCol= rayMarch(fragCoord);
vec3 col = vec3(0);
//if(SDFVal <=1.0)
// col = vec3(1,0,0);
//col = vec3(SDFVal,0,0);
col = vec3(0.5,0,0);
col = SDFCol;
fragColor = vec4(col,1.0);
}
[...] This same code can bee seen in my rayMarch function (lines 274-277) and works fine for colouring my objects. [...]
The "working" texture lookup is executed in a loop in rayMarch. MAX_MARCHING_STEPS is 255, so the lookup is done at most 255 times.
vec3 rayMarch(vec2 fragCoord)
{
// [...]
for(int i=0; i<MAX_MARCHING_STEPS; i++)
{
// [...]
if(SDFObjectToDraw.texChannelID == 0)
col = texture(iChannel0, uv);
if(SDFObjectToDraw.texChannelID == 1)
col = texture(iChannel1, uv);
// [...]
}
// [...]
}
When you do the lookup in MirrorReflection then the performance breaks down, because it is done in a loop in MirrorReflection and MirrorReflection is called in a loop in rayMarch. In this case the lookup is done up to 255*255 = 65025 times.
~65000 texture lookups for a fragment is far to much and cause the break down of performance.
vec3 MirrorReflection(vec3 inComingRay, vec3 surfNormal, vec3 HitPos, int objectIndexToIgnore)
{
// [...]
for(i=0; i<MAX_MARCHING_STEPS; i++)
{
// [...]
if(SDFObjectToDraw.texChannelID == 0)
col = texture(iChannel0, uv);
if(SDFObjectToDraw.texChannelID == 1)
col = texture(iChannel1, uv);
// [...]
}
// [...]
}
vec3 rayMarch(vec2 fragCoord)
{
// [...]
for(int i=0; i<MAX_MARCHING_STEPS; i++)
{
// [...]
ReturnColour = MirrorReflection(viewRayDirection, normal, hitpoint, objectInDexToIgnore);
// [...]
}
// [...]
}

OpenGL "optimizing" uniform variable

I have a uniform variable called control_count (count of the control points in a bezier curve). In the marked part in my code, if I replace the constant 4 with this variable, it's just stops working, if it's 4 it's working fine. The variable must have the value 4 in it, I tested it before and after the loop as well, I marked this in the code too. It should be an unrolling problem? How do I force the compiler not to do this?
#version 150
layout(lines_adjacency) in;
layout(line_strip, max_vertices = 101) out;
out vec4 gs_out_col;
uniform mat4 MVP;
uniform int control_count;
uniform int tess_count;
int degree;
int binom( int n, int k );
void main()
{
degree = control_count - 1;
vec3 b[10];
float B[10];
////////////MARK//////////////////
//control_count must be 4, other ways it'd draw less points
for(int i = 0; i < control_count; ++i){
b[i] = gl_in[i].gl_Position.xyz;
}
////////////END MARK//////////////////
for(int i = 0; i <= tess_count; ++i){
float t = i / float(tess_count);
gl_Position = vec4(0);
////////////MARK//////////////////
//here, if I write control_count instead of 4, I don't get what I expect
for(int j = 0; j < 4; ++j){
////////////END MARK//////////////////
B[j] = binom(3, j) * pow(1 - t, 3 - j) * pow(t, j);
gl_Position += vec4(b[j] * B[j], B[j]);
}
gl_Position = MVP * gl_Position;
////////////MARK//////////////////
//control_count - 4 --> I get red color,
//control_count - 3 --> I get purple,
//so the variable must have the value 4
gs_out_col = vec4(1, 0, control_count - 4, 1);//gl_Position;
////////////END MARK//////////////////
EmitVertex();
}
}
The "good" result using the constant 4:
The "wrong" result using the variable control_count:

How to create billboard matrix in glm

How to create a billboard translation matrix from a point in space using glm?
Just set the upper left 3×3 submatrix of the transformation to identity.
Update: Fixed function OpenGL variant:
void makebillboard_mat4x4(double *BM, double const * const MV)
{
for(size_t i = 0; i < 3; i++) {
for(size_t j = 0; j < 3; j++) {
BM[4*i + j] = i==j ? 1 : 0;
}
BM[4*i + 3] = MV[4*i + 3];
}
for(size_t i = 0; i < 4; i++) {
BM[12 + i] = MV[12 + i];
}
}
void mygltoolMakeMVBillboard(void)
{
GLenum active_matrix;
double MV[16];
glGetIntegerv(GL_MATRIX_MODE, &active_matrix);
glMatrixMode(GL_MODELVIEW);
glGetDoublev(GL_MODELVIEW_MATRIX, MV);
makebillboard_mat4x4(MV, MV);
glLoadMatrixd(MV);
glMatrixMode(active_matrix);
}
mat4 billboard(vec3 position, vec3 cameraPos, vec3 cameraUp) {
vec3 look = normalize(cameraPos - position);
vec3 right = cross(cameraUp, look);
vec3 up2 = cross(look, right);
mat4 transform;
transform[0] = vec4(right, 0);
transform[1] = vec4(up2, 0);
transform[2] = vec4(look, 0);
// Uncomment this line to translate the position as well
// (without it, it's just a rotation)
//transform[3] = vec4(position, 0);
return transform;
}

sobel operator - false edges natural images

I'm having a problem with edge detection using Sobel operator: it produces too many false edges, effect is shown on pictures below.
I'm using a 3x3 sobel operator - first extracting vertical then horizontal, final output is magnitude of each filter output.
Edges on synthetic images are extracted properly but natural images produce have too many false edges or "noise" even if image is preprocessed by applying blur or median filter.
What might be cause of this? Is it implementation problem (then: why synthetic images are fine?) or I need to do some more preprocessing?
Original:
Output:
code:
void imageOp::filter(image8* image, int maskSize, int16_t *mask)
{
if((image == NULL) || (maskSize/2 == 0) || maskSize < 1)
{
if(image == NULL)
{
printf("filter: image pointer == NULL \n");
}
else if(maskSize < 1)
{
printf("filter: maskSize must be greater than 1\n");
}
else
{
printf("filter: maskSize must be odd number\n");
}
return;
}
image8* fImage = new image8(image->getHeight(), image->getWidth());
uint16_t sum = 0;
int d = maskSize/2;
int ty, tx;
for(int x = 0; x < image->getHeight(); x++) //
{ // loop over image
for(int y = 0; y < image->getWidth(); y++) //
{
for(int xm = -d; xm <= d; xm++)
{
for(int ym = -d; ym <= d; ym++)
{
ty = y + ym;
if(ty < 0) // edge conditions
{
ty = (-1)*ym - 1;
}
else if(ty >= image->getWidth())
{
ty = image->getWidth() - ym;
}
tx = x + xm;
if(tx < 0) // edge conditions
{
tx = (-1)*xm - 1;
}
else if(tx >= image->getHeight())
{
tx = image->getHeight() - xm;
}
sum += image->img[tx][ty] * mask[((xm+d)*maskSize) + ym + d];
}
}
if(sum > 255)
{
fImage->img[x][y] = 255;
}
else if(sum < 0)
{
fImage->img[x][y] = 0;
}
else
{
fImage->img[x][y] = (uint8_t)sum;
}
sum = 0;
}
}
for(int x = 0; x < image->getHeight(); x++)
{
for(int y = 0; y < image->getWidth(); y++)
{
image->img[x][y] = fImage->img[x][y];
}
}
delete fImage;
}
This appears to be due to a math error somewhere in your code. To follow on my comment, this is what I get when I run your image through a Sobel operator here (edge strength is indicated by brightness of the output image):
I used a GLSL fragment shader to produce this:
precision mediump float;
varying vec2 textureCoordinate;
varying vec2 leftTextureCoordinate;
varying vec2 rightTextureCoordinate;
varying vec2 topTextureCoordinate;
varying vec2 topLeftTextureCoordinate;
varying vec2 topRightTextureCoordinate;
varying vec2 bottomTextureCoordinate;
varying vec2 bottomLeftTextureCoordinate;
varying vec2 bottomRightTextureCoordinate;
uniform sampler2D inputImageTexture;
void main()
{
float bottomLeftIntensity = texture2D(inputImageTexture, bottomLeftTextureCoordinate).r;
float topRightIntensity = texture2D(inputImageTexture, topRightTextureCoordinate).r;
float topLeftIntensity = texture2D(inputImageTexture, topLeftTextureCoordinate).r;
float bottomRightIntensity = texture2D(inputImageTexture, bottomRightTextureCoordinate).r;
float leftIntensity = texture2D(inputImageTexture, leftTextureCoordinate).r;
float rightIntensity = texture2D(inputImageTexture, rightTextureCoordinate).r;
float bottomIntensity = texture2D(inputImageTexture, bottomTextureCoordinate).r;
float topIntensity = texture2D(inputImageTexture, topTextureCoordinate).r;
float h = -topLeftIntensity - 2.0 * topIntensity - topRightIntensity + bottomLeftIntensity + 2.0 * bottomIntensity + bottomRightIntensity;
float v = -bottomLeftIntensity - 2.0 * leftIntensity - topLeftIntensity + bottomRightIntensity + 2.0 * rightIntensity + topRightIntensity;
float mag = length(vec2(h, v));
gl_FragColor = vec4(vec3(mag), 1.0);
You don't show your mask values, which I assume contain the Sobel kernel. In the above code, I've hardcoded the calculations performed against the red channel of each pixel in a 3x3 Sobel kernel. This is purely for performance on my platform.
One thing I don't notice in your code (again, I may be missing it like I did the sum being set back to 0) is the determination of the magnitude of the vector for the two portions of the Sobel operator. I'd expect to see a square root operation in there somewhere, if that was present.