Why is my OpenGL compute shader so slow?

I have been building an OpenGL compute shader that implements ray tracing. Currently it just computes the pixel color by casting a ray against an array of triangles.
#version 430 core

struct Triangle {
    vec3 vertex1;
    vec3 vertex2;
    vec3 vertex3;
    vec3 color1;
    vec3 color2;
    vec3 color3;
    vec3 normal1;
    vec3 normal2;
    vec3 normal3;
    vec3 edge1;
    vec3 edge2;
};

layout (std430, binding = 0) readonly buffer TriangleBuffer {
    int numTriangles;
    Triangle triangles[];
};

layout (std430, binding = 1, column_major) buffer CameraBuffer {
    vec3 cameraPosition;
    mat4 view;
    mat4 projection;
    mat4 inverseViewProjection;
};

layout (rgba8, binding = 2) writeonly uniform image2D outputImage;

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

vec3 getBarycentricCoords(int triangleIndex, vec3 closestIntersectionPoint) {
    vec3 v0 = triangles[triangleIndex].vertex2 - triangles[triangleIndex].vertex1;
    vec3 v1 = triangles[triangleIndex].vertex3 - triangles[triangleIndex].vertex1;
    vec3 v2 = closestIntersectionPoint - triangles[triangleIndex].vertex1;
    float d00 = dot(v0, v0);
    float d01 = dot(v0, v1);
    float d11 = dot(v1, v1);
    float d20 = dot(v2, v0);
    float d21 = dot(v2, v1);
    float denom = d00 * d11 - d01 * d01;
    float b1 = (d11 * d20 - d01 * d21) / denom;
    float b2 = (d00 * d21 - d01 * d20) / denom;
    float b0 = 1.0f - b1 - b2;
    return vec3(b0, b1, b2);
}

vec3 getTriangleColor(int triangleIndex, vec3 closestIntersectionPoint) {
    vec3 barycentric = getBarycentricCoords(triangleIndex, closestIntersectionPoint);
    vec3 triangleColor = barycentric.x * triangles[triangleIndex].color1
                       + barycentric.y * triangles[triangleIndex].color2
                       + barycentric.z * triangles[triangleIndex].color3;
    return triangleColor;
}

bool rayTriangleIntersection(vec3 rayOrigin, vec3 rayDirection, int triangleIndex, out vec3 intersectionPoint) {
    vec3 h = cross(rayDirection, triangles[triangleIndex].edge2);
    float a = dot(triangles[triangleIndex].edge1, h);
    if (a > -0.00001 && a < 0.00001) {
        return false;
    }
    float f = 1.0 / a;
    vec3 s = rayOrigin - triangles[triangleIndex].vertex1;
    float u = f * dot(s, h);
    if (u < 0.0 || u > 1.0) {
        return false;
    }
    vec3 q = cross(s, triangles[triangleIndex].edge1);
    float v = f * dot(rayDirection, q);
    if (v < 0.0 || u + v > 1.0) {
        return false;
    }
    float t = f * dot(triangles[triangleIndex].edge2, q);
    if (t > 0.00001) {
        intersectionPoint = rayOrigin + rayDirection * t;
        return true;
    }
    return false;
}

vec3 unProject(vec3 win, mat4 model, mat4 proj, vec4 viewport) {
    vec4 tmp = vec4(win, 1);
    tmp.x = (tmp.x - viewport[0]) / viewport[2];
    tmp.y = (tmp.y - viewport[1]) / viewport[3];
    tmp.x = tmp.x * 2 - 1;
    tmp.y = tmp.y * 2 - 1;
    vec4 obj = inverseViewProjection * tmp;
    obj /= obj.w;
    return obj.xyz;
}

void main() {
    ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
    vec4 viewport = vec4(0, 0, vec2(imageSize(outputImage)).xy);
    vec3 near = vec3(pixelCoord.x, pixelCoord.y, -1);
    vec3 far = vec3(pixelCoord.x, pixelCoord.y, 0.9518f);
    vec3 rayOrigin = unProject(near, view, projection, viewport);
    vec3 rayWorldFar = unProject(far, view, projection, viewport);
    vec3 rayDirection = normalize(rayWorldFar - rayOrigin);
    vec3 intersectionPoint;
    vec3 closestIntersectionPoint = vec3(0, 0, 0);
    float closestIntersectionDistance = 999999999.0f;
    vec3 finalColor = vec3(0, 0, 0);
    bool intersectionFound = false;
    for (int triangleIndex = 0; triangleIndex < numTriangles; triangleIndex++) {
        if (rayTriangleIntersection(rayOrigin, rayDirection, triangleIndex, intersectionPoint)) {
            float intersectionDistance = distance(intersectionPoint, rayOrigin);
            if (intersectionDistance < closestIntersectionDistance) {
                closestIntersectionDistance = intersectionDistance;
                closestIntersectionPoint = intersectionPoint;
                finalColor = getTriangleColor(triangleIndex, closestIntersectionPoint);
                intersectionFound = true;
            }
        }
    }
    if (intersectionFound) {
        imageStore(outputImage, pixelCoord, vec4(finalColor, 1.0f));
    } else {
        imageStore(outputImage, pixelCoord, vec4(0));
    }
}
However, when running the shader I only get 30 fps, and that is with a scene of only 20 triangles, so there must be a significant bottleneck in the code.
What optimizations can I make to increase the performance of this code, and where does the bottleneck come from?

I managed to more than double my framerate by making the following modifications:
Change the local workgroup size in the layout to a higher value
For this I used GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS:
GLint glMaxComputeWorkGroupInvocations = 0;
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glMaxComputeWorkGroupInvocations);
LIGHTING_SHADER_LOCAL_SIZE_Y = LIGHTING_SHADER_LOCAL_SIZE_X = sqrt(glMaxComputeWorkGroupInvocations);
and update the layout sizes:
layout (local_size_x = ${LIGHTING_SHADER_LOCAL_SIZE_X}, local_size_y = ${LIGHTING_SHADER_LOCAL_SIZE_Y}, local_size_z = 1) in;
Get pixelCoord based on group_id and local_id
ivec3 groupId = ivec3(gl_WorkGroupID);
ivec3 localId = ivec3(gl_LocalInvocationID);
ivec3 globalId = ivec3(gl_GlobalInvocationID);
ivec3 coords = groupId * ivec3(gl_WorkGroupSize) + localId;
ivec2 pixelCoord = ivec2(coords.xy);
Update glDispatchCompute
glDispatchCompute(windowWidth / LIGHTING_SHADER_LOCAL_SIZE_X, windowHeight / LIGHTING_SHADER_LOCAL_SIZE_Y, 1);
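One caveat with this dispatch: the integer division truncates, so a window size that is not a multiple of the local size leaves the right and bottom edges unprocessed. A minimal guarded sketch, assuming the same LIGHTING_SHADER_LOCAL_SIZE_X/Y values as above:
// Host side: round the group counts up so every pixel is covered.
GLuint groupsX = (windowWidth + LIGHTING_SHADER_LOCAL_SIZE_X - 1) / LIGHTING_SHADER_LOCAL_SIZE_X;
GLuint groupsY = (windowHeight + LIGHTING_SHADER_LOCAL_SIZE_Y - 1) / LIGHTING_SHADER_LOCAL_SIZE_Y;
glDispatchCompute(groupsX, groupsY, 1);
and in the shader, skip the overhanging invocations:
// Shader side: invocations past the image edge write nothing.
if (any(greaterThanEqual(pixelCoord, imageSize(outputImage)))) {
    return;
}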

Related

Lattice Boltzmann WebGL translation results in NaNs everywhere

I came across an implementation of a Lattice Boltzmann fluid solver on a blog that walks through its implementation. I decided I wanted to translate it to Shadertoy with WebGL. Instead of computing each step necessary for equilibrium in a single pass, Shadertoy's limitations force me to use multiple texture buffers.
My algorithm is basically like this:
1. Calculate bulk velocity and density for each cell in texture A from textures B, C, and D, which hold the 9 direction components (NW, N, NE, W in each vec4 of B; E, SW, S, SE in each vec4 of C; center in each vec4 of D).
2. Once these components are calculated, recompute the stream/advection for each necessary cell in each texture buffer B, C, and D, and take the density and velocity components from A to compute the equilibrium.
3. Set the final value for each direction to new_direction - (new_direction - new_direction_equilibrium) / TAU, i.e. north_west - (north_west - north_west_eq) / TAU (the relaxation formula is sketched after this list). This is no different from the reference code.
Additionally, I wrap coordinates around so there are no boundary conditions, which avoids dealing with boundary logic, and the user clicks to cause disturbances in the lattice, which sets the "not moving" direction value to a number.
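For reference, step 3 is the standard BGK relaxation toward equilibrium. With the lattice speed c = LATTICE_SPEED and the weights w_i chosen in calc_equilibrium below, the per-direction update and equilibrium are
f_i <- f_i - (f_i - f_i_eq) / TAU
f_i_eq = w_i * density * (1 + 3*(e_i . u)/c + 4.5*(e_i . u)^2/c^2 - 1.5*(u . u)/c^2)
which is exactly what the buffer shaders and calc_equilibrium compute.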
In my code, however, I end up getting lots of NaNs (colored here in white; red represents the density) and I'm not sure why. I put safeguards in several places in my code to avoid zero densities causing issues, but that didn't seem to do anything.
You can test this out on Shadertoy, but my code is as follows:
//COMMON functions
const int DIRECTION_COUNT = 9;
const int DIMENSION_COUNT = 2;
const float LATTICE_SPEED = 0.1;
const float TAU = 0.9;

const vec2 north_offset = vec2(0.0, 1.0);
const vec2 north_west_offset = vec2(-1.0, 1.0);
const vec2 north_east_offset = vec2(1.0, 1.0);
const vec2 west_offset = vec2(-1.0, 0.0);
const vec2 east_offset = vec2(1.0, 0.0);
const vec2 south_offset = vec2(0.0, -1.0);
const vec2 south_west_offset = vec2(-1.0, -1.0);
const vec2 south_east_offset = vec2(1.0, -1.0);
const vec2 center_offset = vec2(0.0, 0.0);

const vec2 offsets[DIRECTION_COUNT] = vec2[DIRECTION_COUNT](
    north_west_offset,
    north_offset,
    north_east_offset,
    west_offset,
    center_offset,
    east_offset,
    south_west_offset,
    south_offset,
    south_east_offset);

const int north_west_tex_idx = 0;
const int north_tex_idx = 1;
const int north_east_tex_idx = 2;
const int west_tex_idx = 3;
const int east_tex_idx = 0;
const int south_west_tex_idx = 1;
const int south_tex_idx = 2;
const int south_east_tex_idx = 3;
const int center_tex_idx = 0;

float textureN(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + south_offset;
    return texture(NW_N_NE_W_channel, offset_coord / resolution)[north_tex_idx];
}

float textureNW(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + south_east_offset;
    return texture(NW_N_NE_W_channel, offset_coord / resolution)[north_west_tex_idx];
}

float textureNE(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + south_west_offset;
    return texture(NW_N_NE_W_channel, offset_coord / resolution)[north_east_tex_idx];
}

float textureW(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + east_offset;
    return texture(NW_N_NE_W_channel, offset_coord / resolution)[west_tex_idx];
}

float textureS(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + north_offset;
    return texture(E_SW_S_SE_channel, offset_coord / resolution)[south_tex_idx];
}

float textureSW(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + north_east_offset;
    return texture(E_SW_S_SE_channel, offset_coord / resolution)[south_west_tex_idx];
}

float textureSE(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + north_west_offset;
    return texture(E_SW_S_SE_channel, offset_coord / resolution)[south_east_tex_idx];
}

float textureE(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + west_offset;
    return texture(E_SW_S_SE_channel, offset_coord / resolution)[east_tex_idx];
}

float textureC(sampler2D C_channel, vec2 coord, vec2 resolution) {
    vec2 offset_coord = coord + center_offset;
    return texture(C_channel, offset_coord / resolution)[center_tex_idx];
}

float calc_equilibrium(const in float density,
                       const in vec2 velocity,
                       const in ivec2 ij) {
    int i = ij.x;
    int j = ij.y;
    // u . u
    float velmag = dot(velocity, velocity);
    // Compute the weight.
    float weight;
    if (i == 0 && j == 0) {
        weight = 4.0 / 9.0;
    } else if (i == 0 || j == 0) {
        weight = 1.0 / 9.0;
    } else {
        weight = 1.0 / 36.0;
    }
    // e_i . u
    float dotprod = float(i) * velocity.x + float(j) * velocity.y;
    float sum = 1.0;
    sum += (3.0 / LATTICE_SPEED) * dotprod;
    sum += (4.5 / (LATTICE_SPEED * LATTICE_SPEED)) * dotprod * dotprod;
    sum -= (1.5 / (LATTICE_SPEED * LATTICE_SPEED)) * velmag;
    if (density == 0.0) {
        return 0.0;
    }
    return weight * density * sum;
}
//Buffer A, takes in B, C, and D as input in that order
float[DIRECTION_COUNT] stream_all(sampler2D NW_N_NE_W_channel,
                                  sampler2D E_SW_S_SE_channel,
                                  sampler2D C_channel,
                                  in vec2 ifragCoord) {
    float north_west = textureNW(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
    float north = textureN(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
    float north_east = textureNE(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
    float west = textureW(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
    float east = textureE(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
    float south_west = textureSW(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
    float south = textureS(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
    float south_east = textureSE(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
    float center = textureC(C_channel, ifragCoord, iResolution.xy);
    return float[DIRECTION_COUNT](
        north_west, north, north_east, west, center, east, south_west, south, south_east
    );
}

float calc_density(const in float new_directions[DIRECTION_COUNT]) {
    float density = 0.0; // must be initialized; reading an uninitialized local is undefined in GLSL
    for (int i = 0; i < DIRECTION_COUNT; ++i) {
        density += new_directions[i];
    }
    return density;
}

vec2 calc_velocity(const in float new_directions[DIRECTION_COUNT], const in float density) {
    if (density == 0.0) {
        return vec2(0.0);
    }
    if (isinf(density)) {
        return vec2(0.0);
    }
    // Accumulate the momentum contribution of each direction.
    vec2 velocity = vec2(0.0);
    for (int idx = 0; idx < DIRECTION_COUNT; ++idx) {
        vec2 ij = offsets[idx];
        float i = ij.x;
        float j = ij.y;
        velocity.x += new_directions[idx] * (i);
        velocity.y += new_directions[idx] * (j);
    }
    return velocity * (LATTICE_SPEED / density);
}

void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    ivec2 ifragCoord = ivec2(fragCoord);
    float new_directions[DIRECTION_COUNT] = stream_all(iChannel0, iChannel1, iChannel2, fragCoord);
    float density = calc_density(new_directions);
    vec2 velocity = calc_velocity(new_directions, density);
    fragColor = vec4(density, velocity.x, velocity.y, 0.0);
    float center = textureC(iChannel2, fragCoord, iResolution.xy);
    float debug = center;
    if (isnan(density)) {
        debug = 1.0;
        fragColor.w = debug;
    }
    //fragColor = vec4(1.0);
}
//Buffer B, takes in B and A in that order
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    if (iFrame < 10) {
        fragColor = vec4(0.0);
        return;
    }
    ivec2 ifragCoord = ivec2(fragCoord - 0.5);
    float north_west = textureNW(iChannel0, fragCoord, iResolution.xy);
    float north = textureN(iChannel0, fragCoord, iResolution.xy);
    float north_east = textureNE(iChannel0, fragCoord, iResolution.xy);
    float west = textureW(iChannel0, fragCoord, iResolution.xy);
    vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
    float density = density_velocity.x;
    vec2 velocity = density_velocity.yz;
    float north_west_eq = calc_equilibrium(density, velocity, ivec2(north_west_offset));
    float north_eq = calc_equilibrium(density, velocity, ivec2(north_offset));
    float north_east_eq = calc_equilibrium(density, velocity, ivec2(north_east_offset));
    float west_eq = calc_equilibrium(density, velocity, ivec2(west_offset));
    fragColor = vec4((north_west - (north_west - north_west_eq) / TAU),
                     (north - (north - north_eq) / TAU),
                     (north_east - (north_east - north_east_eq) / TAU),
                     (west - (west - west_eq) / TAU));
}
//Buffer C, takes in C and A in that order.
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    if (iFrame < 10) {
        fragColor = vec4(0.0);
        return;
    }
    if (iFrame < 30 && fragCoord.y < -1.0) {
        fragColor = vec4(10.0, 0.0, 10.0, 0.0);
        return;
    }
    ivec2 ifragCoord = ivec2(fragCoord - 0.5);
    float east = textureE(iChannel0, fragCoord, iResolution.xy);
    float south_west = textureSW(iChannel0, fragCoord, iResolution.xy);
    float south = textureS(iChannel0, fragCoord, iResolution.xy);
    float south_east = textureSE(iChannel0, fragCoord, iResolution.xy);
    vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
    float density = density_velocity.x;
    vec2 velocity = density_velocity.yz;
    float east_eq = calc_equilibrium(density, velocity, ivec2(east_offset));
    float south_west_eq = calc_equilibrium(density, velocity, ivec2(south_west_offset));
    float south_eq = calc_equilibrium(density, velocity, ivec2(south_offset));
    float south_east_eq = calc_equilibrium(density, velocity, ivec2(south_east_offset));
    fragColor = vec4((east - (east - east_eq) / TAU),
                     (south_west - (south_west - south_west_eq) / TAU),
                     (south - (south - south_eq) / TAU),
                     (south_east - (south_east - south_east_eq) / TAU));
}
//Buffer D, takes in D and A in that order
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    if (iFrame < 10) {
        fragColor = vec4(1.0, 0.0, 0.0, 0.0);
        return;
    }
    ivec2 ifragCoord = ivec2(fragCoord - 0.5);
    float center = textureC(iChannel0, fragCoord, iResolution.xy);
    vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
    float density = density_velocity.x;
    vec2 velocity = density_velocity.yz;
    float center_eq = calc_equilibrium(density, velocity, ivec2(center_offset));
    fragColor = vec4((center - (center - center_eq) / TAU),
                     0.0,
                     0.0,
                     0.0);
    vec2 mouse = vec2(iMouse.zw);
    if (mouse.x > 0.0 && mouse.y > 0.0) {
        vec2 current_mouse = vec2(iMouse.xy);
        if (distance(fragCoord, current_mouse) < 3.0) {
            fragColor.r = 10.0;
        }
    }
}
//main image output, only takes in A as an iChannel
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    ivec2 ifragCoord = ivec2(fragCoord - 0.5);
    vec4 density_velocity = texelFetch(iChannel0, ifragCoord, 0);
    float density = density_velocity.r;
    vec2 velocity = density_velocity.gb;
    float vel_length = length(velocity);
    velocity = normalize(velocity);
    //Output to screen
    //fragColor = vec4(abs(velocity), density / 100.0, vel_length / 100.0);
    //fragColor = vec4(abs(velocity), 0.0, 1.0);
    fragColor = vec4(density / 10.0, 0.0, 0.0, 1.0);
    if (density_velocity.w == 1.0) {
        fragColor = vec4(1.0);
    }
}
What have I done incorrectly to end up with all of these NaNs? Is there a way to stop them?
Clamping the return value from calc_equilibrium should avoid the white NaN blooms.
return clamp(weight * density * sum, -1000.0, 1000.0);
Preventing the red/black noise blooms does not appear to be so simple.
For every frame that occurs while the mouse button is held down, a lot of energy is being added to the system, and at some point it is bound to boil over.
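Building on that, a minimal sketch of both guards together (the clamp bounds and the per-frame cap are arbitrary values, not tuned):
// In calc_equilibrium: bound the result so one bad cell cannot blow up its neighbours.
return clamp(weight * density * sum, -1000.0, 1000.0);
// In Buffer D's mouse handler: add a bounded amount per frame instead of
// pinning the cell at 10.0 for as long as the button is held.
if (distance(fragCoord, current_mouse) < 3.0) {
    fragColor.r = min(fragColor.r + 1.0, 10.0);
}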

Screen Space Reflections Artifacts

When I implemented SSR I ran into artifacts. Below I present the code and screenshots.
The SSR fragment shader:
#version 330 core

uniform sampler2D normalMap; // in view space
uniform sampler2D colorMap;
uniform sampler2D reflectionStrengthMap;
uniform sampler2D positionMap; // in view space
uniform mat4 projection;
uniform vec3 skyColor = vec3(0.1, 0, 0.5);

in vec2 texCoord;

layout (location = 0) out vec4 fragColor;

const int binarySearchCount = 10;
const int rayMarchCount = 30;
const float step = 0.05;
const float LLimiter = 0.2;
const float minRayStep = 0.2;

vec3 getPosition(in vec2 texCoord) {
    return texture(positionMap, texCoord).xyz;
}

vec2 binarySearch(inout vec3 dir, inout vec3 hitCoord, inout float dDepth) {
    float depth;
    vec4 projectedCoord;
    for (int i = 0; i < binarySearchCount; i++) {
        projectedCoord = projection * vec4(hitCoord, 1.0);
        projectedCoord.xy /= projectedCoord.w;
        projectedCoord.xy = projectedCoord.xy * 0.5 + 0.5;
        depth = getPosition(projectedCoord.xy).z;
        dDepth = hitCoord.z - depth;
        dir *= 0.5;
        if (dDepth > 0.0)
            hitCoord += dir;
        else
            hitCoord -= dir;
    }
    projectedCoord = projection * vec4(hitCoord, 1.0);
    projectedCoord.xy /= projectedCoord.w;
    projectedCoord.xy = projectedCoord.xy * 0.5 + 0.5;
    return vec2(projectedCoord.xy);
}

vec2 rayCast(vec3 dir, inout vec3 hitCoord, out float dDepth) {
    dir *= step;
    for (int i = 0; i < rayMarchCount; i++) {
        hitCoord += dir;
        vec4 projectedCoord = projection * vec4(hitCoord, 1.0);
        projectedCoord.xy /= projectedCoord.w;
        projectedCoord.xy = projectedCoord.xy * 0.5 + 0.5;
        float depth = getPosition(projectedCoord.xy).z;
        dDepth = hitCoord.z - depth;
        if ((dir.z - dDepth) < 1.2 && dDepth <= 0.0) return binarySearch(dir, hitCoord, dDepth);
    }
    return vec2(-1.0);
}

void main() {
    float reflectionStrength = texture(reflectionStrengthMap, texCoord).r;
    if (reflectionStrength == 0) {
        fragColor = texture(colorMap, texCoord);
        return;
    }
    vec3 normal = texture(normalMap, texCoord).xyz;
    vec3 viewPos = getPosition(texCoord);
    // Reflection vector
    vec3 reflected = normalize(reflect(normalize(viewPos), normalize(normal)));
    // Ray cast
    vec3 hitPos = viewPos;
    float dDepth;
    vec2 coords = rayCast(reflected * max(-viewPos.z, minRayStep), hitPos, dDepth);
    float L = length(getPosition(coords) - viewPos);
    L = clamp(L * LLimiter, 0, 1);
    float error = 1 - L;
    vec3 color = texture(colorMap, coords.xy).rgb * error;
    if (coords.xy != vec2(-1.0)) {
        fragColor = mix(texture(colorMap, texCoord), vec4(color, 1.0), reflectionStrength);
        return;
    }
    fragColor = mix(texture(colorMap, texCoord), vec4(skyColor, 1.0), reflectionStrength);
}
Result without blackout (without * error):
Result with blackout:
Note: blue is filled specifically to see artifacts
And one more question: what is the best way to add Fresnel without harming the scene?
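Regarding the Fresnel question: a common choice is Schlick's approximation, scaling the reflection contribution by the view angle. A minimal sketch using the variables already in main() above (F0 = 0.04 is an assumed base reflectivity for dielectrics, not something from your code):
vec3 viewDir = normalize(-viewPos); // in view space the camera sits at the origin
float F0 = 0.04;                    // assumed dielectric base reflectivity
float fresnel = F0 + (1.0 - F0) * pow(1.0 - max(dot(viewDir, normalize(normal)), 0.0), 5.0);
fragColor = mix(texture(colorMap, texCoord), vec4(color, 1.0), reflectionStrength * fresnel);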

How to implement Screen Space Reflection with DDA

I am trying to implement screen space reflection with DDA, following this article:
http://casual-effects.blogspot.jp/2014/08/screen-space-ray-tracing.html
However, it is not working well.
Below are my shaders.
This is the vertex shader code.
layout(location = 0) in vec4 position;
layout(location = 1) in vec4 color_0;
layout(location = 2) in vec3 normal;

uniform mat4 mtxL2W; // Local to World space.
uniform mat4 mtxW2C; // World to Clip space.

out vec4 varColor;
out vec3 varNormal;

void main()
{
    gl_Position = mtxW2C * mtxL2W * position;
    varColor = color_0;
    varNormal = normalize(mtxL2W * vec4(normal, 0)).xyz;
}
This is the fragment shader code.
in vec4 varColor;
in vec3 varNormal;

layout(location = 0) out vec4 outColor;

uniform sampler2D s0; // color
uniform sampler2D s1; // linear depth.

uniform mat4 mtxW2V; // World to View(Camera) space.
uniform mat4 mtxV2C; // View(Camera) to Clip space.
uniform mat4 mtxC2V; // Clip to View(Camera) space.
uniform mat4 mtxV2W; // View(Camera) to World space.

uniform vec4 camPos; // Camera position (World space).
uniform float nearPlaneZ;
uniform float maxDistance;
uniform float zThickness;
uniform int maxSteps;
uniform float stride;

float squaredLength(vec2 a, vec2 b)
{
    a -= b;
    return dot(a, a);
}

bool intersectsDepthBuffer(float z, float minZ, float maxZ)
{
    z += zThickness;
    return (maxZ >= z) && (minZ - zThickness <= z);
}

bool traceScreenSpaceRay(
    vec3 csOrig,
    vec3 csDir,
    out vec2 hitPixel,
    out vec3 hitPoint)
{
    // Clip to the near plane.
    float rayLength = (csOrig.z + csDir.z * maxDistance) < nearPlaneZ
        ? (nearPlaneZ - csOrig.z) / csDir.z
        : maxDistance;
    vec3 csEndPoint = csOrig + csDir * rayLength;

    // Project into homogeneous clip space.
    vec4 H0 = mtxV2C * vec4(csOrig, 1);
    vec4 H1 = mtxV2C * vec4(csEndPoint, 1);
    float k0 = 1.0 / H0.w;
    float k1 = 1.0 / H1.w;

    // The interpolated homogeneous version of the camera-space points.
    vec3 Q0 = csOrig * k0;
    vec3 Q1 = csEndPoint * k1;

    // Screen space point.
    vec2 P0 = H0.xy * k0;
    vec2 P1 = H1.xy * k1;

    // [-1, 1] -> [0, 1]
    P0 = P0 * 0.5 + 0.5;
    P1 = P1 * 0.5 + 0.5;

    ivec2 texsize = textureSize(s0, 0);
    P0 *= vec2(texsize.xy);
    P1 *= vec2(texsize.xy);
    P1.x = min(max(P1.x, 0), texsize.x);
    P1.y = min(max(P1.y, 0), texsize.y);

    // If the line is degenerate, make it cover at least one pixel to avoid handling zero-pixel extent as a special case later.
    P1 += squaredLength(P0, P1) < 0.0001
        ? vec2(0.01, 0.01)
        : vec2(0.0);
    vec2 delta = P1 - P0;

    // Permute so that the primary iteration is in x to collapse all quadrant-specific DDA cases later.
    bool permute = false;
    if (abs(delta.x) < abs(delta.y))
    {
        permute = true;
        delta = delta.yx;
        P0 = P0.yx;
        P1 = P1.yx;
    }

    float stepDir = sign(delta.x);
    float invdx = stepDir / delta.x;

    // Track the derivatives of Q and k.
    vec3 dQ = (Q1 - Q0) / invdx;
    float dk = (k1 - k0) / invdx;

    // y is slope.
    // slope = (y1 - y0) / (x1 - x0)
    vec2 dP = vec2(stepDir, delta.y / invdx);

    // Adjust end condition for iteration direction
    float end = P1.x * stepDir;

    int stepCount = 0;
    float prevZMaxEstimate = csOrig.z;
    float rayZMin = prevZMaxEstimate;
    float rayZMax = prevZMaxEstimate;
    float sceneZMax = rayZMax + 100.0f;

    dP *= stride;
    dQ *= stride;
    dk *= stride;

    vec4 PQk = vec4(P0, Q0.z, k0);
    vec4 dPQk = vec4(dP, dQ.z, dk);
    vec3 Q = Q0;

    for (;
        ((PQk.x * stepDir) <= end)
        && (stepCount < maxSteps)
        && !intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax)
        && (sceneZMax != 0.0);
        ++stepCount)
    {
        rayZMin = prevZMaxEstimate;
        rayZMax = (PQk.z + dPQk.z * 0.5) / (PQk.w + dPQk.w * 0.5);
        prevZMaxEstimate = rayZMax;

        if (rayZMin > rayZMax) {
            float tmp = rayZMin;
            rayZMin = rayZMax;
            rayZMax = tmp;
        }

        hitPixel = permute ? PQk.yx : PQk.xy;
        //hitPixel.y = texsize.y - hitPixel.y;

        sceneZMax = texelFetch(s1, ivec2(hitPixel), 0).r;

        PQk += dPQk;
    }

    // Advance Q based on the number of steps
    Q.xy += dQ.xy * stepCount;
    hitPoint = Q * (1.0f / PQk.w);
    hitPoint = vec3(sceneZMax, rayZMin, rayZMax);

    return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
}

void main()
{
    vec3 normal = normalize(varNormal);
    float linearDepth = texelFetch(s1, ivec2(gl_FragCoord.xy), 0).r;
    ivec2 texsize = textureSize(s0, 0);

    // Ray origin is camera origin.
    vec3 rayOrg = camPos.xyz;

    // Screen coordinate.
    vec4 pos = vec4(gl_FragCoord.xy / texsize, 0, 1);

    // [0, 1] -> [-1, 1]
    pos.xy = pos.xy * 2.0 - 1.0;

    // Screen-space -> Clip-space
    pos.xy *= linearDepth;

    // Clip-space -> View-space
    pos = mtxC2V * pos;
    pos.z = linearDepth;

    // View-space -> World-space.
    vec3 worldPos = (mtxV2W * vec4(pos.xyz, 1)).xyz;

    // Compute ray direction: from ray origin to world position.
    vec3 rayDir = normalize(worldPos - rayOrg);

    // Compute reflection vector.
    vec3 refDir = reflect(rayDir, normal);

    // Reflection vector origin is the world position.
    vec3 refOrg = worldPos;

    // Transform to view coordinates.
    refOrg = (mtxW2V * vec4(refOrg, 1)).xyz;
    refDir = (mtxW2V * vec4(refDir, 0)).xyz;

    vec2 hitPixel = vec2(0, 0);
    vec3 hitPoint = vec3(0, 0, 0);

    // Trace the screen space ray.
    bool isIntersect = traceScreenSpaceRay(refOrg, refDir, hitPixel, hitPoint);

    vec2 uv = hitPixel / texsize.xy;
    if (uv.x > 1.0 || uv.x < 0.0f || uv.y > 1.0 || uv.y < 0.0) {
        isIntersect = false;
    }

    if (isIntersect) {
        outColor = varColor * texture(s0, uv);
    }
    else {
        outColor = vec4(1, 1, 1, 1);
    }
}
I think Q0.z and Q1.z are always 1.0, so I think dQ.z is also always 0.0.
And dk is always a negative value.
What is wrong?
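For context on what those derivatives are meant to represent (this is background from the linked article, not a confirmed fix): under a perspective projection camera-space values are not linear in screen space, but Q/w and 1/w are, so the loop steps Q.z * k and k = 1/w linearly and recovers the camera-space depth by division. At a fraction s along the segment:
z(s) = ((1 - s) * z0/w0 + s * z1/w1) / ((1 - s)/w0 + s/w1)
Since Q0.z = csOrig.z * k0 = csOrig.z / H0.w, it can only come out as exactly 1.0 for every pixel if H0.w always equals csOrig.z, so it is worth dumping H0.w and H1.w to check what mtxV2C actually puts in the w component.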

How to generate an OBJ mesh file when using GLSL

I want to generate an OBJ file from a program that uses GLSL shaders to generate a mesh. I can already get the vertex information from the code, but how can I extract the triangle information from the .geom.glsl file and export it into an OBJ file?
Also, is there any helper function to do this? If not, how should I write the code to get the point and triangle information out of the geometry shader stage?
Here is the geom.glsl:
#version 400 core
#extension GL_EXT_geometry_shader4 : enable

layout(lines, invocations = 1) in;
layout(triangle_strip, max_vertices = 100) out;

uniform mat4 matLightView;
uniform mat4 matViewProjection;
uniform vec3 lightPos;
uniform vec3 camPos;
uniform int isExplicit;

in vec4 VertPosition[];
in vec4 VertColor[];
in vec3 VertNormal[];
in vec3 VertTexture[];
in float VertLengthTotal[];
in float VertLengthFromBeginning[];

out vec3 GeomNormal;
out vec2 GeomTexCoords;
out float GeomDiffuse;
out float GeomThickness;
out vec4 texCoordA;
out vec4 texCoordB;

const float PI2 = 2 * 3.141592654;

void main()
{
    // for(int i=0; i<gl_VerticesIn-1; ++i)
    for (int i = 0; i < gl_in.length() - 1; ++i)
    {
        // Reading data
        vec4 posS = VertPosition[i];
        vec4 posT = VertPosition[i + 1];
        vec3 vS = VertColor[i].xyz;
        vec3 vT = VertColor[i + 1].xyz;
        vec3 tS = VertTexture[i].xyz;
        vec3 tT = VertTexture[i + 1].xyz;
        float thickS = VertColor[i].w;
        float thickT = VertColor[i + 1].w;

        // Computing
        vec3 v11 = normalize(vS);
        vec3 v12 = normalize(cross(vS, tS));
        vec3 v21 = normalize(vT);
        vec3 v22 = normalize(cross(vT, tT));

        float rS = max(0.0001, thickS);
        float rT = max(0.0001, thickT);
        int pS = 10;
        int pT = 10;
        int forMax = 16;

        // Light position
        vec4 lPos = normalize(vec4(-lightPos.x, -lightPos.y, -lightPos.z, 1));
        vec3 L = normalize(lPos.xyz);

        for (int k = 0; k <= forMax; ++k)
        {
            float angle = k * (PI2 / forMax);
            vec3 newPS = posS.xyz + (v11 * sin(angle) + v12 * cos(angle)) * rS;
            vec3 newPT = posT.xyz + (v21 * sin(angle) + v22 * cos(angle)) * rT;
            float scale = 1.0f;
            float texX = float(k) / float(forMax);
            float edgeLength = length(posS - posT);
            float sTexY = (VertLengthFromBeginning[i] * scale);
            float tTexY = (VertLengthFromBeginning[i + 1] * scale);

            // Source vertex
            vec3 N = normalize(posS.xyz - newPS);
            texCoordB = matLightView * vec4(newPS, 1);
            GeomNormal = N;
            GeomThickness = rS;
            GeomDiffuse = rS < 0.0005 ? 0.0f : max(dot(N, L), 0.0);
            GeomTexCoords = vec2(texX, sTexY);
            gl_Position = matViewProjection * vec4(newPS, 1);
            EmitVertex();

            // Target vertex
            N = normalize(posT.xyz - newPT);
            texCoordB = matLightView * vec4(newPT, 1);
            GeomNormal = N;
            GeomThickness = rT;
            GeomDiffuse = rT < 0.0005 ? 0.0f : max(dot(N, L), 0.0);
            GeomTexCoords = vec2(texX, tTexY);
            gl_Position = matViewProjection * vec4(newPT, 1);
            EmitVertex();
        }
    }
    EndPrimitive();
}
And the vert.glsl:
#version 400 core

#define VERT_POSITION 0
#define VERT_NORMAL 1
#define VERT_COLOR 2
#define VERT_TEXTURE 3

layout(location = VERT_POSITION) in vec4 Position;
layout(location = VERT_NORMAL) in vec4 Normal;
layout(location = VERT_COLOR) in vec4 Color;
layout(location = VERT_TEXTURE) in vec4 Texture;

out vec4 VertPosition;
out vec3 VertNormal;
out vec3 VertTexture;
out vec4 VertColor;
out float VertLengthFromBeginning;
out float VertLengthTotal;

uniform mat4 matModel;

void main()
{
    VertPosition = matModel * Position;
    VertNormal = Normal.xyz;            // Direction
    VertColor = Color;                  // V from PTF, VertColor.w = thickness
    VertTexture = Texture.xyz;          // Tangent
    VertLengthFromBeginning = Normal.w; // Global texture coordinates
    VertLengthTotal = Texture.w;        // Total length of the chain
}
Thanks a lot!
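There is no built-in helper for this, but one way that avoids re-implementing the tube generation on the CPU is transform feedback: let GL capture the vertices the geometry shader emits into a buffer, read it back, and write the OBJ on the host. A minimal sketch, assuming the geometry shader is extended with an extra varying for the untransformed position (hypothetically named GeomPosition), and that prog, maxVertices, and drawTheLines() stand in for your existing program object, a size estimate, and your draw call (needs <vector> and <fstream>):
// Before linking: ask GL to capture GeomPosition for every emitted vertex.
const char* varyings[] = { "GeomPosition" };
glTransformFeedbackVaryings(prog, 1, varyings, GL_INTERLEAVED_ATTRIBS);
glLinkProgram(prog);

// A buffer big enough for the captured vertices (3 floats each).
GLuint tfbo;
glGenBuffers(1, &tfbo);
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, tfbo);
glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, maxVertices * 3 * sizeof(GLfloat), NULL, GL_STATIC_READ);
glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, tfbo);

GLuint query;
glGenQueries(1, &query);
glEnable(GL_RASTERIZER_DISCARD); // we only want the vertices, not the pixels
glBeginQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, query);
glBeginTransformFeedback(GL_TRIANGLES); // triangle strips are captured as individual triangles
drawTheLines();
glEndTransformFeedback();
glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);
glDisable(GL_RASTERIZER_DISCARD);

GLuint numTriangles = 0;
glGetQueryObjectuiv(query, GL_QUERY_RESULT, &numTriangles);

// Read the vertices back and dump them in OBJ form (OBJ indices are 1-based).
std::vector<GLfloat> v(numTriangles * 9);
glGetBufferSubData(GL_TRANSFORM_FEEDBACK_BUFFER, 0, v.size() * sizeof(GLfloat), v.data());
std::ofstream obj("mesh.obj");
for (size_t i = 0; i < v.size(); i += 3)
    obj << "v " << v[i] << ' ' << v[i + 1] << ' ' << v[i + 2] << '\n';
for (GLuint t = 0; t < numTriangles; ++t)
    obj << "f " << 3 * t + 1 << ' ' << 3 * t + 2 << ' ' << 3 * t + 3 << '\n';
Alternatively, since the geometry shader just sweeps a ring of forMax segments around each line segment, you could replicate that loop on the CPU and emit the same vertices and faces directly.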

LWJGL Objects not shown on ATI graphics card

I've written an LWJGL application that loads .obj files, reads them, and displays them (using display lists).
On my NVIDIA graphics card everything runs fine, but on an AMD graphics card I can't see the objects.
How I pass data to the shaders:
glUseProgram(shaderEngine.obj);
glUniform1i(glGetUniformLocation(shaderEngine.obj, "inOrangeJuice"), inOrangeJuice ? 1 : 0);
shaderEngine.loadMatrix(glGetUniformLocation(shaderEngine.standard, "projectionMatrix"), camera.projectionMatrix);
shaderEngine.loadMatrix(glGetUniformLocation(shaderEngine.obj, "viewMatrix"), camera.viewMatrix);
The model matrix is loaded:
shaderEngine.createModelMatrix(new Vector3f(x, y, z), new Vector3f(rx, ry, rz), new Vector3f(1, 1, 1));
shaderEngine.loadModelMatrix(shaderEngine.obj);
Fragment Shader:
#version 130

uniform sampler2D tex;
uniform vec2 texCoord[4];

float textureSize;
float texelSize;

uniform int inOrangeJuice;

bool pointInTriangle(vec3 P, vec3 A, vec3 B, vec3 C)
{
    vec3 u = B - A;
    vec3 v = C - A;
    vec3 w = P - A;

    vec3 vCrossW = cross(v, w);
    vec3 vCrossU = cross(v, u);
    if (dot(vCrossW, vCrossU) < 0)
    {
        return false;
    }

    vec3 uCrossW = cross(u, w);
    vec3 uCrossV = cross(u, v);
    if (dot(uCrossW, uCrossV) < 0)
    {
        return false;
    }

    float denom = length(uCrossV);
    float r = length(vCrossW);
    float t = length(uCrossW);
    return (r + t <= 1);
}

vec4 texture2DBilinear(sampler2D textureSampler, vec2 uv)
{
    vec4 tl = texture2D(textureSampler, uv);
    vec4 tr = texture2D(textureSampler, uv + vec2(texelSize, 0));
    vec4 bl = texture2D(textureSampler, uv + vec2(0, texelSize));
    vec4 br = texture2D(textureSampler, uv + vec2(texelSize, texelSize));
    vec2 f = fract(uv.xy * textureSize);
    vec4 tA = mix(tl, tr, f.x);
    vec4 tB = mix(bl, br, f.x);
    return mix(tA, tB, f.y);
}

void main()
{
    ivec2 textureSize2d = textureSize(tex, 0);
    textureSize = float(textureSize2d.x);
    texelSize = 1.0 / textureSize;

    // Texture coordinate:
    vec2 texCoord = (gl_TexCoord[0].st);

    bool inOJ = false;
    if (inOrangeJuice == 1)
    {
        float depth = gl_FragCoord.z / gl_FragCoord.w; // works only with perspective projection
        depth = depth / 6;
        if (depth > 1)
        {
            depth = 1;
        }
        inOJ = true;
        gl_FragColor = texture2DBilinear(tex, texCoord) * gl_Color * (1.0 - depth) + vec4(1.0, 0.5, 0.0, 1.0) * depth;
    }
    if (inOJ == false)
    {
        gl_FragColor = texture2DBilinear(tex, texCoord) * gl_Color;
    }

    //Nothing is shown, inOrangeJuice should be 0
    //gl_FragColor = vec4(inOrangeJuice, 0, 0, 1);

    //Always works:
    //gl_FragColor = texture2D(tex, texCoord) * gl_Color;
}
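Not a fix, but when something renders on NVIDIA and not on AMD, the first step is usually to check the GL error state and the shader/program logs, because AMD's GLSL compiler is much stricter about spec violations that NVIDIA silently accepts. A minimal sketch in LWJGL style (fragmentShaderId is a placeholder for however you store the shader object):
int err = glGetError();
if (err != GL_NO_ERROR) {
    System.err.println("GL error: " + err);
}
// Compile and link logs often name the exact line AMD rejects.
System.err.println(glGetShaderInfoLog(fragmentShaderId, 4096));
System.err.println(glGetProgramInfoLog(shaderEngine.obj, 4096));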