Lattice Boltzman WebGL translation results in NaNs everywhere - glsl

I came across this implementation of Lattice Boltzmann fluid solver from this blog which goes over its implementation. I decided I wanted to translate this to ShaderToy with webgl. Instead of computing each step necessary for equilibrium, the limitations of shader toy force me to use multiple texture frames.
My algorithm is basically like this:
Calculate bulk velocity and density for each cell in texture A from textures B, C, and D which hold the 9 direction components (NW,N,NE,W in each vec4 in B, E,SW,S,SE in each vec4 in C, center in each vec4 in D).
After these components are calculated, recalculate stream/advection for each necessary cell for each texture frame, B,C,D, and take the density and velocity components from A to compute the equilibrium.
set the final values for each direction to new_direction - (new_direction - new_direction_equilibrium), ie(north_west - (north_west - north_west_eq))`. This isn't any different from the reference code.
Additionally I wrap coordinates around, so there are no boundary conditions in order to avoid dealing with boundary logic, and the user clicks to cause disturbances in the lattice, which sets the "not moving" direction value to a number.
In my code however, I end up getting lots of NaNs (colored here in white, red represents the density) and I'm not sure why. I put safe guards in locations in my code to avoid zero densities causing issues, but that didn't seem to do anything.
You can test this out on shadertoy, but my code is as follows:
//COMMON functions
const int DIRECTION_COUNT = 9;
const int DIMENSION_COUNT = 2;
const float LATTICE_SPEED = 0.1;
const float TAU = 0.9;
const vec2 north_offset = vec2(0.0,1.0);
const vec2 north_west_offset = vec2(-1.0,1.0);
const vec2 north_east_offset = vec2(1.0,1.0);
const vec2 west_offset = vec2(-1.0,0.0);
const vec2 east_offset = vec2(1.0,0.0);
const vec2 south_offset = vec2(0.0,-1.0);
const vec2 south_west_offset = vec2(-1.0,-1.0);
const vec2 south_east_offset = vec2(1.0,-1.0);
const vec2 center_offset = vec2(0.0,0.0);
const vec2 offsets[DIRECTION_COUNT] = vec2[DIRECTION_COUNT](
north_west_offset,
north_offset,
north_east_offset,
west_offset,
center_offset,
east_offset,
south_west_offset,
south_offset,
south_east_offset);
const int north_west_tex_idx = 0;
const int north_tex_idx = 1;
const int north_east_tex_idx = 2;
const int west_tex_idx = 3;
const int east_tex_idx = 0;
const int south_west_tex_idx = 1;
const int south_tex_idx = 2;
const int south_east_tex_idx = 3;
const int center_tex_idx = 0;
float textureN(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + south_offset;
return texture(NW_N_NE_W_channel, offset_coord/resolution)[north_tex_idx];
}
float textureNW(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + south_east_offset;
return texture(NW_N_NE_W_channel, offset_coord/resolution)[north_west_tex_idx];
}
float textureNE(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + south_west_offset;
return texture(NW_N_NE_W_channel, offset_coord/resolution)[north_east_tex_idx];
}
float textureW(sampler2D NW_N_NE_W_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + east_offset;
return texture(NW_N_NE_W_channel, offset_coord/resolution)[west_tex_idx];
}
float textureS(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + north_offset;
return texture(E_SW_S_SE_channel, offset_coord/resolution)[south_tex_idx];
}
float textureSW(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + north_east_offset;
return texture(E_SW_S_SE_channel, offset_coord/resolution)[south_west_tex_idx];
}
float textureSE(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + north_west_offset;
return texture(E_SW_S_SE_channel, offset_coord/resolution)[south_east_tex_idx];
}
float textureE(sampler2D E_SW_S_SE_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + west_offset;
return texture(E_SW_S_SE_channel, offset_coord/resolution)[east_tex_idx];
}
float textureC(sampler2D C_channel, vec2 coord, vec2 resolution){
vec2 offset_coord = coord + center_offset;
return texture(C_channel, offset_coord/resolution)[center_tex_idx];
}
float calc_equilibrium(const in float density,
const in vec2 velocity,
const in ivec2 ij) {
int i = ij.x;
int j = ij.y;
// u . u
float velmag = dot(velocity, velocity);
// Compute the weight.
float weight;
if(i == 0 && j == 0) {
weight = 4.0 / 9.0;
} else if(i == 0 || j == 0) {
weight = 1.0 / 9.0;
} else {
weight = 1.0 / 36.0;
}
// e_i . u
float dotprod = float(i) * velocity.x + float(j) * velocity.y;
float sum = 1.0;
sum += (3.0 / LATTICE_SPEED) * dotprod;
sum += (4.5 / (LATTICE_SPEED * LATTICE_SPEED)) * dotprod * dotprod;
sum -= (1.5 / (LATTICE_SPEED * LATTICE_SPEED)) * velmag;
if(density == 0.0){
return 0.0;
}
return weight * density * sum;
}
|
//Buffer A, takes in B, C, and D as in put in that order
float[DIRECTION_COUNT] stream_all(
sampler2D NW_N_NE_W_channel,
sampler2D E_SW_S_SE_channel,
sampler2D C_channel,
in vec2 ifragCoord){
float north_west = textureNW(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
float north = textureN(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
float north_east = textureNE(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
float west = textureW(NW_N_NE_W_channel, ifragCoord, iResolution.xy);
float east = textureE(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
float south_west = textureSW(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
float south = textureS(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
float south_east = textureSE(E_SW_S_SE_channel, ifragCoord, iResolution.xy);
float center = textureC(C_channel, ifragCoord, iResolution.xy);
return float[DIRECTION_COUNT](
north_west, north, north_east, west, center, east, south_west, south, south_east
);
}
float calc_density(const in float new_directions[DIRECTION_COUNT]) {
float density;
for(int i = 0; i < DIRECTION_COUNT; ++i){
density += new_directions[i];
}
return density;
}
vec2 calc_velocity(const in float new_directions[DIRECTION_COUNT], const in float density) {
if(density == 0.0){
return vec2(0.0);
}
if(isinf(density)){
return vec2(0.0);
}
// Compute target indices.
vec2 velocity = vec2(0.0);
for(int idx = 0; idx < DIRECTION_COUNT; ++idx){
vec2 ij = offsets[idx];
float i = ij.x;
float j = ij.y;
velocity.x += new_directions[idx] * (i);
velocity.y += new_directions[idx] * (j);
}
return velocity * (LATTICE_SPEED/density);
}
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
ivec2 ifragCoord = ivec2(fragCoord);
float new_directions[DIRECTION_COUNT] = stream_all(iChannel0, iChannel1, iChannel2, fragCoord);
float density = calc_density(new_directions);
vec2 velocity = calc_velocity(new_directions, density);
fragColor = vec4(density,velocity.x,velocity.y,0.0);
float center = textureC(iChannel2, fragCoord, iResolution.xy);
float debug = center;
if(isnan(density)){
debug = 1.0;
fragColor.w = debug;
}
//fragColor = vec4(1.0);
}
|
//Buffer B, takes in B, and A in that order
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
if(iFrame < 10){
fragColor = vec4(0.0);
return;
}
ivec2 ifragCoord = ivec2(fragCoord - 0.5);
float north_west = textureNW(iChannel0, fragCoord, iResolution.xy);
float north = textureN(iChannel0, fragCoord, iResolution.xy);
float north_east = textureNE(iChannel0, fragCoord, iResolution.xy);
float west = textureW(iChannel0, fragCoord, iResolution.xy);
vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
float density = density_velocity.x;
vec2 velocity = density_velocity.yz;
float north_west_eq = calc_equilibrium(density, velocity, ivec2(north_west_offset));
float north_eq = calc_equilibrium(density, velocity, ivec2(north_offset));
float north_east_eq = calc_equilibrium(density, velocity, ivec2(north_east_offset));
float west_eq = calc_equilibrium(density, velocity, ivec2(west_offset));
fragColor = vec4((north_west - (north_west - north_west_eq) / TAU),
(north - (north - north_eq) / TAU),
(north_east - (north_east - north_east_eq) / TAU),
(west - (west - west_eq) / TAU));
}
|
//Buffer C, takes in C and A in that order.
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
if(iFrame < 10){
fragColor = vec4(0.0);
return;
}
if(iFrame < 30 && fragCoord.y < -1.0){
fragColor = vec4(10.0, 0.0,10.0,0.0);
return;
}
ivec2 ifragCoord = ivec2(fragCoord - 0.5);
float east = textureE(iChannel0, fragCoord, iResolution.xy);
float south_west = textureSW(iChannel0, fragCoord, iResolution.xy);
float south = textureS(iChannel0, fragCoord, iResolution.xy);
float south_east = textureSE(iChannel0, fragCoord, iResolution.xy);
vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
float density = density_velocity.x;
vec2 velocity = density_velocity.yz;
float east_eq = calc_equilibrium(density, velocity, ivec2(east_offset));
float south_west_eq = calc_equilibrium(density, velocity, ivec2(south_west_offset));
float south_eq = calc_equilibrium(density, velocity, ivec2(south_offset));
float south_east_eq = calc_equilibrium(density, velocity, ivec2(south_east_offset));
fragColor = vec4((east - (east - east_eq) / TAU),
(south_west - (south_west - south_west_eq) / TAU),
(south - (south - south_eq) / TAU),
(south_east - (south_east - south_east_eq) / TAU));
}
|
//Buffer D takes in D and A in that order
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
if(iFrame < 10){
fragColor = vec4(1, 0.0,0.0,0.0);
return;
}
ivec2 ifragCoord = ivec2(fragCoord - 0.5);
float center = textureC(iChannel0, fragCoord, iResolution.xy);
vec4 density_velocity = texelFetch(iChannel1, ifragCoord, 0);
float density = density_velocity.x;
vec2 velocity = density_velocity.yz;
float center_eq = calc_equilibrium(density, velocity, ivec2(center_offset));
fragColor = vec4((center - (center - center_eq) / TAU),
0.0,
0.0,
0.0);
vec2 mouse = vec2(iMouse.zw);
if(mouse.x > 0.0 && mouse.y > 0.0){
vec2 current_mouse = vec2(iMouse.xy);
if(distance(fragCoord, current_mouse) < 3.0){
fragColor.r = vec4(10.0).r;
}
}
}
|
//main image output, only takes in A as an iChannel
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
ivec2 ifragCoord = ivec2(fragCoord-0.5);
vec4 density_velocity = texelFetch(iChannel0, ifragCoord, 0);
float density = density_velocity.r;
vec2 velocity = density_velocity.gb;
float vel_length = length(velocity);
velocity = normalize(velocity);
//Output to screen
//fragColor = vec4(abs(velocity),density/100.0,vel_length/100.0);
//fragColor = vec4(abs(velocity),0.0,1.0);
fragColor = vec4(density/10.0,0.0,0.0,1.0);
//
if(density_velocity.w == 1.0){
fragColor = vec4(1.0);
}
}
What have I done incorrectly to result in all of these Nans? Is there a way to stop them?

Clamping the return value from calc_equilibrium should avoid the white NaN blooms.
return clamp(weight * density * sum, -1000.0, 1000.0);
Preventing the red/black noise blooms does not appear to be so simple.
For every frame that occurs while the mouse button is held down, a lot of energy is being added to the system, and at some point it is bound to boil over.

Related

Why is my Open GL Compute Shader so slow?

I have been building an OpenGL compute shader that implements ray tracing. Currently it just computes the pixel color by casting a ray against an array of triangles.
#version 430 core
struct Triangle {
vec3 vertex1;
vec3 vertex2;
vec3 vertex3;
vec3 color1;
vec3 color2;
vec3 color3;
vec3 normal1;
vec3 normal2;
vec3 normal3;
vec3 edge1;
vec3 edge2;
};
layout (std430, binding = 0) readonly buffer TriangleBuffer {
int numTriangles;
Triangle triangles[];
};
layout (std430, binding = 1, column_major) buffer CameraBuffer {
vec3 cameraPosition;
mat4 view;
mat4 projection;
mat4 inverseViewProjection;
};
layout (rgba8, binding = 2) writeonly uniform image2D outputImage;
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
vec3 getBarycentricCoords(int triangleIndex, vec3 closestIntersectionPoint) {
vec3 v0 = triangles[triangleIndex].vertex2 - triangles[triangleIndex].vertex1;
vec3 v1 = triangles[triangleIndex].vertex3 - triangles[triangleIndex].vertex1;
vec3 v2 = closestIntersectionPoint - triangles[triangleIndex].vertex1;
float d00 = dot(v0, v0);
float d01 = dot(v0, v1);
float d11 = dot(v1, v1);
float d20 = dot(v2, v0);
float d21 = dot(v2, v1);
float denom = d00 * d11 - d01 * d01;
float b1 = (d11 * d20 - d01 * d21) / denom;
float b2 = (d00 * d21 - d01 * d20) / denom;
float b0 = 1.0f - b1 - b2;
return vec3(b0, b1, b2);
}
vec3 getTriangleColor(int triangleIndex, vec3 closestIntersectionPoint) {
vec3 barycentric = getBarycentricCoords(triangleIndex, closestIntersectionPoint);
vec3 triangleColor = barycentric.x * triangles[triangleIndex].color1 + barycentric.y * triangles[triangleIndex].color2 + barycentric.z * triangles[triangleIndex].color3;
return triangleColor;
}
bool rayTriangleIntersection(vec3 rayOrigin, vec3 rayDirection, int triangleIndex, out vec3 intersectionPoint) {
vec3 h = cross(rayDirection, triangles[triangleIndex].edge2);
float a = dot(triangles[triangleIndex].edge1, h);
if (a > -0.00001 && a < 0.00001) {
return false;
}
float f = 1.0 / a;
vec3 s = rayOrigin - triangles[triangleIndex].vertex1;
float u = f * dot(s, h);
if (u < 0.0 || u > 1.0) {
return false;
}
vec3 q = cross(s, triangles[triangleIndex].edge1);
float v = f * dot(rayDirection, q);
if (v < 0.0 || u + v > 1.0) {
return false;
}
float t = f * dot(triangles[triangleIndex].edge2, q);
if (t > 0.00001) {
intersectionPoint = rayOrigin + rayDirection * t;
return true;
}
return false;
}
vec3 unProject(vec3 win, mat4 model, mat4 proj, vec4 viewport) {
vec4 tmp = vec4(win, 1);
tmp.x = (tmp.x - viewport[0]) / viewport[2];
tmp.y = (tmp.y - viewport[1]) / viewport[3];
tmp.x = tmp.x * 2 - 1;
tmp.y = tmp.y * 2 - 1;
vec4 obj = inverseViewProjection * tmp;
obj /= obj.w;
return obj.xyz;
}
void main() {
ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
vec4 viewport = vec4(0, 0, vec2(imageSize(outputImage)).xy);
vec3 near = vec3(pixelCoord.x, pixelCoord.y, -1);
vec3 far = vec3(pixelCoord.x, pixelCoord.y, 0.9518f);
vec3 rayOrigin = unProject(near, view, projection, viewport);
vec3 rayWorldFar = unProject(far, view, projection, viewport);
vec3 rayDirection = normalize(rayWorldFar - rayOrigin);
vec3 intersectionPoint;
vec3 closestIntersectionPoint = vec3(0,0,0);
float closestIntersectionDistance = 999999999.0f;
vec3 finalColor = vec3(0,0,0);
bool intersectionFound = false;
for (int triangleIndex = 0; triangleIndex < numTriangles; triangleIndex++) {
if (rayTriangleIntersection(rayOrigin, rayDirection, triangleIndex, intersectionPoint)) {
float intersectionDistance = distance(intersectionPoint, rayOrigin);
if (intersectionDistance < closestIntersectionDistance) {
closestIntersectionDistance = intersectionDistance;
closestIntersectionPoint = intersectionPoint;
finalColor = getTriangleColor(triangleIndex, closestIntersectionPoint);
intersectionFound = true;
}
}
}
if (intersectionFound) {
imageStore(outputImage, pixelCoord, vec4(finalColor, 1.0f));
}
else
imageStore(outputImage, pixelCoord, vec4(0));
}
However when running the shader I only get 30fps. There is a significant bottleneck in the code. This is running with only 20 triangles.
What optimizations can I make to increase the performance of the code? Why is there a bottleneck?
I managed to more than double my framerate by making the following modifications:
Change layout to a higher value
for this I used GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS
GLint glMaxComputeWorkGroupInvocations = 0;
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glMaxComputeWorkGroupInvocations);
LIGHTING_SHADER_LOCAL_SIZE_Y = LIGHTING_SHADER_LOCAL_SIZE_X = sqrt(glMaxComputeWorkGroupInvocations);
and update the layout sizes:
layout (local_size_x = ${LIGHTING_SHADER_LOCAL_SIZE_X}, local_size_y = ${LIGHTING_SHADER_LOCAL_SIZE_Y}, local_size_z = 1) in;
Get pixelCoord based on group_id and local_id
ivec3 groupId = ivec3(gl_WorkGroupID);
ivec3 localId = ivec3(gl_LocalInvocationID);
ivec3 globalId = ivec3(gl_GlobalInvocationID);
ivec3 coords = groupId * ivec3(gl_WorkGroupSize) + localId;
ivec2 pixelCoord = ivec2(coords.xy);
Update glDispatchCompute
glDispatchCompute(windowWidth / LIGHTING_SHADER_LOCAL_SIZE_X, windowHeight / LIGHTING_SHADER_LOCAL_SIZE_Y, 1);

Compute Normals After Vertex Deformation?

I am coding a vertex and a fragment shader trying to distort the surface of some water and then computing blinn-phong lighting on the surface. I am able to successfully compute the deformed matrices with a simple noise function, but how can I find the distorted normals? Since it isn't a linear transformation I am stuck, could anyone help?
Here are the relevant files:
vertex shader:
#version 150
uniform mat4 u_Model;
uniform mat4 u_ModelInvTr;
uniform mat4 u_ViewProj;
uniform vec4 u_Color;
uniform int u_Time;
in vec4 vs_Pos; // The array of vertex positions passed to the shader
in vec4 vs_Nor; // The array of vertex normals passed to the shader
in vec4 vs_Col; // The array of vertex colors passed to the shader.
in vec2 vs_UV; // UV coords for texture to pass thru to fragment shader
in float vs_Anim; // 0.f or 1.f To pass thru to fragment shader
in float vs_T2O;
out vec4 fs_Pos;
out vec4 fs_Nor;
out vec4 fs_LightVec;
out vec4 fs_Col;
out vec2 fs_UVs;
out float fs_Anim;
out float fs_dimVal;
out float fs_T2O;
uniform vec4 u_CamPos;
out vec4 fs_CamPos;
const vec4 lightDir = normalize(vec4(0.0, 1.f, 0.0, 0));
mat4 rotationMatrix(vec3 axis, float angle) {
axis = normalize(axis);
float s = sin(angle);
float c = cos(angle);
float oc = 1.0 - c;
return mat4(oc * axis.x * axis.x + c, oc * axis.x * axis.y - axis.z * s, oc * axis.z * axis.x + axis.y * s, 0.0, oc * axis.x * axis.y + axis.z * s, oc * axis.y * axis.y + c, oc * axis.y * axis.z - axis.x * s, 0.0,oc * axis.z * axis.x - axis.y * s, oc * axis.y * axis.z + axis.x * s, oc * axis.z * axis.z + c, 0.0, 0.0, 0.0, 0.0, 1.0);
}
vec4 rotateLightVec(float deg, vec4 LV) {
mat4 R = rotationMatrix(vec3(0,0,1), deg);
return R * LV;
}
float random1(vec3 p) {
return fract(sin(dot(p, vec3(127.1, 311.7, 191.999)))*43758.5453);
}
vec3 random2( vec3 p ) {
return fract( sin( vec3(dot(p, vec3(127.1, 311.7, 58.24)), dot(p, vec3(269.5, 183.3, 657.3)), dot(p, vec3(420.69, 69.420, 469.20))) ) * 43758.5453);
}
void main()
{
fs_Col = vs_Col;
fs_UVs = vs_UV;
fs_Anim = vs_Anim;
fs_T2O = vs_T2O;
mat3 invTranspose = mat3(u_ModelInvTr);
fs_Nor = vec4(invTranspose * vec3(vs_Nor), 0);
vec4 modelposition = u_Model * vs_Pos;
if (vs_Anim != 0) { // if we want to animate this surface
// check region in texture to decide which animatable type is drawn
bool lava = fs_UVs.x >= 13.f/16.f && fs_UVs.y < 2.f/16.f;
bool water = !lava && fs_UVs.x >= 13.f/16.f && fs_UVs.y <= 4.f/16.f;
if (water) {
// define an oscillating time so that model can transition back and forth
float t = (cos(u_Time * 0.05) + 1)/2; // u_Time increments by 1 every frame. Domain [0,1]
vec3 temp = random2(vec3(modelposition.x, modelposition.y, modelposition.z)); // range [0, 1]
temp = (temp - 0.5)/25; // [0, 1/scalar]
modelposition.x = mix(modelposition.x - temp.x, modelposition.x + temp.x, t);
modelposition.y = mix(modelposition.y - temp.y, modelposition.y + 3*temp.y, t);
modelposition.z = mix(modelposition.z - temp.z, modelposition.z + temp.z, t);
} else if (lava) {
// define an oscillating time so that model can transition back and forth
float t = (cos(u_Time * 0.01) + 1)/2; // u_Time increments by 1 every frame. Domain [0,1]
vec3 temp = random2(vec3(modelposition.x, modelposition.y, modelposition.z)); // range [0, 1]
temp = (temp - 0.5)/25; // [0, 1/scalar]
modelposition.x = mix(modelposition.x - temp.x, modelposition.x + temp.x, t);
modelposition.y = mix(modelposition.y - temp.y, modelposition.y + 3*temp.y, t);
modelposition.z = mix(modelposition.z - temp.z, modelposition.z + temp.z, t);
}
}
fs_dimVal = random1(modelposition.xyz/100.f);
fs_LightVec = rotateLightVec(0.001 * u_Time, lightDir); // Compute the direction in which the light source lies
fs_CamPos = u_CamPos; // uniform handle for the camera position instead of the inverse
fs_Pos = modelposition;
gl_Position = u_ViewProj * modelposition;// gl_Position is a built-in variable of OpenGL which is
// used to render the final positions of the geometry's vertices
}
fragment shader:
#version 330
uniform vec4 u_Color; // The color with which to render this instance of geometry.
uniform sampler2D textureSampler;
uniform int u_Time;
uniform mat4 u_ViewProj;
uniform mat4 u_Model;
in vec4 fs_Pos;
in vec4 fs_Nor;
in vec4 fs_LightVec;
in vec4 fs_Col;
in vec2 fs_UVs;
in float fs_Anim;
in float fs_T2O;
in float fs_dimVal;
out vec4 out_Col;
in vec4 fs_CamPos;
float random1(vec3 p) {
return fract(sin(dot(p,vec3(127.1, 311.7, 191.999)))
*43758.5453);
}
float random1b(vec3 p) {
return fract(sin(dot(p,vec3(169.1, 355.7, 195.999)))
*95751.5453);
}
float mySmoothStep(float a, float b, float t) {
t = smoothstep(0, 1, t);
return mix(a, b, t);
}
float cubicTriMix(vec3 p) {
vec3 pFract = fract(p);
float llb = random1(floor(p) + vec3(0,0,0));
float lrb = random1(floor(p) + vec3(1,0,0));
float ulb = random1(floor(p) + vec3(0,1,0));
float urb = random1(floor(p) + vec3(1,1,0));
float llf = random1(floor(p) + vec3(0,0,1));
float lrf = random1(floor(p) + vec3(1,0,1));
float ulf = random1(floor(p) + vec3(0,1,1));
float urf = random1(floor(p) + vec3(1,1,1));
float mixLoBack = mySmoothStep(llb, lrb, pFract.x);
float mixHiBack = mySmoothStep(ulb, urb, pFract.x);
float mixLoFront = mySmoothStep(llf, lrf, pFract.x);
float mixHiFront = mySmoothStep(ulf, urf, pFract.x);
float mixLo = mySmoothStep(mixLoBack, mixLoFront, pFract.z);
float mixHi = mySmoothStep(mixHiBack, mixHiFront, pFract.z);
return mySmoothStep(mixLo, mixHi, pFract.y);
}
float fbm(vec3 p) {
float amp = 0.5;
float freq = 4.0;
float sum = 0.0;
for(int i = 0; i < 8; i++) {
sum += cubicTriMix(p * freq) * amp;
amp *= 0.5;
freq *= 2.0;
}
return sum;
}
void main()
{
vec4 diffuseColor = texture(textureSampler, fs_UVs);
bool apply_lambert = true;
float specularIntensity = 0;
if (fs_Anim != 0) {
// check region in texture to decide which animatable type is drawn
bool lava = fs_UVs.x >= 13.f/16.f && fs_UVs.y < 2.f/16.f;
bool water = !lava && fs_UVs.x >= 13.f/16.f && fs_UVs.y < 4.f/16.f;
if (lava) {
// slowly gyrate texture and lighten and darken with random dimVal from vert shader
vec2 movingUVs = vec2(fs_UVs.x + fs_Anim * 0.065/16 * sin(0.01*u_Time),
fs_UVs.y - fs_Anim * 0.065/16 * sin(0.01*u_Time + 3.14159/2));
diffuseColor = texture(textureSampler, movingUVs);
vec4 warmerColor = diffuseColor + vec4(0.3, 0.3, 0, 0);
vec4 coolerColor = diffuseColor - vec4(0.1, 0.1, 0, 0);
diffuseColor = mix(warmerColor, coolerColor, 0.5 + fs_dimVal * 0.65*sin(0.02*u_Time));
apply_lambert = false;
} else if (water) {
// blend between 3 different points in texture to create a wavy subtle change over time
vec2 offsetUVs = vec2(fs_UVs.x - 0.5f/16.f, fs_UVs.y - 0.5f/16.f);
diffuseColor = texture(textureSampler, fs_UVs);
vec4 altColor = texture(textureSampler, offsetUVs);
altColor.x += fs_dimVal * pow(altColor.x+.15, 5);
altColor.y += fs_dimVal * pow(altColor.y+.15, 5);
altColor.z += 0.5 * fs_dimVal * pow(altColor.z+.15, 5);
diffuseColor = mix(diffuseColor, altColor, 0.5 + 0.35*sin(0.05*u_Time));
offsetUVs -= 0.25f/16.f;
vec4 newColor = texture(textureSampler, offsetUVs);
diffuseColor = mix(diffuseColor, newColor, 0.5 + 0.5*sin(0.025*u_Time)) + fs_dimVal * vec4(0.025);
diffuseColor.a = 0.7;
// ----------------------------------------------------
// Blinn-Phong Shading
// ----------------------------------------------------
vec4 lightDir = normalize(fs_LightVec - fs_Pos);
vec4 viewDir = normalize(fs_CamPos - fs_Pos);
vec4 halfVec = normalize(lightDir + viewDir);
float shininess = 400.f;
float specularIntensity = max(pow(dot(halfVec, normalize(fs_Nor)), shininess), 0);
}
}
// Calculate the diffuse term for Lambert shading
float diffuseTerm = dot(normalize(fs_Nor), normalize(fs_LightVec));
// Avoid negative lighting values
diffuseTerm = clamp(diffuseTerm, 0, 1);
float ambientTerm = 0.3;
float lightIntensity = diffuseTerm + ambientTerm; //Add a small float value to the color multiplier
//to simulate ambient lighting. This ensures that faces that are not
//lit by our point light are not completely black.
vec3 col = diffuseColor.rgb;
// Compute final shaded color
if (apply_lambert) {
col = col * lightIntensity + col * specularIntensity;
}
// & Check the rare, special case where we draw face between two diff transparent blocks as opaque
if (fs_T2O != 0) {
out_Col = vec4(col, 1.f);
} else {
out_Col = vec4(col, diffuseColor.a);
}
// distance fog!
vec4 fogColor = vec4(0.6, 0.75, 0.9, 1.0);
float FC = gl_FragCoord.z / gl_FragCoord.w / 124.f;
float falloff = clamp(1.05 - exp(-1.05f * (FC - 0.9f)), 0.f, 1.f);
out_Col = mix(out_Col, fogColor, falloff);
}
I tried implementing blinn-phong in the fragment shader, but I think it is wrong simple from the wrong normals. I think this can be done with some sort of tangent and cross product solution, but how can I know the tangent of the surface given we only know the vertex position?
I am not using unity, just bare c++ and most of the answers I am finding online are for java or unity which I do not understand.`

Blobs shader GLSL

I want to create a similar background with a shader to these images:
:
These are just blurred blobs with colors, distributed across the whole page:
Here's my current progress: https://codesandbox.io/s/lucid-bas-wvlzl9?file=/src/components/Background/Background.tsx
Vertex shader:
varying vec2 vUv;
void main() {
vUv = uv;
gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0);
}
Fragment shader:
precision highp float;
uniform float uTime;
uniform float uAmplitude;
uniform float uFrequency;
varying vec2 vUv;
uniform vec2 uResolution;
vec4 Sphere(vec2 position, float radius)
{
// float dist = radius / distance(vUv, position);
// float strength = 0.01 / distance(vUv, position);
float strength = 0.1 / distance(vec2(vUv.x, (vUv.y - 0.5) * 8. + 0.5), vec2(0.));
return vec4(strength * strength);
}
void main()
{
vec2 uv = vUv;
vec4 pixel = vec4(0.0, 0.0, 0.0, 0.0);
vec2 positions[4];
positions[0] = vec2(.5, .5);
// positions[1] = vec2(sin(uTime * 3.0) * 0.5, (cos(uTime * 1.3) * 0.6) + vUv.y);
// positions[2] = vec2(sin(uTime * 2.1) * 0.1, (cos(uTime * 1.9) * 0.8) + vUv.y);
// positions[3] = vec2(sin(uTime * 1.1) * 1.1, (cos(uTime * 2.6) * 0.7) + vUv.y);
for (int i = 0; i < 2; i++)
pixel += Sphere(positions[i], 0.22);
pixel = pixel * pixel;
gl_FragColor = pixel;
}
For each blob, you can multiply it's color by a a noise function and then a 2D gaussian curve centered in a random point. Then add all the blobs together. I only added the ones of the adjacent cells to make it scrollable and the numbers in the for loops might be increased for bigger blobs.
here is my code :
#ifdef GL_ES
precision mediump float;
#endif
uniform vec2 u_resolution;
uniform vec2 u_mouse;
uniform float u_time;
const float blobSize = 0.125;
const float cellSize = .75;
const float noiseScale = .375;
const float background = .125;
const float blobsLuminosity = .75;
const float blobsSaturation = .5;
vec2 random2(vec2 st){
st = vec2( dot(st,vec2(127.1,311.7)),
dot(st,vec2(269.5,183.3)) );
return -1.0 + 2.0*fract(sin(st)*43758.5453123);
}
// Gradient Noise by Inigo Quilez - iq/2013
// https://www.shadertoy.com/view/XdXGW8
float noise(vec2 st) {
vec2 i = floor(st);
vec2 f = fract(st);
vec2 u = f*f*(3.0-2.0*f);
return mix( mix( dot( random2(i + vec2(0.0,0.0) ), f - vec2(0.0,0.0) ),
dot( random2(i + vec2(1.0,0.0) ), f - vec2(1.0,0.0) ), u.x),
mix( dot( random2(i + vec2(0.0,1.0) ), f - vec2(0.0,1.0) ),
dot( random2(i + vec2(1.0,1.0) ), f - vec2(1.0,1.0) ), u.x), u.y)*.5+.5;
}
float gaussFunction(vec2 st, vec2 p, float r) {
return exp(-dot(st-p, st-p)/2./r/r);
}
// Function from IƱigo Quiles
// https://www.shadertoy.com/view/MsS3Wc
vec3 hsb2rgb( in vec3 c ){
vec3 rgb = clamp(abs(mod(c.x*6.0+vec3(0.0,4.0,2.0),
6.0)-3.0)-1.0,
0.0,
1.0 );
rgb = rgb*rgb*(3.0-2.0*rgb);
return c.z * mix( vec3(1.0), rgb, c.y);
}
vec3 hash32(vec2 p)
{
vec3 p3 = fract(vec3(p.xyx) * vec3(.1031, .1030, .0973));
p3 += dot(p3, p3.yxz+33.33);
return fract((p3.xxy+p3.yzz)*p3.zyx);
}
vec3 blobs(vec2 st){
vec2 i = floor(st/cellSize);
vec3 c = vec3(0.);
for(int x = -1; x <= 1; x++)
for(int y = -1; y <= 1; y++){
vec3 h = hash32(i+vec2(x, y));
c += hsb2rgb(vec3(h.z, blobsSaturation, blobsLuminosity)) * gaussFunction(st/cellSize, i + vec2(x, y) + h.xy, blobSize) * smoothstep(0., 1., noise(noiseScale*st/cellSize / blobSize));
//c += hsb2rgb(vec3(h.z, blobsSaturation, blobsLuminosity)) * gaussFunction(st/cellSize, i + vec2(x, y) + h.xy, blobSize) * noise(noiseScale*st/cellSize / blobSize);
}
return c + vec3(background);
}
float map(float x, float a, float b, float c, float d){
return (x-a)/(b-a)*(d-c)+c;
}
void main() {
vec2 st = gl_FragCoord.xy/u_resolution.xy;
st.x *= u_resolution.x/u_resolution.y;
vec3 color = vec3(0.0);
color = vec3(blobs(st - u_mouse/u_resolution.xy*4.));
gl_FragColor = vec4(color,1.0);
}
made in this shader editor.

Problem with normals when drawing instanced mesh

I'm rendering a sphere with instanced drawing, while rotating the model-view-matrix around the Y axis.
It looks ok at the beginning:
But at another angle, things get worse:
It looks to me like a problem with normals. Currently, I'm calculating the normal-matrix from my model-view-matrix and then pass it to the shader, which is doing phong-like lighting:
attribute vec4 a_position;
attribute vec3 a_normal;
attribute vec4 a_color;
attribute vec2 a_coord;
attribute mat4 a_matrix;
uniform mat4 u_mv_matrix;
uniform mat4 u_projection_matrix;
uniform mat3 u_normal_matrix;
varying vec4 v_position;
varying vec3 v_normal;
varying vec4 v_color;
varying vec2 v_coord;
void main() {
vec4 transformedPosition = u_mv_matrix * a_matrix * a_position;
v_position = transformedPosition;
v_normal = u_normal_matrix * a_normal;
v_color = a_color;
v_coord = a_coord;
gl_Position = u_projection_matrix * transformedPosition;
}
uniform sampler2D u_sampler;
varying vec4 v_position;
varying vec3 v_normal;
varying vec4 v_color;
varying vec2 v_coord;
void main() {
vec3 lightPosition = vec3(0.0); // XXX
// set diffuse and specular colors
vec3 cDiffuse = (v_color * texture2D(u_sampler, v_coord)).rgb;
vec3 cSpecular = vec3(0.3);
// lighting calculations
vec3 N = normalize(v_normal);
vec3 L = normalize(lightPosition - v_position.xyz);
vec3 E = normalize(-v_position.xyz);
vec3 H = normalize(L + E);
// Calculate coefficients.
float phong = max(dot(N, L), 0.0);
const float kMaterialShininess = 20.0;
const float kNormalization = (kMaterialShininess + 8.0) / (3.14159265 * 8.0);
float blinn = pow(max(dot(N, H), 0.0), kMaterialShininess) * kNormalization;
// diffuse coefficient
vec3 diffuse = phong * cDiffuse;
// specular coefficient
vec3 specular = blinn * cSpecular;
gl_FragColor = vec4(diffuse + specular, 1);
}
Final note: I'm working on desktop OpenGL 2.1 as well as WebGL on the browser.
Edit: Per request, I'm adding some information.
The mesh is built as follows, by passing an identity matrix:
void Sphere::append(IndexedVertexBatch<XYZ.N.UV> &batch, const Matrix &matrix) const {
float sectorStep = TWO_PI / sectorCount;
float stackStep = PI / stackCount;
for(int i = 0; i <= stackCount; ++i) {
float stackAngle = HALF_PI - i * stackStep;
float xy = radius * cosf(stackAngle);
float z = radius * sinf(stackAngle);
for(int j = 0; j <= sectorCount; ++j) {
float sectorAngle = j * sectorStep;
float x = xy * cosf(sectorAngle);
float y = xy * sinf(sectorAngle);
float nx = x / radius;
float ny = y / radius;
float nz = z / radius;
float s = (float)j / sectorCount;
float t = (float)i / stackCount;
batch.addVertex(matrix.transformPoint(x, y, z), matrix.transformNormal(nx, ny, nz), glm::vec2(s, t));
}
}
for(int i = 0; i < stackCount; ++i) {
float k1 = i * (sectorCount + 1);
float k2 = k1 + sectorCount + 1;
for(int j = 0; j < sectorCount; ++j, ++k1, ++k2) {
if (i != 0) {
if (frontFace == CCW) {
batch.addIndices(k1, k1 + 1, k2);
} else {
batch.addIndices(k1, k2, k1 + 1);
}
}
if (i != (stackCount - 1)) {
if (frontFace == CCW) {
batch.addIndices(k1 + 1, k2 + 1, k2);
} else {
batch.addIndices(k1 + 1, k2, k2 + 1);
}
}
}
}
}
Regarding the transformation matrices, it works as follow:
camera.getMVMatrix()
.setIdentity()
.translate(0, -150, -600)
.rotateY(clock()->getTime() * 0.5f);
State()
.setShader(shader)
.setShaderMatrix<MV>(camera.getMVMatrix())
.setShaderMatrix<PROJECTION>(camera.getProjectionMatrix())
.setShaderMatrix<NORMAL>(camera.getNormalMatrix())
.apply();
Finally, the light position is defined as vec3(0) in the fragment shader.
Note: As you can see, I'm using my own framework which provides among other things high level methods for building meshes and handling transformations. It's all straightforward stuff, proven to work as intended, but let me know if you need pointers to the source-code.
Update: The lighting part of the shader I used ended up being wrong, so I switched to another method.
But in essence, the solution I proposed in my answer is still valid (or at least it does the job of solving the "normal problem" when instancing is used, and non-uniform scaling is avoided.)
Here is a gist with the source-code. There is also an online WebGL demo.
The solution was relatively simple: there is no point in passing a normal-matrix to the shader.
Instead, the normal needs to be computed in the vertex shader:
v_normal = vec3(u_mv_matrix * a_matrix * vec4(a_normal, 0.0));
Credits

How to implement Screen Space Reflection with DDA

I am trying to implement screen space reflection with DDA.
http://casual-effects.blogspot.jp/2014/08/screen-space-ray-tracing.html
But, not working well.
Below is my shader codes.
This is vertex shader code.
layout(location = 0) in vec4 position;
layout(location = 1) in vec4 color_0;
layout(location = 2) in vec3 normal;
uniform mat4 mtxL2W; // Local to World space.
uniform mat4 mtxW2C; // World to Clip space.
out vec4 varColor;
out vec3 varNormal;
void main()
{
gl_Position = mtxW2C * mtxL2W * position;
varColor = color_0;
varNormal = normalize(mtxL2W * vec4(normal, 0)).xyz;
}
This is fragment shader code.
in vec4 varColor;
in vec3 varNormal;
layout(location = 0) out vec4 outColor;
uniform sampler2D s0; // color
uniform sampler2D s1; // linear depth.
uniform mat4 mtxW2V; // World to View(Camera) space.
uniform mat4 mtxV2C; // View(Camera) to Clip space.
uniform mat4 mtxC2V; // Clip to View(Camera) space.
uniform mat4 mtxV2W; // View(Camera) to World space.
uniform vec4 camPos; // Camera position (World space).
uniform float nearPlaneZ;
uniform float maxDistance;
uniform float zThickness;
uniform int maxSteps;
uniform float stride;
float squaredLength(vec2 a, vec2 b)
{
a -= b;
return dot(a, a);
}
bool intersectsDepthBuffer(float z, float minZ, float maxZ)
{
z += zThickness;
return (maxZ >= z) && (minZ - zThickness <= z);
}
bool traceScreenSpaceRay(
vec3 csOrig,
vec3 csDir,
out vec2 hitPixel,
out vec3 hitPoint)
{
// Clip to the near plane.
float rayLength = (csOrig.z + csDir.z * maxDistance) < nearPlaneZ
? (nearPlaneZ - csOrig.z) / csDir.z
: maxDistance;
vec3 csEndPoint = csOrig + csDir * rayLength;
// Project into homogeneous clip space.
vec4 H0 = mtxV2C * vec4(csOrig, 1);
vec4 H1 = mtxV2C * vec4(csEndPoint, 1);
float k0 = 1.0 / H0.w;
float k1 = 1.0 / H1.w;
// The interpolated homogeneous version of the camera-space points.
vec3 Q0 = csOrig * k0;
vec3 Q1 = csEndPoint * k1;
// Screen space point.
vec2 P0 = H0.xy * k0;
vec2 P1 = H1.xy * k1;
// [-1, 1] -> [0, 1]
P0 = P0 * 0.5 + 0.5;
P1 = P1 * 0.5 + 0.5;
ivec2 texsize = textureSize(s0, 0);
P0 *= vec2(texsize.xy);
P1 *= vec2(texsize.xy);
P1.x = min(max(P1.x, 0), texsize.x);
P1.y = min(max(P1.y, 0), texsize.y);
// If the line is degenerate, make it cover at least one pixel to avoid handling zero-pixel extent as a special case later.
P1 += squaredLength(P0, P1) < 0.0001
? vec2(0.01, 0.01)
: vec2(0.0);
vec2 delta = P1 - P0;
// Permute so that the primary iteration is in x to collapse all quadrant-specific DDA cases later.
bool permute = false;
if (abs(delta.x) < abs(delta.y))
{
permute = true;
delta = delta.yx;
P0 = P0.yx;
P1 = P1.yx;
}
float stepDir = sign(delta.x);
float invdx = stepDir / delta.x;
// Track the derivatives of Q and k.
vec3 dQ = (Q1 - Q0) / invdx;
float dk = (k1 - k0) / invdx;
// y is slope.
// slope = (y1 - y0) / (x1 - x0)
vec2 dP = vec2(stepDir, delta.y / invdx);
// Adjust end condition for iteration direction
float end = P1.x * stepDir;
int stepCount = 0;
float prevZMaxEstimate = csOrig.z;
float rayZMin = prevZMaxEstimate;
float rayZMax = prevZMaxEstimate;
float sceneZMax = rayZMax + 100.0f;
dP *= stride;
dQ *= stride;
dk *= stride;
vec4 PQk = vec4(P0, Q0.z, k0);
vec4 dPQk = vec4(dP, dQ.z, dk);
vec3 Q = Q0;
for (;
((PQk.x * stepDir) <= end)
&& (stepCount < maxSteps)
&& !intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax)
&& (sceneZMax != 0.0);
++stepCount)
{
rayZMin = prevZMaxEstimate;
rayZMax = (PQk.z + dPQk.z * 0.5) / (PQk.w + dPQk.w * 0.5);
prevZMaxEstimate = rayZMax;
if (rayZMin > rayZMax) {
float tmp = rayZMin;
rayZMin = rayZMax;
rayZMax = tmp;
}
hitPixel = permute ? PQk.yx : PQk.xy;
//hitPixel.y = texsize.y - hitPixel.y;
sceneZMax = texelFetch(s1, ivec2(hitPixel), 0).r;
PQk += dPQk;
}
// Advance Q based on the number of steps
Q.xy += dQ.xy * stepCount;
hitPoint = Q * (1.0f / PQk.w);
hitPoint = vec3(sceneZMax, rayZMin, rayZMax);
return intersectsDepthBuffer(sceneZMax, rayZMin, rayZMax);
}
void main()
{
vec3 normal = normalize(varNormal);
float linearDepth = texelFetch(s1, ivec2(gl_FragCoord.xy), 0).r;
ivec2 texsize = textureSize(s0, 0);
// Ray origin is camera origin.
vec3 rayOrg = camPos.xyz;
// Screen coordinate.
vec4 pos = vec4(gl_FragCoord.xy / texsize, 0, 1);
// [0, 1] -> [-1, 1]
pos.xy = pos.xy * 2.0 - 1.0;
// Screen-space -> Clip-space
pos.xy *= linearDepth;
// Clip-space -> View-space
pos = mtxC2V * pos;
pos.z = linearDepth;
// View-space -> World-space.
vec3 worldPos = (mtxV2W * vec4(pos.xyz, 1)).xyz;
// Compute ray direction.
// From ray origin to world position.
vec3 rayDir = normalize(worldPos - rayOrg);
// Compute reflection vector.
vec3 refDir = reflect(rayDir, normal);
// Reflection vector origin is world position.
vec3 refOrg = worldPos;
// Transform to view coordinate.
refOrg = (mtxW2V * vec4(refOrg, 1)).xyz;
refDir = (mtxW2V * vec4(refDir, 0)).xyz;
vec2 hitPixel = vec2(0, 0);
vec3 hitPoint = vec3(0, 0, 0);
// Trace screen space ray.
bool isIntersect = traceScreenSpaceRay(refOrg, refDir, hitPixel, hitPoint);
vec2 uv = hitPixel / texsize.xy;
if (uv.x > 1.0 || uv.x < 0.0f || uv.y > 1.0 || uv.y < 0.0) {
isIntersect = false;
}
if (isIntersect) {
outColor = varColor * texture(s0, uv);
}
else {
outColor = vec4(1, 1, 1, 1);
}
}
I think Q0.z and Q1.z are always 1.0.
So, I think dQ.z is also always 0.0.
And, dk is always minus value.
What is wrong?