I have the following shader rendering voxels using raycasting:
#version 460
#extension GL_ARB_separate_shader_objects : enable
#pragma optionNV(unroll all)

layout(binding = 3, std140) uniform compVarsOb {
    float time;
    float phiA;
    float thetaA;
    vec3 camPos;
    float fov;
    int voxWidth;
    int voxHeight;
    int voxDepth;
} cvo;

layout(binding = 2, rgba8) uniform writeonly image2D img;

float hash3(vec2 xy){
    xy = mod(xy, .19);
    float h = dot(xy.yyx, vec3(.013, 27.15, 2027.3));
    h *= h;
    h *= fract(h);
    return fract(h);
}

//layout(binding = 4) uniform sampler3D voxels;
layout(binding = 4, std140) buffer vData{
    vec4 voxels[];
};

float greaterThan(float a, float b){
    float d = a - b;
    return (1. + (d / abs(d))) / 2.;
}

float lesserThan(float a, float b){
    float d = a - b;
    return (1. - (d / abs(d))) / 2.;
}

float withinBounds(ivec3 li){
    vec3 l = vec3(li);
    return greaterThan(l.x, 0.) * lesserThan(l.x, cvo.voxWidth) * greaterThan(l.y, 0.) * lesserThan(l.y, cvo.voxHeight) * greaterThan(l.z, 0.) * lesserThan(l.z, cvo.voxDepth);
}

vec4 quaternionMult(vec4 a, vec4 b){
    return vec4(a.x * b.x - dot(a.yzw, b.yzw), a.x*b.yzw + b.x*a.yzw + cross(a.yzw, b.yzw));
}

void main()
{
    vec2 iResolution = vec2(2560., 1440.);
    vec2 fragCoord = gl_GlobalInvocationID.xy;
    ivec2 fragI = ivec2(gl_GlobalInvocationID.xy);
    vec2 iMouse = vec2(.5);
    vec2 uv = fragCoord / iResolution.xy;
    ivec2 uvI = ivec2(uv);
    vec2 muv = iMouse.xy / iResolution.xy;
    float iTime = cvo.time;
    vec3 col = vec3(0.);
    float screenRatio = iResolution.y / iResolution.x;

    //Setting up the ray directions and other information about the point and camera
    //##############################################################################

    //camera direction angles phi (xy plane) and theta (xz plane)
    float phi = cvo.phiA;//radians(360. * (1. - muv.x));
    float theta = cvo.thetaA;//radians(180. * (1. - muv.y));

    //get the camera direction as the basis for the rotation (each ray direction is a rotation of the camera direction vector)
    //it is in quaternion form here, so it's a vec4 instead of a vec3
    vec4 camD = vec4(0., cos(phi) * sin(theta), sin(phi) * sin(theta), cos(theta));

    float rad90 = radians(90.);
    float fov = cvo.fov;
    float xAng = radians(fov * (.5 - uv.x));
    //replace "fov" with "(fov + (110. * pow(.5 - uv.x, 2.)))" below to counteract the fisheye lens effect
    //it compensates for the quaternion artifact where rotating by a large angle on one axis and then rotating on a perpendicular axis just rotates around it, making the new deflection smaller
    float yAng = radians(fov * screenRatio * (uv.y - .5));

    //get the axes that the quaternions should be based around (perpendicular to the camera plane or dv)
    vec3 xRotAxis = vec3(cos(phi) * sin(theta - rad90), sin(phi) * sin(theta - rad90), cos(theta - rad90));
    vec3 yRotAxis = cross(xRotAxis, camD.yzw);//vec3(cos(phi - rad90) * sin(theta), sin(phi - rad90) * sin(theta), cos(theta));

    //get the quaternions of the ray direction rotations
    vec4 xQuat = vec4(cos(xAng / 2.), xRotAxis * sin(xAng / 2.));
    vec4 yQuat = vec4(cos(yAng / 2.), yRotAxis * sin(yAng / 2.));

    //combine the rotations
    vec4 compQuat = quaternionMult(yQuat, xQuat);

    //get the conjugate of compQuat
    vec4 conjComp = vec4(compQuat.x, -compQuat.yzw);

    //ray direction
    vec3 rayD = normalize(quaternionMult(quaternionMult(compQuat, camD), conjComp).yzw);

    //camera location
    vec3 cam = cvo.camPos;//vec3(cos(iTime), 0., 0.);

    //point location and radius
    //vec3 p = vec3(0., (5. * iTime) + 1., 0.);
    float pr = .00001;

    //############################################

    //hit = 1. means that nothing has been hit or everything has been completely transparent
    float hit = 1.;

    vec3 locf = vec3(0.);
    ivec3 loc = ivec3(0);
    int locI = 0;
    vec4 v = vec4(0.);

    for(int i = 0; i < 20; i++){
        locf = vec3((i * rayD * .4) + cam);
        loc = ivec3(locf);

        //adjust loc for the buffer indexing
        locI = loc.x + loc.y * cvo.voxWidth + loc.z * cvo.voxWidth * cvo.voxHeight;

        //vec4 v = texelFetch(voxels, loc, 0);//;imageLoad(voxels, loc);//texelFetch(voxels, ivec3((i * rayD) + cam), 0);
        v = voxels[locI];// * withinBounds(loc);
        if(locf.x < 0. || locf.x > cvo.voxWidth || locf.y < 0. || locf.y > cvo.voxHeight || locf.z < 0. || locf.z > cvo.voxDepth){
            v = vec4(0.);
        }

        col += v.xyz * hit * v.w;
        hit -= v.w;
        if(hit <= 0.){
            //col = vec3(v.w / 5.);
            break;
        }
    }

    //col = imageLoad(voxels, ivec3(uvI, 1)).xyz;//texelFetch(voxels, ivec3(fragI / 10, 1), 0).xyz;
    //col = vec3(phi / radians(180.));
    //col = texture(iChannel0, uv).xyz;
    //col = voxels[(fragI.x / 10) + (fragI.y / 10) * cvo.voxWidth].xyz;
    //col = vec3(rayD.z);

    imageStore(img, fragI, vec4(col, 1.0));
}
It produces this:
The problem is that when I change the loop at the end of main() to iterate over more than 2 voxels (it's at 20 right now), the fps absolutely tanks. Yet I feel that my GPU is capable of far more than 2 iterations of a not-so-demanding loop, so I'm not sure what is going on.
I am running on an RTX 2060 Super, which is said to be capable of 7.81 * 10^12 FLOPS. If I'm understanding that correctly, it means that if I want to run a compute shader at 144 fps at 1440p, I have a budget of (7.81 * 10^12) / (144 * 2560 * 1440) floating-point operations per pixel. That comes out to about 14712 operations per pixel, which is far more than my compute shader uses, yet my code only runs at an average of 30 fps when the loop is at 20 iterations. I can only get 144+ fps when I cut the loop down to 1 or 2 iterations (which at that point is basically like not having the loop at all). Are loops just horribly unoptimized for compute shaders? Where am I going wrong?
The FLOPS figure a GPU is quoted at is usually calculated assuming perfect conditions and perfect code. That is, every computation is a fused multiply-add, each one can start right after the previous one with its data already in cache or registers, and every single core is busy. Achieving such conditions is the whole problem of writing GPU code.
GPUs normally create multiple threads per core in order to hide the latency of memory accesses. Caches help with heavy bandwidth usage, but they are relatively small and require memory locality (data being stored physically close together) to be effective. Some GPUs have a separate texture cache that may be worth exploiting. Drivers also try to store textures and images in an efficient layout, at least by keeping nearby pixels close in memory, potentially using dedicated hardware.
In compute shaders, threads are created in blocks which are then assigned to compute units (terminology differs between vendors; basically a sort of meta-core with its own cache and a bunch of cores). The size of a block is defined in the shader with layout(local_size_x = X, local_size_y = Y, local_size_z = Z) in;, where the total number of threads equals the product of the dimensions. Threads within a block can communicate through shared variables (usually stored in the same space as the L1 cache) and synchronise with barrier() and the like. GPUs have multiple compute units, and to keep them all busy multiple blocks should be launched (the values passed to vkCmdDispatch() are the number of workgroups launched).
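For instance, a minimal sketch of that layout declaration (the 8x8 size here is a hypothetical choice, not something from the question's shader): 64 threads per block, a multiple of both common SIMD widths.

layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// Host side: one workgroup per 8x8 pixel tile, rounding up, e.g. for 2560x1440:
// vkCmdDispatch(cmd, (2560 + 7) / 8, (1440 + 7) / 8, 1);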
Threads are also implicitly grouped into SIMD-like groups (nVidia calls this SIMT, single instruction multiple threads). Every thread in such a group executes the same instruction (with some differences on newer nVidia cards). When an if(), for(), while(), etc. makes only some threads execute a portion of code, the remaining lanes are disabled, potentially wasting performance. These groups usually have a size of 32, or 64 on AMD, so thread blocks should be sized as a multiple of that number. Some functionality of such groups is exposed through the subgroup extensions.
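As an illustration, the bounds test in the question's loop can be made branchless so that every lane in a SIMD group follows the same path (a sketch of the idea only; step() returns 0.0 or 1.0, and the product forms an inside/outside mask):

float inside = step(0., locf.x) * step(locf.x, float(cvo.voxWidth))
             * step(0., locf.y) * step(locf.y, float(cvo.voxHeight))
             * step(0., locf.z) * step(locf.z, float(cvo.voxDepth));
v *= inside; // instead of the if(...) { v = vec4(0.); }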
Since you are working with voxels, I'd imagine that using 3D textures or images instead of a buffer might be good for the caches. Figuring out how to make threads cooperate and use the fast shared memory should also be a good idea.
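As a sketch of that first suggestion (keeping the question's binding point and vec4 texel; the rgba8 format qualifier is an assumption), the std140 buffer could become a 3D image that the driver is free to tile for locality:

layout(binding = 4, rgba8) uniform readonly image3D voxels;

// inside the marching loop, replacing the manual locI indexing:
v = imageLoad(voxels, loc);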
I have a vtkPolyData filled with points and cells that I want to draw on the screen. My polydata represents brain fibers (list of lines in 3D). A cell is a fiber. It's working, but I need to add colors between all points. We decided to color the polydata using a shader because there will be a lot of coloring methods. My vertex shader is:
vtkShader2 *shader = vtkShader2::New();
shader->SetType(VTK_SHADER_TYPE_VERTEX);
shader->SetSourceCode(R"VertexShader(
    #version 120

    attribute vec3 next_point;
    varying vec3 vColor; // Pass to fragment shader

    void main() {
        float r = gl_Vertex.x - next_point.x;
        float g = gl_Vertex.y - next_point.y;
        float b = gl_Vertex.z - next_point.z;
        if (r < 0.0) { r *= -1.0; }
        if (g < 0.0) { g *= -1.0; }
        if (b < 0.0) { b *= -1.0; }
        float norm = 1.0 / sqrt(r*r + g*g + b*b); // GLSL 1.20 forbids const with a non-constant initializer
        vColor = vec3(r * norm, g * norm, b * norm);
        gl_Position = ftransform();
    }
)VertexShader");
shader->SetContext(shader_program->GetContext());
shader_program->GetShaders()->AddItem(shader);
The goal here is, for each point, get the next point to calculate the color of the line between them. The problem is that I can't find a way to set the value of "next_point". I'm pretty sure it's always filled with 0.0 because the output image is red, blue and green on the sides.
I tried using vtkProperty::AddShaderVariable(), but I never saw any change, and the method's documentation hints at a "uniform variable", so it's probably not the right way.
// Split in 3 because I'm not sure how to pass a vtkPoints object to AddShaderVariable
fibersActor->GetProperty()->AddShaderVariable("next_x", nb_points, next_x);
fibersActor->GetProperty()->AddShaderVariable("next_y", nb_points, next_y);
fibersActor->GetProperty()->AddShaderVariable("next_z", nb_points, next_z);
I also tried using a vtkFloatArray filled with my points, then setting it as a data array.
vtkFloatArray *next_point = vtkFloatArray::New();
next_point->SetName("next_point");
next_point->SetNumberOfComponents(3);
next_point->Resize(nb_points);
// Fill next_point ...
polydata->GetPointData()->AddArray(next_point);
// Tried the vtkAssignAttribute class. Did nothing.
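For reference, here is a sketch of the kind of binding I am hoping exists. It assumes the newer vtkOpenGLPolyDataMapper API, whose MapDataArrayToVertexAttribute() maps a named point-data array to a named vertex attribute; I do not know whether it cooperates with the vtkShader2 path above:

vtkOpenGLPolyDataMapper *mapper =
    vtkOpenGLPolyDataMapper::SafeDownCast(fibersActor->GetMapper());
mapper->MapDataArrayToVertexAttribute(
    "next_point",                             // attribute name in the shader
    "next_point",                             // name of the point-data array
    vtkDataObject::FIELD_ASSOCIATION_POINTS,
    -1);                                      // -1 = use all components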
tl;dr Can you please tell me how to pass a list of points into a GLSL attribute variable? Thanks for your time.
The program that I am writing takes in the vertex data of a 3D mesh, performs a series of calculations (forgive the vagueness, I'll try to explain in better detail later), and outputs a binary file that defines where the edges are on the mesh. My program then draws a colored line where each edge is. Without the appropriate vertex shader, this looks like a regular triangulated mesh, but once the appropriate vertex shader is applied, only the edges that are "sharp" (the dot product of their face normals is greater than something close to zero) have lines drawn on them, along with the edges on the outside of the figure.
My implementation of the outline is not correct, as I made the assumption that if an edge wasn't behind the object and didn't define a sharp edge, it would be an outline edge. I haven't found a satisfactory answer to this elsewhere, and I didn't want to rely on the old trick of re-drawing the mesh in a solid color, scaled up to be slightly larger than the original mesh. This approach was to be entirely math-based, relying only on the vertex data of the mesh. I am writing a program that uses the following vertex shader:
uniform mat4 worldMatrix;
uniform mat4 projMatrix;
uniform mat4 viewProjMatrix;
uniform vec4 eyepos;
attribute vec3 a;
attribute vec3 b;
attribute vec3 n1;
attribute vec3 n2;
attribute float w;
void main()
{
    float a_vertex = dot(eyepos.xyz - a, n1);
    float b_vertex = dot(eyepos.xyz - a, n2);

    if (a_vertex * b_vertex > 0.0) // signs are the same, so the edge is not a silhouette edge
    {
        gl_Position = vec4(2.0,2.0,2.0,1.0);
    }
    else // the outline of the figure
    {
        if(w == 0.0)
        {
            vec4 p = vec4(a.x, a.y, a.z, 1.0);
            p = p * worldMatrix * viewProjMatrix;
            gl_Position = p;
        }
        else
        {
            vec4 p = vec4(b.x, b.y, b.z, 1.0);
            p = p * worldMatrix * viewProjMatrix;
            gl_Position = p;
        }
    }

    if(dot(n1, n2) <= 0.2) // there is a sharp edge
    {
        if(w == 0.0)
        {
            vec4 p = vec4(a.x, a.y, a.z, 1.0);
            p = p * worldMatrix * viewProjMatrix;
            gl_Position = p;
        }
        else
        {
            vec4 p = vec4(b.x, b.y, b.z, 1.0);
            p = p * worldMatrix * viewProjMatrix;
            gl_Position = p;
        }
    }
}
... to take information from a binary file that is written using this program in C++:
#include <iostream>
#include <fstream>
#include <vector>
#include <map>    // needed for the edge map below
#include <set>
#include <string>
#include "llgl.h"
#include "SuperMesh.h"

using namespace std;
using namespace llgl;

struct Vertex
{
    float x,y,z,w;
    float s,t,p,q;
    float nx,ny,nz,nw;
};

bool isFileAlright(string fName)
{
    ifstream in(fName.c_str());
    if(!in.good())
        return false;
    return true;
}

int main(int argc, char* argv[])
{
    // INPUT FILE NAME //
    string fName;
    cout << "Enter the path to your spec.mesh file here: ";
    cin >> fName;
    while(!isFileAlright(fName))
    {
        cout << "Enter the path to your spec.mesh file here: ";
        cin >> fName;
    }
    SuperMesh* Model = new SuperMesh(fName.c_str());
    // END INPUT //

    Model->load();
    Model->draw();

    string fname = Model->fname;
    string FileName = fname.substr(0, fname.size() - 10); // chop the last 10 characters off of the string, removing ".spec.mesh"...
    FileName = FileName + ".bin"; // ... and then we make it a .bin file
    cout << FileName << endl;
    ofstream out(FileName.c_str(), ios::binary);

    for (unsigned w = 0; w < Model->m.size(); w++)
    {
        vector<float> &vdata = Model->m[w]->vdata;
        vector<char> &idata = Model->m[w]->idata;

        // Create a vertex and index variable, a map for the edge mesh, then loop over all
        // triangles on the mesh and write out their vertex values to a file. //
        Vertex* V = (Vertex*)(&vdata[0]);
        unsigned short* I16 = (unsigned short*)(&idata[0]);
        unsigned char* I8 = (unsigned char*)(&idata[0]);
        unsigned int* I32 = (unsigned int*)(&idata[0]);

        map<set<int>, vector<vec3> > EM;
        for(unsigned i = 0; i < Model->m[w]->ic; i += 3) // 3 because we're looking at triangles //
        {
            Mesh* foo = Model->m[w];
            int i1;
            int i2;
            int i3;
            if( Model->m[w]->ise == GL_UNSIGNED_BYTE)
            {
                i1 = I8[i];
                i2 = I8[i + 1];
                i3 = I8[i + 2];
            }
            else if( Model->m[w]->ise == GL_UNSIGNED_SHORT)
            {
                i1 = I16[i];
                i2 = I16[i + 1];
                i3 = I16[i + 2];
            }
            else
            {
                i1 = I32[i];
                i2 = I32[i + 1];
                i3 = I32[i + 2];
            }
            vec3 p = vec3(V[i1].x, V[i1].y, V[i1].z); // the position in 3D space of each vertex of the triangle
            vec3 q = vec3(V[i2].x, V[i2].y, V[i2].z);
            vec3 r = vec3(V[i3].x, V[i3].y, V[i3].z);
            vec3 v1 = p - q;
            vec3 v2 = r - q;
            vec3 n = cross(v2,v1); // important to make sure the order is correct here: VERTEX TWO cross VERTEX ONE //
            set<int> tmp;
            tmp.insert(i1); tmp.insert(i2);
            EM[tmp].push_back(n);
            set<int> tmp2;
            tmp2.insert(i2); tmp2.insert(i3);
            EM[tmp2].push_back(n);
            set<int> tmp3;
            tmp3.insert(i3); tmp3.insert(i1);
            EM[tmp3].push_back(n);
            // we have now pushed every needed point into our edge map
        }

        int edgeNumber = 0;
        cout << "There should be 12 edges on a lousy cube." << endl;
        for(map<set<int>, vector<vec3> >::iterator it = EM.begin(); it != EM.end(); ++it)
        {
            // Now we will take our edge map and write its data to the file! //
            /* Information is written to the file in this form:
               Vertex One, Vertex Two, Normal One, Normal Two, r (where r, depending on its value, determines
               whether one edge is drawn on top of the other in the case where two edges are aligned with one another)
            */
            set<int>::iterator tmp = it->first.begin();
            int pi = *tmp;
            tmp++;
            int qi = *tmp;
            Vertex One = V[pi];
            Vertex Two = V[qi];
            vec3 norm1 = it->second[0];
            vec3 norm2;
            if(it->second.size() == 1)
                norm2 = -1 * norm1;
            else
                norm2 = it->second[1];
            out.write((char*) &One, 12);
            out.write((char*) &Two, 12);
            out.write((char*) &norm1, 12);
            out.write((char*) &norm2, 12);
            float r = 0;
            out.write((char*) &r, 4);
            out.write((char*) &One, 12);
            out.write((char*) &Two, 12);
            out.write((char*) &norm1, 12);
            out.write((char*) &norm2, 12);
            r = 1;
            out.write((char*) &r, 4);
            edgeNumber++;
            cout << "Wrote edge #" << edgeNumber << endl;
        }
    }
    return 0;
}
The problem that this program has is that it does neither of these two essential things in the test case where I use it to draw a simple box with outlines:
It does not draw outlines. The vertex shader is not sufficient to determine anything more than where the edges of the object are. The binary file that makes this happen is pre-computed in a separate program using code from the second snippet posted above, and then it is saved as a .bin file along with the mesh assets to which it belongs. However, raw vertex data would only take me so far, and I seek a way to draw a line around the outside of the mesh without using more traditional methods.
It does not draw ALL of the edges that I need. In my test case, two of the edges are missing, and I cannot figure out for the life of me why. I figure I must have done something wrong in writing the edge map.
A couple notes about the above code:
llgl is an OpenGL wrapper that I have used to simplify many elements of OpenGL. It is not used extensively here, but rather in the creation of meshes, done elsewhere.
Things like Mesh and SuperMesh (a collection of meshes into one rigid body) are meant to be 3D objects in my scene. In my test case, there is only one Mesh in my scene, and defining a SuperMesh of a single Mesh is essentially just creating a single Mesh.
The "draw" call in the second snippet, which pre-computes a Mesh's edge map, does not actually draw anything. It is necessary to gain access to the Mesh's vertex data.
The variable "ise" is taken from the individual Meshes in the SuperMesh, and is a variable found by reading it in from the original Blender .OBJ file. It is related to how much memory should be used to store the important vertex data. It generally isn't a good idea to allocate more space than is needed for these values, as I've been told by friends and mentors who work with Blender.
It isn't well-commented, as I'm not the only one who has worked on this code, and I, unfortunately, have a limited understanding of how the second snippet could iterate through all of the triangles on a mesh and somehow miss the last two edges. Once I understand better what this code should do when properly written, I plan on heavily commenting it and using it in future applications.
Multiplication between a matrix and a vector is not commutative, so your vertex shader has to output Projection * Model * Vertex and not the opposite.
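In GLSL's column-vector convention that means putting the matrices on the left (a sketch using the question's own uniform and attribute names):

gl_Position = viewProjMatrix * worldMatrix * vec4(a, 1.0);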
I solved the mystery of the undrawn lines by allocating more space to write vertex data in a different part of my code. As for my other problems: although the order of multiplication in my vertex shader was actually alright, I had messed up another fundamental concept of vector math. The dot product of two face normals will be a negative number when the normals make an obtuse angle... the way a sharp point on my model would.
There was also the faulty logic above that basically said that if a face is visible, draw all of the lines on it. I re-wrote my shader to test first whether a face is visible, and then in that same conditional block test for sharp edges. Now, if a face is visible BUT doesn't create a sharp edge, the shader ignores that edge. Outlines appear now too, just not perfectly. Here is a modified version of the above vertex shader:
uniform mat4 worldMatrix; // the matrix that projects a point from object space to world space
uniform mat4 viewProjMatrix; // the view (pertaining to screen size) matrix times the projection (how to project points to 3D) matrix
uniform vec4 eyepos; // the position of the eye, given by the program

attribute vec3 a; // one vertex on an edge, with x, y and z coordinates
attribute vec3 b; // the other edge vertex
attribute vec3 n1; // the normal of the face the edge is on
attribute vec3 n2; // another normal in the case that an edge shares two faces... otherwise, this is the same as n1
attribute float w; // an attribute given to make a binary choice between two edges when they draw on top of one another

void main()
{
    // WORLD SPACE ATTRIBUTES //
    vec4 eye_world = eyepos * worldMatrix;
    vec4 a_world = vec4(a.x, a.y, a.z, 1.0) * worldMatrix;
    vec4 b_world = vec4(b.x, b.y, b.z, 1.0) * worldMatrix;
    vec4 n1_world = normalize(vec4(n1.x, n1.y, n1.z, 0.0) * worldMatrix);
    vec4 n2_world = normalize(vec4(n2.x, n2.y, n2.z, 0.0) * worldMatrix);
    // END WORLD SPACE ATTRIBUTES //

    // TEST CASE ATTRIBUTES //
    float a_vertex = dot(eye_world - a_world, n1_world);
    float b_vertex = dot(eye_world - b_world, n2_world);
    float normalDot = dot(n1_world.xyz, n2_world.xyz);
    float vertProduct = a_vertex * b_vertex;
    float hardness = 0.0; // this would be the value for an object made of sharp angles, like a box. Take a look at its use below.
    // END TEST CASE ATTRIBUTES //

    gl_Position = vec4(2.0,2.0,2.0,1.0); // if all else fails, keeping this here will discard unwanted data

    if (vertProduct >= 0.1) // NOTE: face is behind the viewable portion of the object; normally uses 0.0 when not checking for the silhouette
    {
        gl_Position = vec4(2.0,2.0,2.0,1.0);
    }
    else if(vertProduct < 0.1 && vertProduct >= -0.1) // NOTE: face makes almost a right angle with the eye vector
    {
        if(w == 0.0)
        {
            vec4 p = vec4(a_world.x, a_world.y, a_world.z, 1.0);
            p = p * viewProjMatrix;
            gl_Position = p;
        }
        else
        {
            vec4 p = vec4(b_world.x, b_world.y, b_world.z, 1.0);
            p = p * viewProjMatrix;
            gl_Position = p;
        }
    }
    else // NOTE: this is the case where you can very clearly see a face.
    {
        // NOTE: the number that normalDot compares to should be its "hardness" value. The more negative the value, the smoother
        // the surface; a.k.a. the less we care about hard edges (when the normals of the faces make an obtuse angle) on the
        // object, the more negative hardness becomes on a scale of 0.0 to -1.0.
        if(normalDot <= hardness) // NOTE: the dot product of the two normals is obtuse, so we are looking at a sharp edge.
        {
            if(w == 0.0)
            {
                vec4 p = vec4(a_world.x, a_world.y, a_world.z, 1.0);
                p = p * viewProjMatrix;
                gl_Position = p;
            }
            else
            {
                vec4 p = vec4(b_world.x, b_world.y, b_world.z, 1.0);
                p = p * viewProjMatrix;
                gl_Position = p;
            }
        }
        else // NOTE: not sharp enough, just throw the vertex away
        {
            gl_Position = vec4(2.0,2.0,2.0,1.0);
        }
    }
}
As the GPU driver vendors don't usually bother to implement noiseX in GLSL, I'm looking for a "graphics randomization swiss army knife" utility function set, preferably optimised for use within GPU shaders. I prefer GLSL, but code in any language will do for me; I'm OK with translating it to GLSL on my own.
Specifically, I'd expect:
a) Pseudo-random functions - N-dimensional, with uniform distribution over [-1,1] or [0,1], calculated from an M-dimensional seed (ideally any value, but I'm OK with having the seed constrained to, say, 0..1 for a uniformly distributed result). Something like:
float random (T seed);
vec2 random2 (T seed);
vec3 random3 (T seed);
vec4 random4 (T seed);
// T being either float, vec2, vec3, vec4 - ideally.
b) Continuous noise like Perlin noise - again, N-dimensional, +- uniform distribution, with a constrained set of values and, well, looking good (some options to configure the appearance, like Perlin levels, could be useful too). I'd expect signatures like:
float noise (T coord, TT seed);
vec2 noise2 (T coord, TT seed);
// ...
I'm not very much into random number generation theory, so I'd most eagerly go for a pre-made solution, but I'd also appreciate answers like "here's a very good, efficient 1D rand(), and let me explain you how to make a good N-dimensional rand() on top of it..." .
For very simple pseudorandom-looking stuff, I use this one-liner that I found on the internet somewhere:

float rand(vec2 co){
    return fract(sin(dot(co, vec2(12.9898, 78.233))) * 43758.5453);
}
You can also generate a noise texture using whatever PRNG you like, then upload this in the normal fashion and sample the values in your shader; I can dig up a code sample later if you'd like.
Also, check out this file for GLSL implementations of Perlin and Simplex noise, by Stefan Gustavson.
It occurs to me that you could use a simple integer hash function and insert the result into a float's mantissa. IIRC the GLSL spec guarantees 32-bit unsigned integers and IEEE binary32 float representation so it should be perfectly portable.
I gave this a try just now. The results are very good: it looks exactly like static with every input I tried, no visible patterns at all. In contrast the popular sin/fract snippet has fairly pronounced diagonal lines on my GPU given the same inputs.
One disadvantage is that it requires GLSL v3.30. And although it seems fast enough, I haven't empirically quantified its performance. AMD's Shader Analyzer claims 13.33 pixels per clock for the vec2 version on a HD5870. Contrast with 16 pixels per clock for the sin/fract snippet. So it is certainly a little slower.
Here's my implementation. I left it in various permutations of the idea to make it easier to derive your own functions from.
/*
    static.frag
    by Spatial
    05 July 2013
*/

#version 330 core

uniform float time;
out vec4 fragment;

// A single iteration of Bob Jenkins' One-At-A-Time hashing algorithm.
uint hash( uint x ) {
    x += ( x << 10u );
    x ^= ( x >>  6u );
    x += ( x <<  3u );
    x ^= ( x >> 11u );
    x += ( x << 15u );
    return x;
}

// Compound versions of the hashing algorithm I whipped together.
uint hash( uvec2 v ) { return hash( v.x ^ hash(v.y) ); }
uint hash( uvec3 v ) { return hash( v.x ^ hash(v.y) ^ hash(v.z) ); }
uint hash( uvec4 v ) { return hash( v.x ^ hash(v.y) ^ hash(v.z) ^ hash(v.w) ); }

// Construct a float with half-open range [0:1] using low 23 bits.
// All zeroes yields 0.0, all ones yields the next smallest representable value below 1.0.
float floatConstruct( uint m ) {
    const uint ieeeMantissa = 0x007FFFFFu; // binary32 mantissa bitmask
    const uint ieeeOne      = 0x3F800000u; // 1.0 in IEEE binary32

    m &= ieeeMantissa;               // Keep only mantissa bits (fractional part)
    m |= ieeeOne;                    // Add fractional part to 1.0

    float f = uintBitsToFloat( m );  // Range [1:2]
    return f - 1.0;                  // Range [0:1]
}

// Pseudo-random value in half-open range [0:1].
float random( float x ) { return floatConstruct(hash(floatBitsToUint(x))); }
float random( vec2  v ) { return floatConstruct(hash(floatBitsToUint(v))); }
float random( vec3  v ) { return floatConstruct(hash(floatBitsToUint(v))); }
float random( vec4  v ) { return floatConstruct(hash(floatBitsToUint(v))); }

void main()
{
    vec3  inputs = vec3( gl_FragCoord.xy, time ); // Spatial and temporal inputs
    float rand   = random( inputs );              // Random per-pixel value
    vec3  luma   = vec3( rand );                  // Expand to RGB
    fragment = vec4( luma, 1.0 );
}
Screenshot:
I inspected the screenshot in an image editing program. There are 256 colours and the average value is 127, meaning the distribution is uniform and covers the expected range.
Gustavson's implementation uses a 1D texture
No it doesn't, not since 2005. It's just that people insist on downloading the old version. The version that is on the link you supplied uses only 8-bit 2D textures.
The new version by Ian McEwan of Ashima and myself does not use a texture, but runs at around half the speed on typical desktop platforms with lots of texture bandwidth. On mobile platforms, the textureless version might be faster because texturing is often a significant bottleneck.
Our actively maintained source repository is:
https://github.com/ashima/webgl-noise
A collection of both the textureless and texture-using versions of noise is here (using only 2D textures):
http://www.itn.liu.se/~stegu/simplexnoise/GLSL-noise-vs-noise.zip
If you have any specific questions, feel free to e-mail me directly (my email address can be found in the classicnoise*.glsl sources.)
Gold Noise
// Gold Noise ©2015 dcerisano#standard3d.com
// - based on the Golden Ratio
// - uniform normalized distribution
// - fastest static noise generator function (also runs at low precision)
// - use with indicated fractional seeding method.
float PHI = 1.61803398874989484820459; // Φ = Golden Ratio

float gold_noise(in vec2 xy, in float seed){
    return fract(tan(distance(xy*PHI, xy)*seed)*xy.x);
}
See Gold Noise in your browser right now!
This function has improved random distribution over the current function in @appas' answer as of Sept 9, 2017.
The @appas function is also incomplete, given that there is no seed supplied (uv is not a seed; it is the same for every frame), and it does not work with low-precision chipsets. Gold Noise runs at low precision by default (much faster).
There is also a nice implementation described here by McEwan and @StefanGustavson that looks like Perlin noise, but "does not require any setup, i.e. not textures nor uniform arrays. Just add it to your shader source code and call it wherever you want".
That's very handy, especially given that Gustavson's earlier implementation, which @dep linked to, uses a 1D texture, which is not supported in GLSL ES (the shader language of WebGL).
After the initial posting of this question in 2010, a lot has changed in the realm of good random functions and hardware support for them.
Looking at the accepted answer from today's perspective, this algorithm is very bad in the uniformity of the random numbers drawn from it. The uniformity suffers a lot depending on the magnitude of the input values, and visible artifacts/patterns will become apparent when sampling from it for e.g. ray/path-tracing applications.
There have been many different functions (most of them integer hashing) devised for this task, for different input and output dimensionality, most of which are evaluated in the 2020 JCGT paper Hash Functions for GPU Rendering. Depending on your needs, you can select a function from the list of proposed functions in that paper and try it in the accompanying Shadertoy.
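For illustration, one of the strong performers from that paper is the pcg3d hash (reproduced from the paper's listing; the float conversion and the frame uniform in the usage line are my own additions):

uvec3 pcg3d(uvec3 v) {
    v = v * 1664525u + 1013904223u;
    v.x += v.y * v.z;
    v.y += v.z * v.x;
    v.z += v.x * v.y;
    v ^= v >> 16u;
    v.x += v.y * v.z;
    v.y += v.z * v.x;
    v.z += v.x * v.y;
    return v;
}

// e.g. three uniform floats in [0,1) per pixel, re-randomized each frame:
vec3 r = vec3(pcg3d(uvec3(gl_FragCoord.xy, frame))) * (1.0 / 4294967296.0);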
One that isn't covered in this paper, but that has served me very well without any noticeable patterns on any input magnitude, is also one that I want to highlight.
Other classes of algorithm use low-discrepancy sequences to draw pseudo-random numbers from, such as the Sobol sequence with Owen-Nayar scrambling. Eric Heitz has done some amazing research in this area, as well with his paper A Low-Discrepancy Sampler that Distributes Monte Carlo Errors as a Blue Noise in Screen Space.
Another example of this is the (so far latest) JCGT paper Practical Hash-based Owen Scrambling, which applies Owen scrambling to a different hash function (namely Laine-Karras).
Yet other classes use algorithms that produce noise patterns with desirable frequency spectrums, such as blue noise, that is particularly "pleasing" to the eyes.
(I realize that good StackOverflow answers should provide the algorithms as source code and not as links because those can break, but there are way too many different algorithms nowadays and I intend for this answer to be a summary of known-good algorithms today)
Do use this:
highp float rand(vec2 co)
{
    highp float a = 12.9898;
    highp float b = 78.233;
    highp float c = 43758.5453;
    highp float dt = dot(co.xy, vec2(a, b));
    highp float sn = mod(dt, 3.14);
    return fract(sin(sn) * c);
}
Don't use this:
float rand(vec2 co){
    return fract(sin(dot(co.xy, vec2(12.9898, 78.233))) * 43758.5453);
}
You can find the explanation in Improvements to the canonical one-liner GLSL rand() for OpenGL ES 2.0
hash:
Nowadays WebGL 2.0 is here, so integers are available in (W)GLSL.
-> For a quality portable hash (at similar cost to the ugly float hashes) we can now use "serious" hashing techniques.
IQ implemented some in https://www.shadertoy.com/view/XlXcW4 (and more)
E.g.:
const uint k = 1103515245U;  // GLIB C
//const uint k = 134775813U;   // Delphi and Turbo Pascal
//const uint k = 20170906U;    // Today's date (use three days ago's date if you want a prime)
//const uint k = 1664525U;     // Numerical Recipes

vec3 hash( uvec3 x )
{
    x = ((x>>8U)^x.yzx)*k;
    x = ((x>>8U)^x.yzx)*k;
    x = ((x>>8U)^x.yzx)*k;
    return vec3(x)*(1.0/float(0xffffffffU));
}
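Example use (a sketch; frame stands for a hypothetical uint uniform that varies the noise over time):

vec3 n = hash(uvec3(gl_FragCoord.xy, frame)); // three uniform floats in [0,1] per pixel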
Just found this version of 3D noise for GPU; allegedly it is the fastest one available:
#ifndef __noise_hlsl_
#define __noise_hlsl_

// hash based 3d value noise
// function taken from https://www.shadertoy.com/view/XslGRr
// Created by inigo quilez - iq/2013
// License Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
// ported from GLSL to HLSL

float hash( float n )
{
    return frac(sin(n)*43758.5453);
}

float noise( float3 x )
{
    // The noise function returns a value in the range -1.0f -> 1.0f

    float3 p = floor(x);
    float3 f = frac(x);

    f = f*f*(3.0-2.0*f);
    float n = p.x + p.y*57.0 + 113.0*p.z;

    return lerp(lerp(lerp( hash(n+0.0), hash(n+1.0),f.x),
                     lerp( hash(n+57.0), hash(n+58.0),f.x),f.y),
                lerp(lerp( hash(n+113.0), hash(n+114.0),f.x),
                     lerp( hash(n+170.0), hash(n+171.0),f.x),f.y),f.z);
}

#endif
A straight, jagged version of 1D Perlin noise, essentially a random LFO zigzag.
half rn(float xx){
    half x0 = floor(xx);
    half x1 = x0 + 1;
    half v0 = frac(sin(x0*.014686)*31718.927 + x0);
    half v1 = frac(sin(x1*.014686)*31718.927 + x1);
    return (v0*(1 - frac(xx)) + v1*frac(xx))*2 - 1*sin(xx);
}
I have also found 1D, 2D, 3D and 4D Perlin noise on Shadertoy owner Inigo Quilez's Perlin tutorial website, along with Voronoi and so forth; he has full, fast implementations and code for them.
I have translated one of Ken Perlin's Java implementations into GLSL and used it in a couple projects on ShaderToy.
Below is the GLSL interpretation I did:
int b(int N, int B) { return N>>B & 1; }
int T[] = int[](0x15,0x38,0x32,0x2c,0x0d,0x13,0x07,0x2a);
int A[] = int[](0,0,0);

int b(int i, int j, int k, int B) { return T[b(i,B)<<2 | b(j,B)<<1 | b(k,B)]; }

int shuffle(int i, int j, int k) {
    return b(i,j,k,0) + b(j,k,i,1) + b(k,i,j,2) + b(i,j,k,3) +
           b(j,k,i,4) + b(k,i,j,5) + b(i,j,k,6) + b(j,k,i,7) ;
}

float K(int a, vec3 uvw, vec3 ijk)
{
    float s = float(A[0]+A[1]+A[2])/6.0;
    float x = uvw.x - float(A[0]) + s,
          y = uvw.y - float(A[1]) + s,
          z = uvw.z - float(A[2]) + s,
          t = 0.6 - x * x - y * y - z * z;
    int h = shuffle(int(ijk.x) + A[0], int(ijk.y) + A[1], int(ijk.z) + A[2]);
    A[a]++;
    if (t < 0.0)
        return 0.0;
    int b5 = h>>5 & 1, b4 = h>>4 & 1, b3 = h>>3 & 1, b2 = h>>2 & 1, b = h & 3;
    float p = b==1?x:b==2?y:z, q = b==1?y:b==2?z:x, r = b==1?z:b==2?x:y;
    p = (b5==b3 ? -p : p); q = (b5==b4 ? -q : q); r = (b5!=(b4^b3) ? -r : r);
    t *= t;
    return 8.0 * t * t * (p + (b==0 ? q+r : b2==0 ? q : r));
}

float noise(float x, float y, float z)
{
    float s = (x + y + z) / 3.0;
    vec3 ijk = vec3(int(floor(x+s)), int(floor(y+s)), int(floor(z+s)));
    s = float(ijk.x + ijk.y + ijk.z) / 6.0;
    vec3 uvw = vec3(x - float(ijk.x) + s, y - float(ijk.y) + s, z - float(ijk.z) + s);
    A[0] = A[1] = A[2] = 0;
    int hi = uvw.x >= uvw.z ? uvw.x >= uvw.y ? 0 : 1 : uvw.y >= uvw.z ? 1 : 2;
    int lo = uvw.x < uvw.z ? uvw.x < uvw.y ? 0 : 1 : uvw.y < uvw.z ? 1 : 2;
    return K(hi, uvw, ijk) + K(3 - hi - lo, uvw, ijk) + K(lo, uvw, ijk) + K(0, uvw, ijk);
}
I translated it from Appendix B from Chapter 2 of Ken Perlin's Noise Hardware at this source:
https://www.csee.umbc.edu/~olano/s2002c36/ch02.pdf
Here is a public shade I did on Shader Toy that uses the posted noise function:
https://www.shadertoy.com/view/3slXzM
Some other good sources I found on the subject of noise during my research include:
https://thebookofshaders.com/11/
https://mzucker.github.io/html/perlin-noise-math-faq.html
https://rmarcus.info/blog/2018/03/04/perlin-noise.html
http://flafla2.github.io/2014/08/09/perlinnoise.html
https://mrl.nyu.edu/~perlin/noise/
https://rmarcus.info/blog/assets/perlin/perlin_paper.pdf
https://developer.nvidia.com/gpugems/GPUGems/gpugems_ch05.html
I highly recommend the book of shaders as it not only provides a great interactive explanation of noise, but other shader concepts as well.
EDIT:
It might be possible to optimize the translated code by using some of the hardware-accelerated functions available in GLSL. I will update this post if I end up doing this.
lygia, a multi-language shader library
If you don't want to copy/paste the functions into your shader, you can also use lygia, a multi-language shader library. It contains a few generative functions like cnoise, fbm, noised, pnoise, random and snoise in both GLSL and HLSL, and many other awesome functions as well. For this to work it:
Relies on #include "file", which is defined by the Khronos GLSL standard and supported by most engines and environments (like glslViewer, the glsl-canvas VS Code plugin, Unity, etc.).
Example: cnoise
Using cnoise.glsl with #include:
#ifdef GL_ES
precision mediump float;
#endif

uniform vec2 u_resolution;
uniform float u_time;

#include "lygia/generative/cnoise.glsl"

void main (void) {
    vec2 st = gl_FragCoord.xy / u_resolution.xy;
    vec3 color = vec3(cnoise(vec3(st * 5.0, u_time)));
    gl_FragColor = vec4(color, 1.0);
}
To run this example I used glslViewer.
Please see below an example of how to add white noise to the rendered texture.
The solution is to use two textures: the original one and one of pure white noise, like this one: wiki white noise
private static final String VERTEX_SHADER =
    "uniform mat4 uMVPMatrix;\n" +
    "uniform mat4 uMVMatrix;\n" +
    "uniform mat4 uSTMatrix;\n" +
    "attribute vec4 aPosition;\n" +
    "attribute vec4 aTextureCoord;\n" +
    "varying vec2 vTextureCoord;\n" +
    "varying vec4 vInCamPosition;\n" +
    "void main() {\n" +
    "    vTextureCoord = (uSTMatrix * aTextureCoord).xy;\n" +
    "    gl_Position = uMVPMatrix * aPosition;\n" +
    "}\n";

private static final String FRAGMENT_SHADER =
    "precision mediump float;\n" +
    "uniform sampler2D sTextureUnit;\n" +
    "uniform sampler2D sNoiseTextureUnit;\n" +
    "uniform float uNoiseFactor;\n" +
    "varying vec2 vTextureCoord;\n" +
    "varying vec4 vInCamPosition;\n" +
    "void main() {\n" +
    "    gl_FragColor = texture2D(sTextureUnit, vTextureCoord);\n" +
    "    vec4 vRandChosenColor = texture2D(sNoiseTextureUnit, fract(vTextureCoord + uNoiseFactor));\n" +
    "    gl_FragColor.r += (0.05 * vRandChosenColor.r);\n" +
    "    gl_FragColor.g += (0.05 * vRandChosenColor.g);\n" +
    "    gl_FragColor.b += (0.05 * vRandChosenColor.b);\n" +
    "}\n";
The fragment shader contains the parameter uNoiseFactor, which is updated on every frame by the main application:

float noiseFactor = (float)(mRand.nextInt() % 1000) / 1000;
int noiseFactorUniformHandle = GLES20.glGetUniformLocation(mProgram, "uNoiseFactor"); // fetch the float uniform, not the sampler
GLES20.glUniform1f(noiseFactorUniformHandle, noiseFactor);
FWIW, I had the same questions and I needed it implemented in WebGL 1.0, so I couldn't use a few of the examples given in previous answers. I tried the Gold Noise mentioned above, but the use of PHI doesn't really click for me: distance(xy * PHI, xy) * seed just equals length(xy) * (PHI - 1.0) * seed, so I don't see how the magic of PHI should be put to work when it gets directly multiplied by seed.
Anyway, I did something similar, just without PHI, and instead added some variation at another place. Basically, I take the tan of the distance between xy and some random point lying outside of the frame to the top right, and then multiply it by the distance between xy and another such random point lying in the bottom left (so there is no accidental match between the points). It looks pretty decent as far as I can see. Click to generate new frames.
(function main() {
  const dim = [512, 512];
  twgl.setDefaults({ attribPrefix: "a_" });
  const gl = twgl.getContext(document.querySelector("canvas"));
  gl.canvas.width = dim[0];
  gl.canvas.height = dim[1];
  const bfi = twgl.primitives.createXYQuadBufferInfo(gl);
  const pgi = twgl.createProgramInfo(gl, ["vs", "fs"]);
  gl.canvas.onclick = (() => {
    twgl.bindFramebufferInfo(gl, null);
    gl.useProgram(pgi.program);
    twgl.setUniforms(pgi, {
      u_resolution: dim,
      u_seed: Array(4).fill().map(Math.random)
    });
    twgl.setBuffersAndAttributes(gl, pgi, bfi);
    twgl.drawBufferInfo(gl, bfi);
  });
})();

<script src="https://twgljs.org/dist/4.x/twgl-full.min.js"></script>
<script id="vs" type="x-shader/x-vertex">
  attribute vec4 a_position;
  attribute vec2 a_texcoord;
  void main() {
    gl_Position = a_position;
  }
</script>
<script id="fs" type="x-shader/x-fragment">
  precision highp float;
  uniform vec2 u_resolution;
  uniform vec2 u_seed[2];
  void main() {
    float uni = fract(
      tan(distance(
        gl_FragCoord.xy,
        u_resolution * (u_seed[0] + 1.0)
      )) * distance(
        gl_FragCoord.xy,
        u_resolution * (u_seed[1] - 2.0)
      )
    );
    gl_FragColor = vec4(uni, uni, uni, 1.0);
  }
</script>
<canvas></canvas>
When rendering a scene of textured polygons, I'd like to be able to switch between rendering in the original colors and a "grayscale" mode. I've been trying to achieve this using blending and color matrix operations; none of it worked (with blending I couldn't find a glBlendFunc() that achieved something remotely resembling what I wanted, and color matrix operations are discussed here).
A solution that comes to mind (but is also rather expensive) is to capture the screen every frame, convert the resulting texture to a grayscale one, and display that instead... (Where I said grayscale I actually meant anything with a low saturation, but I'm guessing that for most of the possible solutions it won't differ all that much from grayscale.)
What other options do I have?
The default OpenGL framebuffer uses the RGB colour-space, which doesn't store an explicit saturation. You need an approach for extracting the saturation, modifying it, and changing it back again.
My previous suggestion, which simply used the RGB vector length to represent luminance, was incorrect, as it didn't take scaling into account. I apologize.
Credit for the new short snippet goes to the regular user "RTFM_FTW" from ##opengl and ##opengl3 on FreeNode/IRC, and it lets you modify the saturation directly without computing the costly RGB->HSV->RGB conversion, which is exactly what you want. Though the HSV code is inferior with respect to your question, I let it stay.
void main( void )
{
    vec3 R0 = texture2DRect( S, gl_TexCoord[0].st ).rgb;
    gl_FragColor = vec4( mix( vec3( dot( R0, vec3( 0.2125, 0.7154, 0.0721 ) ) ),
                              R0, T ), gl_Color.a );
}
If you want more control than just the saturation, you need to convert to the HSL or HSV colour-space, as shown below using a GLSL fragment shader.
Read the OpenGL 3.0 and GLSL 1.30 specification available on http://www.opengl.org/registry to learn how to use GLSL v1.30 functionality.
#version 130

#define RED 0
#define GREEN 1
#define BLUE 2

in vec4 vertexIn;
in vec4 colorIn;
in vec2 tcoordIn;
out vec4 pixel;
uniform sampler2D tex; // was "Sampler2D tex;", which is not valid GLSL
vec4 texel;
const float epsilon = 1e-6;

// GLSL has no built-in swap; inout parameters do the job
void swap(inout int a, inout int b)
{
    int t = a;
    a = b;
    b = t;
}

vec3 RGBtoHSV(vec3 color)
{
    /* hue, saturation and value are all in the range [0,1> here, as opposed to their
       normal ranges of: hue: [0,360>, sat: [0, 100] and value: [0, 256> */
    int sortindex[3] = int[3](RED, GREEN, BLUE); // GLSL 1.30 uses array constructors, not brace initializers
    float rgbArr[3] = float[3](color.r, color.g, color.b);
    float hue, saturation, value, diff;
    float minCol, maxCol;
    int minIndex, maxIndex;

    if(color.g < color.r)
        swap(sortindex[0], sortindex[1]);
    if(color.b < color.g)
        swap(sortindex[1], sortindex[2]);
    if(color.r < color.b)
        swap(sortindex[2], sortindex[0]);

    minIndex = sortindex[0];
    maxIndex = sortindex[2];
    minCol = rgbArr[minIndex];
    maxCol = rgbArr[maxIndex];

    diff = maxCol - minCol;

    /* Hue */
    if( diff < epsilon){
        hue = 0.0;
    }
    else if(maxIndex == RED){
        hue = ((1.0/6.0) * ( (color.g - color.b) / diff )) + 1.0;
        hue = fract(hue);
    }
    else if(maxIndex == GREEN){
        hue = ((1.0/6.0) * ( (color.b - color.r) / diff )) + (1.0/3.0);
    }
    else if(maxIndex == BLUE){
        hue = ((1.0/6.0) * ( (color.r - color.g) / diff )) + (2.0/3.0);
    }

    /* Saturation */
    if(maxCol < epsilon)
        saturation = 0.0;
    else
        saturation = (maxCol - minCol) / maxCol;

    /* Value */
    value = maxCol;

    return vec3(hue, saturation, value);
}

vec3 HSVtoRGB(vec3 color)
{
    float f,p,q,t, hueRound;
    int hueIndex;
    float hue, saturation, value;
    vec3 result;

    /* just for clarity */
    hue = color.r;
    saturation = color.g;
    value = color.b;

    hueRound = floor(hue * 6.0);
    hueIndex = int(hueRound) % 6;

    f = (hue * 6.0) - hueRound;
    p = value * (1.0 - saturation);
    q = value * (1.0 - f*saturation);
    t = value * (1.0 - (1.0 - f)*saturation);

    switch(hueIndex)
    {
        case 0:
            result = vec3(value,t,p);
            break;
        case 1:
            result = vec3(q,value,p);
            break;
        case 2:
            result = vec3(p,value,t);
            break;
        case 3:
            result = vec3(p,q,value);
            break;
        case 4:
            result = vec3(t,p,value);
            break;
        default:
            result = vec3(value,p,q);
            break;
    }
    return result;
}

void main(void)
{
    vec4 srcColor;
    vec3 hsvColor;
    vec3 rgbColor;

    texel = texture(tex, tcoordIn); // was "Texture2D(...)", which is not a GLSL function
    srcColor = texel*colorIn;
    hsvColor = RGBtoHSV(srcColor.rgb);

    /* You can do further changes here, if you want. */
    hsvColor.g = 0.0; /* Set saturation to zero */

    rgbColor = HSVtoRGB(hsvColor);
    pixel = vec4(rgbColor.r, rgbColor.g, rgbColor.b, srcColor.a);
}
If you're working against a modern-enough OpenGL, I would say pixel shaders are a very suitable solution here. Either by hooking into each polygon's shading as it renders, or by doing a single full-screen quad in a second pass that just reads each pixel, converts it to grayscale, and writes it back. Unless your resolution, graphics hardware, and target framerate are somehow "extreme", that should be doable these days in most cases.
For most desktops render-to-texture isn't that expensive anymore; all of Compiz, Aero, etc., and effects like bloom or depth of field seen in recent titles, depend on it.
Actually you don't convert the screen texture per se to grayscale; you would want to draw a screen-sized quad with the texture and a fragment shader transforming the values to grayscale.
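A minimal sketch of that second pass (the sceneTex sampler and uv varying names are placeholders, and the Rec. 709 luma weights are one common choice):

uniform sampler2D sceneTex; // the texture the scene was rendered into
varying vec2 uv;            // texture coordinates of the screen-sized quad

void main() {
    vec3 c = texture2D(sceneTex, uv).rgb;
    float y = dot(c, vec3(0.2126, 0.7152, 0.0722)); // Rec. 709 luma
    gl_FragColor = vec4(vec3(y), 1.0);
}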
Another option is to have two sets of fragment shaders for your triangles: one that just copies the gl_FrontColor attribute as the fixed-function pipeline would, and another that writes grayscale values to the screen buffer.
A third option might be indexed color modes: if you set up a grayscale palette... but that mode might be deprecated and poorly supported by now; plus you lose a lot of functionality like blending, if I remember correctly.