How to provide 14 float values per vertex to the shader? - c++

I have downloaded a model, and have exported it as .fbx. The model contains several animations (6 to be precise), and I want to have one of them displayed. Following Anton Gerdelan's book on openGL I wrote an importer, which uses assimp to parse the model, buffers the relevant vertex data and retrieves the offset matrices needed for the animation.
Not having much experience with skeletal animation, I think I've been able to make the necessary changes to anton's importer, so that it can work on the more complex model that I need. However, the importer assumes that each vertex is only influenced by 1 bone, which unfortunately is not the case.
After some tinkering, I figured out that each vertex of the model can be influenced by at most 14 bones at a time. Since I am not sure how I could pass 14 values to the shader containing boneId and the relevant weight I tried changing the code to accommodate up to 4 bones at a time. This is the code that parses the bone id & weights and buffers them:
// Gathers per-vertex skinning data from the assimp mesh and uploads it as two
// vertex attributes: bone indices (location 3) and blend weights (location 4).
// A vertex influenced by more than four bones keeps only its four strongest
// influences, and the kept weights are rescaled so they sum to 1.
*bone_count = (int)mesh->mNumBones;
// Debug name table: room for 256 bones, names truncated to 63 characters.
char bone_names[256][64];
struct vertexdata {
    int IDs[4];       // bone indices, read as an ivec4 through attribute 3
    float Weights[4]; // blend weights paired with IDs, read as a vec4 through attribute 4
    int ptr;          // number of slots filled so far; not bound to any attribute below
};
vector<vertexdata> vdata;
// resize() value-initialises each record, so IDs, Weights and ptr all start at zero.
vdata.resize(*point_count);
for (int b_i = 0; b_i < *bone_count; b_i++) {
    const aiBone* bone = mesh->mBones[b_i];
    // Bounded copy: an unchecked strcpy could overrun the 64-byte slot on a long
    // bone name, and the table itself only has 256 rows.
    if (b_i < 256) {
        snprintf(bone_names[b_i], sizeof(bone_names[b_i]), "%s", bone->mName.data);
        printf("bone_names[%i]=%s\n", b_i, bone_names[b_i]);
    }
    bone_offset_mats[b_i] = convert_assimp_matrix(bone->mOffsetMatrix);
    // getting weights for each bone
    int num_weights = (int)bone->mNumWeights;
    for (int w_i = 0; w_i < num_weights; w_i++) {
        const aiVertexWeight& weight = bone->mWeights[w_i];
        int vid = (int)weight.mVertexId;
        if (vid < 0 || vid >= *point_count) {
            continue; // weight refers to a vertex outside vdata
        }
        vertexdata& slot = vdata[vid];
        if (slot.ptr < 4) {
            slot.IDs[slot.ptr] = b_i;
            slot.Weights[slot.ptr] = weight.mWeight;
            slot.ptr++;
        } else {
            // All four slots are taken: replace the weakest influence if this
            // one is stronger, so the dominant bones are the ones that survive.
            int weakest = 0;
            for (int s = 1; s < 4; s++) {
                if (slot.Weights[s] < slot.Weights[weakest]) {
                    weakest = s;
                }
            }
            if (weight.mWeight > slot.Weights[weakest]) {
                slot.IDs[weakest] = b_i;
                slot.Weights[weakest] = weight.mWeight;
            }
        }
    }
}
// Rescale the kept weights so they sum to 1 after influences were discarded.
for (int i = 0; i < *point_count; i++) {
    float total = 0.0f;
    for (int s = 0; s < 4; s++) {
        total += vdata[i].Weights[s];
    }
    if (total > 0.0f) {
        for (int s = 0; s < 4; s++) {
            vdata[i].Weights[s] /= total;
        }
    }
}
//buffering bone id data
GLuint vbo1;
glGenBuffers(1, &vbo1);
glBindBuffer(GL_ARRAY_BUFFER, vbo1);
// data() is valid even when the vector is empty, unlike &vdata[0].
glBufferData(GL_ARRAY_BUFFER, sizeof(vertexdata) * vdata.size(), vdata.data(), GL_STATIC_DRAW);
// Both attributes read from the same interleaved records, hence the shared stride.
glEnableVertexAttribArray(3);
glVertexAttribIPointer(3, 4, GL_INT, sizeof(vertexdata), (const GLvoid*)0);
glEnableVertexAttribArray(4);
// Weights start right after the four ints of IDs.
glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, sizeof(vertexdata), (const GLvoid*)sizeof(vdata[0].IDs));
and in the shaders:
vertex shader
#version 330 core
// Skinned-mesh vertex shader: blends up to four bone matrices per vertex.
layout (location = 0) in vec3 pos;
layout (location = 1) in vec3 norm;
layout (location = 2) in vec2 UV;
// Integer attribute: each component indexes into bone_matrices.
layout (location = 3) in ivec4 boneIDs;
// Blend weight paired with each component of boneIDs.
layout (location = 4) in vec4 Weights;
uniform mat4 view, projection, model;
// One matrix per bone; boneIDs values must stay below 40.
uniform mat4 bone_matrices[40];
out vec2 tCoords;
void main()
{
// Weighted sum of the four referenced bone matrices. If all four weights are
// zero the sum is the zero matrix and the transformed position becomes all zeros.
mat4 boneTransform = bone_matrices[boneIDs[0]] * Weights[0];
boneTransform += bone_matrices[boneIDs[1]] * Weights[1];
boneTransform += bone_matrices[boneIDs[2]] * Weights[2];
boneTransform += bone_matrices[boneIDs[3]] * Weights[3];
tCoords = UV;
// NOTE(review): here the model matrix is applied to the vertex first and the
// bone transform second; skinning is more commonly applied to the untransformed
// vertex (model * boneTransform * pos) — confirm the intended order.
gl_Position = projection * view * boneTransform * model * vec4(pos, 1.0);
}
fragment shader
#version 330 core
// Fragment shader: outputs the texture colour sampled at the interpolated UV.
in vec2 tCoords;
out vec4 fragColour;
uniform sampler2D tex;
void main()
{
// No lighting is applied; the texel is written unchanged.
fragColour = texture(tex, tCoords);
}
The model is rendered properly, but I am not observing any movement. Again, not knowing much about skeletal animation, I can only assume that it's because I haven't included every bone that influences each vertex, and the corresponding weight. However, when buffering the data the shaders only accept up to vec4 aka 4 values per vertex. How can I pass 14 IDs and 14 weights? Could this be the cause of the animation not working?

Related

How to update array of matrices to glsl shader

I'm currently working with skeletal animation and I'm really close to getting it working. Currently, I have a struct that has a matrix with 100 spots ( this is so that I can max have 100 joints ) like so :
// CPU-side storage for the joint transforms: one 4x4 matrix per joint,
// with a fixed capacity of 100 joints.
struct skelShader {
glm::mat4 currentJointTrans[100];
};
The struct should be bound to the shader; I've done it like this:
// Create the uniform buffer object that backs the joint-matrix block.
glGenBuffers(1, &sksBuff);
glBindBuffer(GL_UNIFORM_BUFFER, sksBuff);
// bind buffer to work further with it...
// Allocate sizeof(skelShader) bytes of storage; passing NULL uploads no data yet.
// NOTE(review): the matrices are described as changing every frame, so
// GL_DYNAMIC_DRAW would be the matching usage hint — confirm.
glBufferData(GL_UNIFORM_BUFFER, sizeof(skelShader), NULL, GL_STATIC_DRAW);
// The shader hard-codes "binding = 4" for the animationStruct block, so attach
// the buffer to uniform binding point 4 (no block-index lookup needed).
glBindBufferBase(GL_UNIFORM_BUFFER, 4, sksBuff);
// good practice, unbind buffer
glBindBuffer(GL_UNIFORM_BUFFER, 0);
sksBuff is just an GLuint.
I fill this array with new values every render/frame that goes by, these values are the new transformations for the joints. I do it like this:
// Per-frame refresh of the CPU-side joint matrices: for every joint, multiply the
// transform stored for the current frame by that joint's globalBindPosMat.
// The frame is indexed with currentFrame - 1, so currentFrame looks 1-based — TODO confirm.
for (int i = 0; i < skeleton.size(); i++) {
globalSkelInfo.currentJointTrans[i] = skeleton[i]->transformMat[currentFrame - 1] * skeleton[i]->globalBindPosMat;
}
This is working correctly for the root joint, but the rest of the joints/mesh remains in bind pose. The problem should be located in where I update the array. Currently I do it like this in the render function after I've done the multiplication for each joint:
// Tries to upload each joint matrix with glUniformMatrix4fv, looking up the
// location of "currentJointTrans[i]" by name for every element.
// NOTE(review): in the vertex shader currentJointTrans is a member of the std140
// uniform block animationStruct. glUniform* calls cannot set data in uniform
// buffers; the matrices have to be written into the buffer object (sksBuff).
for (int i = 0; i < skeleton.size(); i++) {
glUniformMatrix4fv(glGetUniformLocation(aShaderProgram, ("currentJointTrans[" + std::to_string(i) + "]").c_str()),
1, GL_FALSE, glm::value_ptr(globalSkelInfo.currentJointTrans[i]));
}
After this I draw. The root joints values seem to be moving correctly, but the rest of the mesh is in bindpose and doesn't move. In the Vertex Shader I try to update the matrix like this:
#version 440
// Skinning vertex shader: blends position and normal over four joint matrices
// read from a uniform block.
const int maxJoints = 100;
// NOTE(review): maxWeights is declared but the loop below uses the literal 4.
const int maxWeights = 4;
layout(location = 0) in vec3 vertex_position;
layout(location = 1) in vec2 vertex_UV;
layout(location = 2) in vec3 vertex_normal;
// Blend weight for each of the four joint influences.
layout(location = 3) in vec4 vertex_weight;
// Joint indices into currentJointTrans, one per weight.
layout(location = 4) in ivec4 vertex_controllers;
out vec2 outUVs;
out vec3 outNorm;
// Camera/transform data, sourced from the buffer bound to binding point 3.
layout(binding = 3 , std140) uniform uniformBlock
{
vec3 camPos;
mat4 world;
mat4 LookAt;
mat4 projection;
mat4 MVP;
};
// Joint matrices, sourced from the buffer bound to binding point 4.
layout(binding = 4 , std140) uniform animationStruct
{
mat4 currentJointTrans[maxJoints];
};
void main() {
// Accumulators for the weighted sums over the four influences.
vec4 finalModelPos = vec4(0.0);
vec4 finalNormal = vec4(0.0);
for (int i = 0; i < 4; i++) {
mat4 jointTrans = currentJointTrans[vertex_controllers[i]];
// w = 1.0: the position is affected by the joint's translation.
vec4 posePos = jointTrans * vec4(vertex_position, 1.0);
finalModelPos += posePos * vertex_weight[i];
// w = 0.0: the normal ignores the joint's translation.
vec4 worldNormal = jointTrans * vec4(vertex_normal, 0.0);
finalNormal += worldNormal * vertex_weight[i];
}
gl_Position = MVP * finalModelPos;
// The blended normal is passed on without renormalisation.
outNorm = finalNormal.xyz;
outUVs = vertex_UV;
}
My theory is that the updating of the struct skelShader with my currentJointTrans array is incorrect. Any tips on how I should do this instead?
glUniform* calls cannot set data in uniform buffers. Indeed, the whole point of uniform buffers is that the uniform data comes from a buffer object. That's why you had to create one.
So if you want to set the uniform data for a uniform block, you set that data into the buffer object.

Can I somehow render more stuff (opengl)

I'm trying to render lots of stuff with OpenGL 3.3 Am i missing some tricks to make this faster?
Does it matter if I use glBufferData or glBufferSubData?
I have coded OpenGL for 5 days now, so I know that there are lots of unknown unknowns to me. Those are what I'm looking for: can you point me to any ways of making this even faster?
I think i'm using what's called "Instanced Rendering". All my stuff is rendered via a single glDrawElementsInstancedBaseVertex call.
Did I miss any relevant code? There's so much of it that I can't really paste it all here.
I'v gotten as far as 20000 objects with 24 vertices using the following code:
Called once per mesh at start, not during frames.
/// Creates the vertex array object and all buffers for this mesh.
///
/// Static per-vertex data (positions, texture coordinates, normals) and the
/// index list are uploaded from p_model. The per-instance attributes (model
/// matrix, shininess, specular colour) only get their layout configured here;
/// their buffers receive no data in this function.
///
/// @param p_model Source geometry: positions, texCoords, normals and indices.
void Mesh::initMesh(IndexedModel const & p_model)
{
    d->drawCount = p_model.indices.size();
    glGenVertexArrays(1, &(d->vertexArrayObject));
    glBindVertexArray(d->vertexArrayObject);
    glGenBuffers(eNumBuffers, d->vertexArrayBuffers);

    // Attribute 0: vertex positions, 3 floats, tightly packed.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[ePosition_Vb]);
    glBufferData(GL_ARRAY_BUFFER, sizeof(p_model.positions[0]) * p_model.positions.size(), p_model.positions.data(), GL_STATIC_DRAW);
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);

    // Attribute 1: texture coordinates, 2 floats.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eTexCoord_Vb]);
    glBufferData(GL_ARRAY_BUFFER, sizeof(p_model.texCoords[0]) * p_model.texCoords.size(), p_model.texCoords.data(), GL_STATIC_DRAW);
    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 0, 0);

    // Attribute 2: normals, 3 floats.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eNormal_Vb]);
    glBufferData(GL_ARRAY_BUFFER, sizeof(p_model.normals[0]) * p_model.normals.size(), p_model.normals.data(), GL_STATIC_DRAW);
    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, 0, 0);

    // Index buffer; the binding is recorded in the vertex array object.
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, d->vertexArrayBuffers[eIndex_Vb]);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(unsigned int) * p_model.indices.size(), p_model.indices.data(), GL_STATIC_DRAW);

    GLint mat4_pos0 = 3;
    GLint shinyPos = 7;
    GLint materialPos = 8;

    // Per-instance model matrix: a mat4 occupies four consecutive attribute
    // locations (3..6), one vec4 column each. Divisor 1 advances once per instance.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eModel_Vb]);
    for (unsigned int i = 0; i < 4; i++)
    {
        glEnableVertexAttribArray(mat4_pos0 + i);
        glVertexAttribPointer(mat4_pos0 + i, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4),
                              (const GLvoid*)(sizeof(GLfloat) * i * 4));
        glVertexAttribDivisor(mat4_pos0 + i, 1);
    }

    // Per-instance shininess: a single float.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eShiny_Vb]);
    glEnableVertexAttribArray(shinyPos);
    glVertexAttribPointer(shinyPos, 1, GL_FLOAT, GL_FALSE, 0, 0);
    glVertexAttribDivisor(shinyPos, 1);

    // Per-instance specular colour. The buffer is filled with glm::vec3 values
    // and the shader reads a vec3, so the attribute must have 3 components;
    // with 1 component only the red channel arrives and the stride is wrong.
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eSpecular_Vb]);
    glEnableVertexAttribArray(materialPos);
    glVertexAttribPointer(materialPos, 3, GL_FLOAT, GL_FALSE, 0, 0);
    glVertexAttribDivisor(materialPos, 1);
}
Called once per frame.
/// Uploads the per-instance attributes of every object and renders all of them
/// with a single instanced, indexed draw call.
///
/// @param p_objects Objects to draw; each contributes one instance.
/// @param p_program Shader program handle (not used by this function).
void Mesh::draw(std::vector<Object*> const & p_objects, GLuint p_program)
{
    // Gather the per-instance data into contiguous arrays, one entry per object.
    std::vector<glm::mat4> transforms;
    std::vector<glm::float32> shininessValues;
    std::vector<glm::vec3> specularTints;
    transforms.reserve(p_objects.size());
    shininessValues.reserve(p_objects.size());
    specularTints.reserve(p_objects.size());
    for (Object* object : p_objects)
    {
        transforms.push_back(object->getTransform());
        shininessValues.push_back(object->getShininess());
        specularTints.push_back(object->getSpecularColor());
    }

    // Re-specify each instance buffer with this frame's data.
    unsigned int transformBytes = transforms.size() * sizeof(transforms[0]);
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eModel_Vb]);
    glBufferData(GL_ARRAY_BUFFER, transformBytes, transforms.data(), GL_DYNAMIC_DRAW);

    unsigned int shininessBytes = shininessValues.size() * sizeof(shininessValues[0]);
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eShiny_Vb]);
    glBufferData(GL_ARRAY_BUFFER, shininessBytes, shininessValues.data(), GL_DYNAMIC_DRAW);

    unsigned int specularBytes = specularTints.size() * sizeof(specularTints[0]);
    glBindBuffer(GL_ARRAY_BUFFER, d->vertexArrayBuffers[eSpecular_Vb]);
    glBufferData(GL_ARRAY_BUFFER, specularBytes, specularTints.data(), GL_DYNAMIC_DRAW);

    // One draw call for all instances; indices are 32-bit, base vertex 0.
    glDrawElementsInstancedBaseVertex(GL_TRIANGLES, d->drawCount, GL_UNSIGNED_INT, 0, p_objects.size(), 0);
}
Called once per frame
/// Renders the given objects with this renderer's texture, shader and mesh.
/// Does nothing when the list is empty.
///
/// @param p_objects Objects to render (taken by value).
void GenericRenderer::renderObjects(std::vector<Object*> p_objects)
{
    if (!p_objects.empty())
    {
        // Bind the shared state and push the per-frame shader inputs.
        m_texture->bind(0);
        m_shader->bind();
        m_shader->updateCamera(m_camera);
        m_shader->updateLightSource(*m_light);
        m_shader->updateObjects(p_objects);
        m_mesh->bind();

        // Refresh every object's origin and transform before drawing.
        for (Object* object : p_objects)
        {
            object->setOrigin(m_camera);
            object->updateTransform();
        }

        m_mesh->draw(p_objects, m_shader->getProgram());
        m_mesh->unbind();
    }
}
Vertex Shader
#version 330
// Instanced vertex shader: forwards the raw vertex data and per-instance
// material values to the fragment stage and computes the clip-space position.
uniform mat4 camera;
layout (location = 0) in vec3 position;
layout (location = 1) in vec2 texCoord;
layout (location = 2) in vec3 normal;
// A mat4 input occupies four consecutive locations (3..6).
layout (location = 3) in mat4 model;
layout (location = 7) in float materialShininess;
layout (location = 8) in vec3 materialSpecularColor;
out vec3 fragVert;
out vec2 fragTexCoord;
out vec3 fragNormal;
out mat4 fragModel;
out float fragMaterialShininess;
out vec3 fragMaterialSpecularColor;
void main()
{
// Position and normal are passed on untransformed, together with the model
// matrix itself.
fragModel = model;
fragTexCoord = texCoord;
fragNormal = normal;
fragVert = position;
fragMaterialShininess = materialShininess;
fragMaterialSpecularColor = materialSpecularColor;
gl_Position = camera * model * vec4(position, 1);
}
Fragment Shader
#version 150
// Per-fragment lighting: ambient + diffuse + specular from a single point light
// with distance attenuation, followed by exposure tone mapping and gamma encoding.
uniform vec3 cameraPosition;
uniform float exposure;
uniform float lightDistanceModifier;
uniform sampler2D tex;
uniform struct Light {
    vec3 position;
    vec3 intensities; //a.k.a the color of the light
    float attenuation;
    float ambientCoefficient;
} light;
in vec2 fragTexCoord;
in vec3 fragNormal;
in vec3 fragVert;
in mat4 fragModel;
in float fragMaterialShininess;
in vec3 fragMaterialSpecularColor;
out vec4 finalColor;
void main() {
    // Transform the normal with the inverse-transpose of the model matrix's
    // upper 3x3 and the position with the full model matrix.
    vec3 normal = normalize(transpose(inverse(mat3(fragModel))) * fragNormal);
    vec3 surfacePos = vec3(fragModel * vec4(fragVert, 1));
    vec4 surfaceColor = texture(tex, fragTexCoord);
    vec3 surfaceToLight = normalize(light.position - surfacePos);
    vec3 surfaceToCamera = normalize(cameraPosition - surfacePos);
    //ambient
    vec3 ambient = light.ambientCoefficient * surfaceColor.rgb * light.intensities;
    //diffuse
    float diffuseCoefficient = max(0.0, dot(normal, surfaceToLight));
    vec3 diffuse = diffuseCoefficient * surfaceColor.rgb * light.intensities;
    //specular (only on surfaces facing the light)
    float specularCoefficient = 0.0;
    if(diffuseCoefficient > 0.0)
        specularCoefficient = pow(max(0.0, dot(surfaceToCamera, reflect(-surfaceToLight, normal))), fragMaterialShininess);
    vec3 specular = specularCoefficient * fragMaterialSpecularColor * light.intensities;
    //attenuation: 1 / (1 + k * d^2), with d scaled by lightDistanceModifier
    float distanceToLight = length(light.position - surfacePos);
    distanceToLight *= lightDistanceModifier;
    float attenuation = 1.0 / (1.0 + light.attenuation * distanceToLight * distanceToLight);
    //linear color (color before gamma correction)
    vec3 linearColor = ambient + attenuation*(diffuse + specular);
    //final color (after gamma correction)
    vec3 gamma = vec3(1.0/2.2);
    // Exposure tone mapping squeezes the unbounded linear colour into [0, 1).
    vec3 mapped = vec3(1.0) - exp(-linearColor * exposure);
    // Gamma encoding raises to 1/2.2. The previous exponent, 1.0 / gamma,
    // evaluated to 2.2 and darkened the image instead of encoding it.
    mapped = pow(mapped, gamma);
    finalColor = vec4(mapped, surfaceColor.a);
}
OpenGL state changes are very expensive. If you are rendering 20000 objects individually per frame then you're most likely CPU bound. Your goal should be to render as many vertices as possible with as few state changes as possible.
If your 20000 objects are all using the same model then your situation is a prime candidate for instanced rendering. Instanced rendering lets you render the same model thousands of times in one draw call. If you couple this with a separate vertex buffer that contains WVP matrices for each model then you can render each of those model instances at a unique location within the world.
Be warned though, instanced rendering isn't some sort of panacea for all your draw call woes. It has its own unique overhead from constructing a buffer of MVP matrices on the CPU each frame. If the number of instances you're rendering isn't at least in the hundreds, you'll likely see worse performance than your current rendering method.
EDIT: You are already using instanced rendering, my apologies.
After reading your code more thoroughly you are likely right in your assumption that you're GPU bound. However, it's not currently clear why you are constructing specular and shininess buffers once per frame when these attributes tend to remain constant for a material.

GLSL/OpenGL shader tessellation flickering and failure

I just started with OpenGL tessellation and have run into a bit a trouble. I am tessellating series of patches formed by one vertex each. These vertices/patches are structured in a gridlike fashion to later form a terrain generated by Perlin Noise.
The problem I have run into is that starting from the second patch, and every 5th patch after that, sometimes have a lot of tessellation (not the way i configured) but most of the time it doesn't get tessellated at all.
Like so:
The two white circles mark the highly/over tessellated patches. Also note the pattern of untessellated patches.
The strange thing is that it works on my Surface Pro 2 (Intel HD4400 graphics) but bugs on my main desktop computer (AMD HD6950 graphics). Is it possible the hardware is bad?
The patches are generated with the code:
// Builds one control point per terrain patch, laid out row by row on a grid
// with 1.5 units between neighbours, and uploads them to a vertex buffer.
vec4* patches = new vec4[m_patchesWidth * m_patchesDepth];
int c = 0;
for (unsigned int z = 0; z < m_patchesDepth; ++z) {
    for (unsigned int x = 0; x < m_patchesWidth; ++x) {
        patches[c] = vec4(x * 1.5f, 0, z * 1.5f, 1.0f);
        c++;
    }
}
m_fxTerrain->Apply();
glGenBuffers(1, &m_planePatches);
glBindBuffer(GL_ARRAY_BUFFER, m_planePatches);
glBufferData(GL_ARRAY_BUFFER, m_patchesWidth * m_patchesDepth * sizeof(vec4), patches, GL_STATIC_DRAW);
GLuint loc = m_fxTerrain->GetAttrib("posIn");
glEnableVertexAttribArray(loc);
// Configure the same attribute that was just enabled. A hard-coded 0 only
// works when the linker happens to assign "posIn" location 0.
glVertexAttribPointer(loc, 4, GL_FLOAT, GL_FALSE, sizeof(vec4), nullptr);
// The array was allocated with new[], so it must be released with delete[].
delete[] patches;
And drawn with:
// Each patch consists of a single control point.
glPatchParameteri(GL_PATCH_VERTICES, 1);
glBindVertexArray(patches);
// Draw polygons as outlines (wireframe) on both faces.
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
// Submit nrOfPatches vertices as patch primitives, starting at vertex 0.
glDrawArrays(GL_PATCHES, 0, nrOfPatches);
Vertex Shader:
#version 430 core
// Pass-through vertex shader: forwards the patch control point unchanged.
in vec4 posIn;
out gl_PerVertex {
vec4 gl_Position;
};
void main() {
gl_Position = posIn;
}
Control shader:
#version 430
#extension GL_ARB_tessellation_shader : enable
// Tessellation control shader: one output control point per patch, with
// uniform-driven tessellation levels.
layout (vertices = 1) out;
uniform float OuterTessFactor;
uniform float InnerTessFactor;
out gl_PerVertex {
vec4 gl_Position;
} gl_out[];
void main() {
// Patch-level outputs are written once, by invocation 0.
if (gl_InvocationID == 0) {
// Four outer levels and two inner levels, all taken from the uniforms.
gl_TessLevelOuter[0] = OuterTessFactor;
gl_TessLevelOuter[1] = OuterTessFactor;
gl_TessLevelOuter[2] = OuterTessFactor;
gl_TessLevelOuter[3] = OuterTessFactor;
gl_TessLevelInner[0] = InnerTessFactor;
gl_TessLevelInner[1] = InnerTessFactor;
}
// Forward the control point position unchanged.
gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
}
Evaluation shader:
#version 430
#extension GL_ARB_tessellation_shader : enable
// Tessellation evaluation shader: expands each single-point patch into a unit
// quad in the XZ plane and displaces it vertically with a noise texture.
layout (quads, equal_spacing, ccw) in;
uniform mat4 ProjView;
uniform sampler2D PerlinNoise;
out vec3 PosW;
out vec3 Normal;
out vec4 ColorFrag;
out gl_PerVertex {
    vec4 gl_Position;
};
void main() {
    // Start from the patch's only control point and offset it by the
    // tessellation coordinate, which spans [0, 1] in both directions.
    vec4 pos = gl_in[0].gl_Position;
    pos.xz += gl_TessCoord.xy;
    // Height comes from the red channel of the noise texture, mapped to [-10, 0].
    // texture() replaces texture2D(), which the core profile selected by
    // "#version 430" does not provide.
    pos.y = texture(PerlinNoise, pos.xz / vec2(8, 8)).x * 10.0f - 10.0f;
    // Constant up-facing normal; the displacement is not taken into account.
    Normal = vec3(0, 1, 0);
    gl_Position = ProjView * pos;
    PosW = pos.xyz;
    // Debug colour derived from the XZ position.
    ColorFrag = vec4(pos.x / 64.0f, 0.0f, pos.z / 64.0f, 1.0f);
}
Fragment shader:
#version 430 core
// Fragment shader writing two outputs: the interpolated colour and the normal.
in vec3 PosW;
in vec3 Normal;
in vec4 ColorFrag;
// NOTE(review): the evaluation shader declares no PosH output (only PosW,
// Normal and ColorFrag); this input is also never read here.
in vec4 PosH;
out vec3 FragColor;
out vec3 FragNormal;
void main() {
FragNormal = Normal;
FragColor = ColorFrag.xyz;
}
I have tried to hardcode the different tessellation levels but that did not help. I recently started out with OpenGL so please let me know if i am doing something stupid.
So does anyone have any idea what could be causing this "flickering" of certain patches?
Update: I had a friend run the project and he got the same pattern of flickering tessellation but the failing patches were not drawn at all except when being overly tessellated. He has the same graphics card as I do (AMD HD6950).
You should use triangle/quad tessellation, in which each patch has 3 or 4 vertices. As I can see, you use quads (I use them too). In that case, you can set it like this:
glPatchParameteri(GL_PATCH_VERTICES,4);
glBindVertexArray(VertexArray);
(TIP: use drawelements for your terrain, much better performance for 2D-displacement based mesh.)
In the control shader, use
layout (vertices = 4) out;
since your patch has 4 control points. The ordering is still important (CCW/CW).
Personally I don't like to use built-in variables, so for the vertex shader you can send your vertex data to the tesscontrol like this:
layout (location = 0) out vec3 outPos;
....
outPos.xz = grid.xy;
outPos.y = noise(outPos.xz);
Tess control:
layout (location = 0) in vec3 inPos[]; //outPos (location = 0) from vertex shader
//'collects' the 4 control points to an array in the order they're sended
layout (location = 0) out vec3 outPos[]; //send the c.points to the ev. shader
...
gl_TessLevelOuter[0] = outt[0];
gl_TessLevelOuter[1] = outt[1];
gl_TessLevelOuter[2] = outt[2];
gl_TessLevelOuter[3] = outt[3];
gl_TessLevelInner[0] = inn[0];
gl_TessLevelInner[1] = inn[1];
outPos[ID] = inPos[ID];//gl_invocationID = ID
Note that both in and out vertex data is an array.
The tessev is simple:
layout (location = 0) in vec3 inPos[]; //the 4 control points
layout (location = 0) out vec3 outPos; //this is no longer array, next is the fragment shader
...
//edit: do not forgot to add the next line
layout (quads) in;
// Bilinear interpolation over the quad patch: blends v0->v1 and v3->v2 along
// gl_TessCoord.x, then blends those two results along gl_TessCoord.y.
vec3 interpolate3D(vec3 v0, vec3 v1, vec3 v2, vec3 v3)
{
    vec3 edge01 = mix(v0, v1, gl_TessCoord.x);
    vec3 edge32 = mix(v3, v2, gl_TessCoord.x);
    return mix(edge01, edge32, gl_TessCoord.y);
}
...main{...
outPos = interpolate3D(inPos[0],inPos[1],inPos[2],inPos[3]); //the four control points of the quad. Every other point is linearly interpolated between them according to the TessCoord.
gl_Position = mvp * vec4(outPos,1.0f);
A good representation of the quad domain: http://ogldev.atspace.co.uk/www/tutorial30/tutorial30.html.
I think the problem is with your one-vertex patch. I cannot imagine how a one-vertex patch can be divided into triangles, and I don't know how it works on other hardware. Tessellation divides primitives into other, simpler primitives (triangles in the case of OpenGL), since those can be handled by a GPU easily (3 points always lie in a plane). So, the minimum number of patch vertices should be 3, for a triangle. I like quads, because they are simpler to index and the memory cost is lower. They will be divided into triangles too during tessellation. http://www.informit.com/articles/article.aspx?p=2120983
Also, there is another type, the isoline tessellation. (check out the links, the second is pretty good.)
All in all, try it with quads or triangles, and set the control vertices to 4 (or 3). My (pretty complex) terrain shader is here with frustum culling, tessellation shader culling for a geoclipmap based terrain. Also, without tessellation it works with vertex morph in vertex shader. Maybe some part of this code will be useful. http://speedy.sh/TAvPR/gshader.txt
A scene with tessellation at about 4 pixels/triangle runs at 75 FPS (with fraps) with runtime normal calculation and bicubic smoothing and other things. I'm using AMD HD 5750. It still could be much faster with better code and pre-baked normals:D. (runs at max 120 w/o normal calc.)
Oh, and you can only send the x and z coords if you displace the vertex in the shader. It will be faster too.
Lots of vertices.

Skeletal Animation using ASSIMP and GLSL: bone uniform array size

I'm working on an ASSIMP skeletal animation loader and renderer and right now all the data is correctly loaded and interpolated at its current timeframe. However, there is still one part that isn't working as it should and that's the vertex shader stage.
Via a VBO I pass in two vec4s that contain the bone IDs and the weights for each vertex (up to a maximum of 4 bones/weights per vertex) and the vertex shader has a matrix array of 100 bone transformations (pre-calculated per frame) that are indexed via the bone IDs.
However, it seems that the bones uniform doesn't contain the proper transformations. For debugging purposes I colored the model with the weight values and the bone IDs value and they contain a color (and thus valid values). However, when I transform my vertex via the bone transformation and color the model with the result, the entire model is colored black, meaning the transformation matrices are all 0.0. So they're not initialized properly.
I think the problem is with passing the matrices to the uniform array, or perhaps the maximum size of uniforms allowed (I also tried setting the number of uniform matrices to 32 (number of bones on current model) but without effect)?
Before passing the information to the shader, the transformation matrices are indeed valid matrices (not identity/empty matrices) so the fault should probably be in the GLSL shader or the passing of the uniforms.
The following code is from the vertex shader:
#version 330
// Vertex shader in a debugging state: the blended bone matrix is written to the
// colorz output, while the position is still transformed without skinning.
layout (location = 0) in vec3 position;
layout(location = 1) in vec3 normal;
layout(location = 2) in vec3 tangent;
layout(location = 3) in vec3 color;
layout(location = 4) in vec2 texCoord;
// Indices into the bones array, one per weight.
layout(location = 5) in ivec4 boneIDs;
layout(location = 6) in vec4 weights;
uniform mat4 projection;
uniform mat4 view;
uniform mat4 model;
uniform mat4 bones[100];
out vec2 TexCoord;
out vec4 colorz;
void main()
{
vec4 newPos = vec4(position, 1.0);
// Default debug colour (green) for vertices whose weights are all zero.
colorz = vec4(0.0, 1.0, 0.0, 1.0);
if (weights != vec4(0.0, 0.0, 0.0, 0.0))
{
// Weighted sum of the four referenced bone matrices.
mat4 boneTransform = bones[boneIDs[0]] * weights[0];
boneTransform += bones[boneIDs[1]] * weights[1];
boneTransform += bones[boneIDs[2]] * weights[2];
boneTransform += bones[boneIDs[3]] * weights[3];
// newPos = boneTransform * vec4(position, 1.0);
// Debug output: an all-zero boneTransform shows up as black.
vec4 test = vec4(1.0);
colorz = boneTransform * test;
// newPos = boneTransform * newPos;
}
TexCoord = texCoord;
gl_Position = projection * view * model * newPos;
}
The following snippet of code pass the matrix data to the GLSL shader:
// Sets bone transformation matrices
// Sets the bone transformation matrix stored at boneLocations[index].
// The aiMatrix4x4 is read as 16 consecutive floats starting at a1 and transposed
// into a glm::mat4 before upload.
// NOTE(review): the target uniform is a mat4 (bones[100]) but the call below is
// glUniformMatrix3fv; this mismatch is reported as raising GL_INVALID_OPERATION,
// so glUniformMatrix4fv is the call that matches the uniform.
void Shader::SetBoneMatrix(GLint index, aiMatrix4x4 matrix)
{
glm::mat4 mat = glm::transpose(glm::make_mat4(&matrix.a1));
glUniformMatrix3fv(boneLocations[index], 1, GL_FALSE, glm::value_ptr(mat));
}
Also the code that gets all the uniform locations of the bones array:
for(unsigned int i = 0; i < 100; i++)
{
string name = "bones[";
string number;
stringstream ss;
ss << i;
ss >> number;
name += number;
name += ']';
boneLocations[i] = glGetUniformLocation(this->program, name.c_str());
}
OK, via glslDevil I came across a continuous GL_INVALID_OPERATION error when setting the bone matrix in the shader via glUniformMatrix. The origin of the problem was indeed at the stage where the program passes the information along to the shader.
It is quite a stupid mistake actually, since I was using glUniformMatrix3fv instead of glUniformMatrix4fv. Changing this did indeed solve the problem and the animations are working perfectly right now.

GLSL Instancing - Max number of inputs for vertex data?

I am trying to implement instancing in my OpenGL program. I got it to work, and then decided to make my GLSL code more efficient by sending the Model-View-Projection multiplication matrix as input to the GLSL program, so that the CPU computes it for each instance, opposed to the GPU. Here is my vertex shader code (most of it is irrelevant to my question):
#version 330 core
// Input vertex data, different for all executions of this shader.
layout(location = 0) in vec3 vertexPosition_modelspace;
layout(location = 2) in vec3 vertexColor;
layout(location = 3) in vec3 vertexNormal_modelspace;
// Per-instance matrices. Each mat4 occupies four consecutive locations:
// models 6..9, modelsV 10..13, modelsVP 14..17.
// NOTE(review): locations 16 and 17 exceed the 16 attributes an implementation
// is only required to support; the compiler rejects modelsVP once it is used.
layout(location = 6) in mat4 models;
layout(location = 10) in mat4 modelsV;
layout(location = 14) in mat4 modelsVP;
// Output data ; will be interpolated for each fragment.
out vec3 newColor;
out vec3 Position_worldspace;
out vec3 Normal_cameraspace;
out vec3 EyeDirection_cameraspace;
// Values that stay constant for the whole mesh.
uniform mat4 MVP;
uniform mat4 MV;
uniform mat4 P;
uniform mat4 V;
uniform mat4 M;
uniform int num_lights;
uniform vec3 Lights[256];
void main(){
// Output position of the vertex, in clip space : P * (per-instance model-view) * position
gl_Position = P * modelsV * vec4(vertexPosition_modelspace,1);
// Position of the vertex, in worldspace : per-instance model matrix * position
Position_worldspace = (models * vec4(vertexPosition_modelspace,1)).xyz;
// Vector that goes from the vertex to the camera, in camera space.
// In camera space, the camera is at the origin (0,0,0).
vec3 vertexPosition_cameraspace = ( modelsV * vec4(vertexPosition_modelspace,1)).xyz;
EyeDirection_cameraspace = vec3(0,0,0) - vertexPosition_cameraspace;
// Normal of the the vertex, in camera space
Normal_cameraspace = ( modelsV * vec4(vertexNormal_modelspace,0)).xyz;
// Vertex colour, passed through unchanged.
newColor = vertexColor;
}
The above code works, but only because I'm not using the last input modelsVP to calculate gl_position. If I do use it (instead of computing P*modelsV), the instances won't be drawn, and I get this error:
Linking program
Compiling shader : GLSL/meshColor.vertexshader
Compiling shader : GLSL/meshColor.fragmentshader
Linking program
Vertex info
0(10) : error C5102: input semantic attribute "ATTR" has too big of a numeric index (16)
0(10) : error C5102: input semantic attribute "ATTR" has too big of a numeric index (16)
0(10) : error C5041: cannot locate suitable resource to bind variable "modelsVP". Possibly large array.
I'm sure I'm linking it correctly in my OpenGL code, because if I swap the input location modelsVP with modelsV so that it is 10 instead of 14, I am able to use it, but not modelsV. Is there a maximum number of inputs you can have for your vertex shader? I really can't think of any other idea of why else I would get this error...
I'll include more of my OpenGL code that is relevant here, but I'm pretty sure that it's correct (it's not all in the same class or method):
// Buffer data for VBO. The numbers must match the layout in the GLSL code.
#define position 0
#define uv 1
#define color 2
#define normal 3
#define tangent 4
#define bitangent 5
#define model 6 // 4x4 matrices take 4 positions
#define modelV 10
#define modelVP 14
#define num_buffers 18
GLuint VBO[num_buffers];
glGenBuffers(num_buffers, VBO);
for( int i=0; i<ModelMatrices.size(); i++ )
{
mvp.push_back( projection * view * ModelMatrices.at(i) );
mv.push_back( view * ModelMatrices.at(i) );
}
glBindBuffer(GL_ARRAY_BUFFER, VBO[model]);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::mat4) * ModelMatrices.size(), &ModelMatrices[0], GL_DYNAMIC_DRAW);
for (unsigned int i = 0; i < 4 ; i++) {
glEnableVertexAttribArray(model + i);
glVertexAttribPointer(model + i, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4),
(const GLvoid*)(sizeof(GLfloat) * i * 4));
glVertexAttribDivisor(model + i, 1);
}
glBindBuffer(GL_ARRAY_BUFFER, VBO[modelV]);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::mat4) * mv.size(), &mv[0], GL_DYNAMIC_DRAW);
for (unsigned int i = 0; i < 4 ; i++) {
glEnableVertexAttribArray(modelV + i);
glVertexAttribPointer(modelV + i, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4),
(const GLvoid*)(sizeof(GLfloat) * i * 4));
glVertexAttribDivisor(modelV + i, 1);
}
glBindBuffer(GL_ARRAY_BUFFER, VBO[modelVP]);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::mat4) * mvp.size(), &mvp[0], GL_DYNAMIC_DRAW);
for (unsigned int i = 0; i < 4 ; i++) {
glEnableVertexAttribArray(modelVP + i);
glVertexAttribPointer(modelVP + i, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4), (const GLvoid*)(sizeof(GLfloat) * i * 4));
glVertexAttribDivisor(modelVP + i, 1);
}
OpenGL mandates implementations offer a minimum of 16 4-component vertex attributes. Therefore an index of 16 is not guaranteed to be supported by all implementations; see GL_MAX_VERTEX_ATTRIBS for more details.
Your mat4 vertex attributes count as 4 4-component attributes, so an index of 14 is out of range on implementations that only support 16 4-component vertex attributes.
You are using too many vertex attributes. Here's how to reduce the number of attributes without changing anything much about your code (and any functional changes are improvements). The following assumes that models is the "model-to-world" matrix, modelsV is the "model-to-camera" matrix, and that modelsVP is the "model-to-projection" matrix:
#version 330 core
// Reduced variant: a single per-instance matrix (model-to-camera) is the only
// matrix attribute, and all lighting data is output in camera space.
// Input vertex data, different for all executions of this shader.
layout(location = 0) in vec3 vertexPosition_modelspace;
layout(location = 2) in vec3 vertexColor;
layout(location = 3) in vec3 vertexNormal_modelspace;
// Occupies locations 6..9.
layout(location = 6) in mat4 modelsV;
// Output data ; will be interpolated for each fragment.
out vec3 newColor;
//The fragment shader should work in *camera* space, not world space.
out vec4 Position_cameraspace;
out vec3 Normal_cameraspace;
//out vec3 EyeDirection_cameraspace; Can be computed from Position_cameraspace in the FS.
// Values that stay constant for the whole mesh.
uniform mat4 P;
void main()
{
// The camera-space position is computed once and reused for the clip-space position.
Position_cameraspace = modelsV * vec4(vertexPosition_modelspace, 1.0);
gl_Position = P * Position_cameraspace;
// w = 0 so the normal is not affected by translation.
Normal_cameraspace = ( modelsV * vec4(vertexNormal_modelspace,0)).xyz;
newColor = vertexColor;
}
See? Isn't that much simpler? Fewer uniforms in the vertex shader, fewer outputs to the fragment shader, fewer math computations, and fewer vertex attributes.
All you need to do is change your fragment shader to use the camera-space position, rather than the world-space position. Which should be a reasonably easy change.