How to accurately time performance of intensive vertex shader? [closed] - c++

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 10 months ago.
Improve this question
I have a compute-heavy OpenGL vertex shader which I'm trying to profile the performance of.
Following the conventional wisdom
¹ ² ³, I'm computing the frames per second in my glfw app by waiting over 1 second and dividing the number of frames by the time elapsed. My FPS counter claims ≈30 FPS but it's clearly more like 1 FPS. Notice the grass blowing in the breeze behind the screen.
My minimal example (below and in this gist) animates a densely tessellated grid and performs dummy computation in the vertex shader until the issue appears.
Is there a way to measure FPS or the performance of this shader in a way that it accurately reflects its real behavior?
// Controls how much (dummy) computation happens in the vertex shader.
// The value is uploaded as the shader's `m` uniform, which is the iteration
// count of the shader's per-vertex loop.
const int m = 20000;
#define GL_SILENCE_DEPRECATION
#include <OpenGL/gl3.h>
#define __gl_h_
#include <Eigen/Core>
#include <Eigen/Geometry>
#define GLFW_INCLUDE_GLU
#include <GLFW/glfw3.h>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
// GLSL 3.30 vertex shader source. It displaces each grid vertex along z with
// a wave animated by the `t` uniform, then runs a loop of `m` iterations that
// nudges z by a tiny amount each pass (the dummy workload being profiled).
// The result is multiplied by `model` and `proj` and written both to
// gl_Position and to the `position_eye` output.
std::string vertex_shader = R"(
#version 330 core
uniform mat4 proj;
uniform mat4 model;
uniform float t;
uniform int m;
in vec3 position;
out vec4 position_eye;
void main()
{
vec4 deformed =
vec4(
position.x,
position.y,
sin(t*3.14159)*
cos(position.x*3.14159)*
cos(position.y*3.14159)
,
1.);
for(int j = 0;j<m;j++)
{
deformed.z = deformed.z + 0.000001*float(j)/float(m);
}
position_eye = proj * model * deformed;
gl_Position = position_eye;
}
)";
// GLSL 3.30 fragment shader source. It builds a per-fragment normal from the
// screen-space derivatives of `position_eye` and outputs that normal,
// scaled by 0.5 and offset by 0.5, as the fragment colour.
std::string fragment_shader = R"(
#version 330 core
in vec4 position_eye;
out vec3 color;
void main()
{
vec3 xTangent = dFdx(position_eye.xyz);
vec3 yTangent = dFdy(position_eye.xyz);
color = normalize( cross( yTangent, xTangent ) )*0.5 + 0.5;
}
)";
// width, height, shader id, vertex array object
// w and h hold the window size; they are overwritten by the window-size
// callback installed in main.
int w=800,h=600;
// Ratio of framebuffer width to window width, computed once in main and used
// to scale the viewport.
double highdpi=1;
// Handle of the linked shader program (0 until main creates it).
GLuint prog_id=0;
// Vertex array object that records the grid's vertex/index buffer bindings.
GLuint VAO;
// Mesh data: RowMajor is important to directly use in OpenGL
// V holds one xyz position per row; F holds three vertex indices per row.
Eigen::Matrix< float,Eigen::Dynamic,3,Eigen::RowMajor> V;
Eigen::Matrix<GLuint,Eigen::Dynamic,3,Eigen::RowMajor> F;
int main(int argc, char * argv[])
{
using namespace std;
const auto get_seconds = []()
{
return
std::chrono::duration<double>(
std::chrono::system_clock::now().time_since_epoch()).count();
};
if(!glfwInit())
{
cerr<<"Could not initialize glfw"<<endl;
return EXIT_FAILURE;
}
const auto & error = [] (int error, const char* description)
{
cerr<<description<<endl;
};
glfwSetErrorCallback(error);
glfwWindowHint(GLFW_SAMPLES, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
GLFWwindow* window = glfwCreateWindow(w, h, "WebGL", NULL, NULL);
if(!window)
{
glfwTerminate();
cerr<<"Could not create glfw window"<<endl;
return EXIT_FAILURE;
}
glfwMakeContextCurrent(window);
int major, minor, rev;
major = glfwGetWindowAttrib(window, GLFW_CONTEXT_VERSION_MAJOR);
minor = glfwGetWindowAttrib(window, GLFW_CONTEXT_VERSION_MINOR);
rev = glfwGetWindowAttrib(window, GLFW_CONTEXT_REVISION);
printf("OpenGL version recieved: %d.%d.%d\n", major, minor, rev);
printf("Supported OpenGL is %s\n", (const char*)glGetString(GL_VERSION));
printf("Supported GLSL is %s\n", (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION));
glfwSetInputMode(window,GLFW_CURSOR,GLFW_CURSOR_NORMAL);
const auto & reshape = [] (GLFWwindow* window, int w, int h)
{
::w=w,::h=h;
};
glfwSetWindowSizeCallback(window,reshape);
{
int width, height;
glfwGetFramebufferSize(window, &width, &height);
int width_window, height_window;
glfwGetWindowSize(window, &width_window, &height_window);
highdpi = width/width_window;
reshape(window,width_window,height_window);
}
// Compile each shader
const auto & compile_shader = [](const GLint type,const char * str) -> GLuint
{
GLuint id = glCreateShader(type);
glShaderSource(id,1,&str,NULL);
glCompileShader(id);
return id;
};
GLuint vid = compile_shader(GL_VERTEX_SHADER,vertex_shader.c_str());
GLuint fid = compile_shader(GL_FRAGMENT_SHADER,fragment_shader.c_str());
// attach shaders and link
prog_id = glCreateProgram();
glAttachShader(prog_id,vid);
glAttachShader(prog_id,fid);
glLinkProgram(prog_id);
GLint status;
glGetProgramiv(prog_id, GL_LINK_STATUS, &status);
glDeleteShader(vid);
glDeleteShader(fid);
// construct a regular grid mesh
const int nx = 300;
const int ny = 305;
V.resize(nx*ny,3);
for(int i = 0;i<nx;i++)
{
for(int j = 0;j<ny;j++)
{
const float x = float(i)/(nx-1);
const float y = float(j)/(ny-1);
V.row(j*nx+i) << x,y, 0;
}
}
F.resize((nx-1)*(ny-1)*2,3);
for(int y = 0;y<ny-1;y++)
{
for(int x = 0;x<nx-1;x++)
{
// index of southwest corner
const int sw = (x +nx*(y+0));
const int se = (x+1+nx*(y+0));
const int ne = (x+1+nx*(y+1));
const int nw = (x +nx*(y+1));
// Index of first triangle in this square
const int gf = 2*(x+(nx-1)*y);
F(gf+0,0) = sw;
F(gf+0,1) = se;
F(gf+0,2) = nw;
F(gf+1,0) = se;
F(gf+1,1) = ne;
F(gf+1,2) = nw;
}
}
V.rowwise() -= V.colwise().mean();
V /= (V.colwise().maxCoeff()-V.colwise().minCoeff()).maxCoeff();
V /= 1.2;
// Generate and attach buffers to vertex array
glGenVertexArrays(1, &VAO);
GLuint VBO, EBO;
glGenBuffers(1, &VBO);
glGenBuffers(1, &EBO);
glBindVertexArray(VAO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(float)*V.size(), V.data(), GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, EBO);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint)*F.size(), F.data(), GL_STATIC_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
double t0 = get_seconds();
const auto draw = [&]()
{
double tic = get_seconds();
// clear screen and set viewport
glClearColor(0.1,0.1,0.1,0.);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glViewport(0,0,w*highdpi,h*highdpi);
// Projection and modelview matrices
Eigen::Matrix4f proj;
float near = 0.01;
float far = 100;
float top = tan(35./360.*M_PI)*near;
float right = top * (double)::w/(double)::h;
float left = -right;
float bottom = -top;
proj.setConstant(4,4,0.);
proj(0,0) = (2.0 * near) / (right - left);
proj(1,1) = (2.0 * near) / (top - bottom);
proj(0,2) = (right + left) / (right - left);
proj(1,2) = (top + bottom) / (top - bottom);
proj(2,2) = -(far + near) / (far - near);
proj(3,2) = -1.0;
proj(2,3) = -(2.0 * far * near) / (far - near);
Eigen::Affine3f model = Eigen::Affine3f::Identity();
model.translate(Eigen::Vector3f(0,0,-1.5));
// select program and attach uniforms
glUseProgram(prog_id);
GLint proj_loc = glGetUniformLocation(prog_id,"proj");
glUniformMatrix4fv(proj_loc,1,GL_FALSE,proj.data());
GLint model_loc = glGetUniformLocation(prog_id,"model");
glUniformMatrix4fv(model_loc,1,GL_FALSE,model.matrix().data());
GLint t_loc = glGetUniformLocation(prog_id,"t");
glUniform1f(t_loc,tic-t0);
GLint m_loc = glGetUniformLocation(prog_id,"m");
glUniform1i(m_loc,m);
// Draw mesh as wireframe
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glBindVertexArray(VAO);
glDrawElements(GL_TRIANGLES, F.size(), GL_UNSIGNED_INT, 0);
glBindVertexArray(0);
};
// Main display routine
while (!glfwWindowShouldClose(window))
{
double tic = get_seconds();
static size_t count = 0;
static double t_prev = get_seconds();
if(tic-t_prev > 1)
{
const double fps = double(count)/(tic-t_prev);
std::stringstream ss;
ss << fps <<" FPS";
glfwSetWindowTitle(window, ss.str().c_str());
count = 0;
t_prev = tic;
}
count++;
draw();
glfwSwapBuffers(window);
glfwPollEvents();
}
glfwDestroyWindow(window);
glfwTerminate();
return EXIT_SUCCESS;
}

GPU execution is highly parallelised and asynchronous, so timing it in the way you would CPU code is not going to work. Your GPU vendor will have profiling tools you can download which can provide a better insight than this kind of simple time measuring.

Related

Trying to draw sphere in OpenGL, Only part of the sphere is showing, seems clipped?

I created a minimal setup with a fragment shader that sets the color to white, so it does not even take a parameter.
The vertex shader passes in a matrix and transforms the points. We can see the sphere, but only part of it.
I hesitate to post the whole code, trying as hard as possible for a minimum working solution but it's about 300 lines including the shader loading code. I will post just the core pieces, and if people want more I will post it all.
Here is the code for the demo including a stripped down Sphere class and glmain.
Not shown is main() which does try..catch and calls glmain
#include <GL/glew.h>
#include "common/common.hh"
#include <glm/glm.hpp>
#include <glm/ext.hpp>
#include <numbers>
#include <iostream>
#include <iomanip>
#include <cstdint>
#include <string>
using namespace std;
using namespace glm;
using namespace std::numbers;
/**
 * Wireframe/point sphere whose vertex data lives in OpenGL buffer objects.
 *
 * The destructor releases the GL objects via cleanup(). Copying is disabled
 * because a copy would hold the same handles and delete them a second time.
 */
class Sphere {
private:
  uint32_t progid; // handle to the shader code
  uint32_t vao; // array object container for vbo and indices
  uint32_t vbo; // handle to the point data on the graphics card
  uint32_t lbo; // handle to buffer of indices for lines for wireframe sphere
  uint32_t latRes, lonRes;
  uint32_t resolution; // number of vertices stored in vbo
public:
  /**
   * @brief Construct a sphere
   *
   * @param r radius of the sphere
   * @param latRes resolution of the grid in latitude
   * @param lonRes resolution of the grid in longitude
   */
  Sphere(double r, uint32_t latRes, uint32_t lonRes);
  ~Sphere() { cleanup(); }
  Sphere(const Sphere&) = delete;
  Sphere& operator=(const Sphere&) = delete;
  /// Draw the sphere's vertices using @p trans as the shader's `trans` uniform.
  void render(mat4& trans);
  /// Delete the buffer, vertex array and shader program owned by this object.
  void cleanup();
};
/**
 * Builds the vertex positions of the sphere and uploads them to a new VBO.
 *
 * Generates (2*latRes-2) rings of lonRes points each, followed by the south
 * and north pole, for a total of `resolution` vertices of three floats.
 *
 * @param r      radius of the sphere
 * @param latRes resolution of the grid in latitude
 * @param lonRes resolution of the grid in longitude
 */
Sphere::Sphere(double r, uint32_t latRes, uint32_t lonRes) : latRes(latRes), lonRes(lonRes),
    resolution((2*latRes-2)*lonRes + 2) {
  progid = loadShaders( "05_3d.vert", "02simple.frag" );
  const double dlon = 2.0*numbers::pi / lonRes;
  const double dlat = numbers::pi / latRes;
  // NOTE(review): a runtime-sized array is a compiler extension in C++; kept
  // here so the block needs no additional headers.
  float vert[resolution*3]; // x,y,z for every vertex
  uint32_t c = 0;
  // NOTE(review): with dlat = pi/latRes, 2*latRes-2 steps carry `lat` well
  // past +pi/2; confirm the ring count is what was intended.
  double lat = -numbers::pi/2 + dlat; // latitude in radians
  for (uint32_t j = 0; j < 2*latRes-2; j++, lat += dlat) {
    const double rcircle = r * cos(lat); // radius of the circle at this latitude
    const double z = r * sin(lat);       // height of the circle
    double t = 0;
    for (uint32_t i = 0; i < lonRes; i++, t += dlon) {
      vert[c++] = rcircle * cos(t);
      vert[c++] = rcircle * sin(t);
      vert[c++] = z;
    }
  }
  // south pole
  vert[c++] = 0;
  vert[c++] = 0;
  vert[c++] = -r;
  // north pole
  vert[c++] = 0;
  vert[c++] = 0;
  vert[c++] = r;
  cout << "resolution: " << resolution << endl;
  cout << "predicted num vert components: " << resolution*3 << endl;
  cout << "actual num vert components: " << c << endl;
  glGenVertexArrays(1, &vao);
  glBindVertexArray(vao);
  glGenBuffers(1, &vbo);
  glBindBuffer(GL_ARRAY_BUFFER, vbo);
  // glBufferData takes a size in BYTES. Passing the vertex count uploaded only
  // a fraction of the data, which is why the drawn sphere appeared truncated.
  glBufferData(GL_ARRAY_BUFFER, resolution * 3 * sizeof(float), vert, GL_STATIC_DRAW);
  glBindVertexArray(0);
}
/**
 * Draws the sphere twice: once as points and once as a single line strip.
 *
 * @param trans matrix uploaded to the shader's `trans` uniform
 */
void Sphere::render(mat4& trans) {
  glUseProgram(progid); // Use the shader
  // glGetUniformLocation returns a signed GLint (-1 when the name is not an
  // active uniform); keeping it signed preserves that sentinel.
  const GLint matrixID = glGetUniformLocation(progid, "trans");
  glUniformMatrix4fv(matrixID, 1, GL_FALSE, &trans[0][0]);
  glBindVertexArray(vao);
  // The attribute pointer captures the buffer bound to GL_ARRAY_BUFFER, so
  // bind the sphere's VBO explicitly instead of relying on leftover state.
  glBindBuffer(GL_ARRAY_BUFFER, vbo);
  glVertexAttribPointer(
    0,        // first parameter to shader, numbered 0
    3,        // 3 floating point numbers (x,y,z)
    GL_FLOAT, // type
    GL_FALSE, // normalized?
    0,        // tightly packed
    (void*)0  // array buffer offset
  );
  glEnableVertexAttribArray(0); // pass x,y,z to shader
  // With this enabled the point size comes from gl_PointSize in the shader.
  glEnable(GL_PROGRAM_POINT_SIZE);
  glPointSize(5);
  // The primitive mode is GL_POINTS. GL_POINT is a glPolygonMode value and
  // makes glDrawArrays fail with GL_INVALID_ENUM, so nothing was drawn.
  glDrawArrays(GL_POINTS, 0, resolution);
  glDrawArrays(GL_LINE_STRIP, 0, resolution);
  glDisableVertexAttribArray(0);
}
/**
 * Releases the GL objects owned by the sphere.
 *
 * Handles are reset to 0 afterwards. OpenGL silently ignores deletion of the
 * name 0, so calling cleanup() manually and then letting the destructor call
 * it again cannot delete an object twice.
 */
void Sphere::cleanup() {
  glDeleteBuffers(1, &vbo); // remove vbo memory from graphics card
  vbo = 0;
  glDeleteVertexArrays(1, &vao); // remove vao from graphics card
  vao = 0;
  glDeleteProgram(progid);
  progid = 0;
}
using namespace std;
// Creates the demo window and renders a sphere each frame until Escape is
// pressed or the window is asked to close.
void glmain() {
  win = createWindow(800, 800, "Sphere demo");
  glClearColor(0.0f, 0.0f, 0.4f, 0.0f); // Dark blue background

  Sphere sphere(1.0, 30, 15);
  mat4 trans = lookAt(vec3(0,0,0), vec3(10,5,10), vec3(0,1,0));

  bool keepRunning = true;
  while (keepRunning) {
    // Only the colour buffer is cleared; depth testing is switched off.
    glClear( GL_COLOR_BUFFER_BIT );
    glDisable(GL_DEPTH_TEST);

    sphere.render(trans);

    glfwSwapBuffers(win); // double buffer
    glfwPollEvents();

    keepRunning = glfwGetKey(win, GLFW_KEY_ESCAPE ) != GLFW_PRESS &&
                  glfwWindowShouldClose(win) == 0;
  }
}
Points did not display at all so the call is commented out. We drew a line strip instead. That works somewhat. Why is it truncated? Why doesn't it at least finish the layer of the sphere?
The shaders are shown below:
#version 330 core
// Vertex stage: transforms each incoming position by the `trans` uniform and
// sets the point size used when points are rendered.
// Input vertex data, different for all executions of this shader.
layout(location = 0) in vec3 v;
uniform mat4 trans;
void main(){
gl_PointSize = 5;
gl_Position = trans * vec4(v,1.0);
// Overwrites the transformed w component with 1.0.
// NOTE(review): this discards whatever w the matrix produced; confirm that
// is intended.
gl_Position.w = 1.0;
}
fragment shader:
#version 330 core
// Fragment stage: writes opaque white for every fragment.
out vec4 color;
void main()
{
color = vec4(1,1,1,1);
}
The size argument of glBufferData specifies the size in bytes of the buffer object's new data store, not the number of vertices. Replace the first call below with the second:
glBufferData(GL_ARRAY_BUFFER, resolution, vert, GL_STATIC_DRAW);
glBufferData(GL_ARRAY_BUFFER, resolution * 3 * sizeof(float), vert, GL_STATIC_DRAW);

Polygon tearing in OpenGL

500x500 grid with 1000 sub Divisions:
Just one question.
Why is this happening ?
#include <iostream>
#include <sstream>
#include <vector>
#define GLEW_STATIC
#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include "glm/glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include "GameEngine.hpp"
#include "ShaderProgram.h"
#include "Camera.h"
#include "Mesh.h"
// Window title passed to engine.init.
const char *title = "Terrain";
GameEngine engine;
OrbitCamera orbitCamera;
// Orbit camera state, modified by the mouse-move callback and applied to
// orbitCamera every frame in main.
float gYaw = 0.0f;
float gPitch = 1.0f;
float gRadius = 200.0f;
// Scale factor from mouse movement to yaw/pitch change.
const float MOUSE_SENSTIVITY = 0.25f;
// Toggled by the E key in glfw_onKey to switch between line and fill mode.
bool gWireFrame = false;
// GLFW input callbacks.
void glfw_onKey(GLFWwindow *window, int key, int scancode, int action, int mode);
void glfw_onMouseMove(GLFWwindow *window, double posX, double posY);
void glfw_onMouseScroll(GLFWwindow *window, double deltaX, double deltaY);
int main()
{
if (!engine.init(1024, 768, title))
{
std::cerr << "OpenGL init failed" << std::endl;
std::cin.get();
return -1;
}
//set callbacks
glfwSetKeyCallback(engine.getWindow(), glfw_onKey);
glfwSetCursorPosCallback(engine.getWindow(), glfw_onMouseMove);
std::vector<Vertex> VER;
std::vector<glm::vec3> verts;
std::vector<unsigned int> indices;
std::vector<glm::vec3> norms;
int subDiv = 1000;
int width = 500;
int height = 500;
int size = 0;
for (int row = 0; row < subDiv; row++)
{
for (int col = 0; col < subDiv; col++)
{
float x = (float)((col * width) / subDiv - (width / 2.0));
float z = ((subDiv - row) * height) / subDiv - (height / 2.0);
glm::vec3 pos = glm::vec3(x, 0, z);
verts.push_back(pos);
}
}
size = subDiv * subDiv;
size = verts.size();
for (int row = 0; row < subDiv -1 ; row++)
{
for (int col = 0; col < subDiv -1; col++)
{
int row1 = row * (subDiv);
int row2 = (row+1) * (subDiv);
indices.push_back(row1+col);
indices.push_back(row1+col+1);
indices.push_back( row2+col+1);
indices.push_back(row1+col);
indices.push_back( row2+col+1);
indices.push_back(row2+col);
}
}
for (int i = 0; i < verts.size(); i++)
{
Vertex vertex;
vertex.position = verts[i];
vertex.normal = glm::vec3(0, 0, 0);
vertex.texCoords = glm::vec2(0, 0);
VER.push_back(vertex);
}
VER.begin();
for (int i = 0; i < indices.size(); i += 3)
{
Vertex a = VER[indices[i]];
Vertex b = VER[indices[i + 1]];
Vertex c = VER[indices[i + 2]];
glm::vec3 p = glm::cross(b.position - a.position, c.position - a.position);
VER[indices[i]].normal += p;
VER[indices[i + 1]].normal += p;
VER[indices[i + 2]].normal += p;
}
for (int i = 0; i < VER.size(); i++)
{
VER[i].normal = glm::normalize(VER[i].normal);
}
glm::vec3 cubePos = glm::vec3(0.0f, 0.0f, -5.0f);
GLuint vbo, vao, ibo;
glGenVertexArrays(1, &vao);
glGenBuffers(1, &vbo);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, VER.size() * sizeof(Vertex), &VER[0], GL_STATIC_DRAW);
// Vertex Positions
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)0);
glEnableVertexAttribArray(0);
// Normals attribute
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)(3 * sizeof(GLfloat)));
glEnableVertexAttribArray(1);
// Vertex Texture Coords
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)(6 * sizeof(GLfloat)));
glEnableVertexAttribArray(2);
int n = indices.size() * sizeof(unsigned int);
glGenBuffers(1, &ibo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), &indices[0], GL_STATIC_DRAW);
glBindVertexArray(0);
ShaderProgram shaderProgram;
shaderProgram.loadShaders("shaders/vert.glsl", "shaders/frag.glsl");
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
while (!glfwWindowShouldClose(engine.getWindow()))
{
glfwPollEvents();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glm::mat4 model, view, projection;
model = glm::mat4(1.0f);
orbitCamera.setLookAt(glm::vec3(0, 0, 0));
orbitCamera.rotate(gYaw, gPitch);
orbitCamera.setRadius(gRadius);
model = glm::translate(model, glm::vec3(0, 0, 0));
//model = glm::scale(model, glm::vec3(1, 0, 1));
//model = scaleMat;
projection = glm::perspective(glm::radians(45.0f), (float)engine.getWidth() / (float)engine.getHeight(), 0.00001f, 100.0f);
shaderProgram.use();
glm::vec3 viewPos;
viewPos.x = orbitCamera.getPosition().x;
viewPos.y = orbitCamera.getPosition().y;
viewPos.z = orbitCamera.getPosition().z;
shaderProgram.setUniform("projection", projection);
shaderProgram.setUniform("view", orbitCamera.getViewMatrix());
shaderProgram.setUniform("model", model);
shaderProgram.setUniform("lightPos", glm::vec3(5, 10, 10));
shaderProgram.setUniform("viewPos", viewPos);
glBindVertexArray(vao);
glDrawElements(GL_TRIANGLES,indices.size(), GL_UNSIGNED_INT, 0);
//glDrawArrays(GL_TRIANGLES, 0, VER.size());
glBindVertexArray(0);
glfwSwapBuffers(engine.getWindow());
}
//cleanup
glDeleteVertexArrays(1, &vao);
glDeleteBuffers(1, &vbo);
glfwTerminate();
return 0;
}
// Keyboard callback: Escape requests window close, E toggles between
// wireframe and filled polygon rendering. Only key presses are handled.
void glfw_onKey(GLFWwindow *window, int key, int scancode, int action, int mode)
{
    if (action != GLFW_PRESS)
        return;

    switch (key)
    {
    case GLFW_KEY_ESCAPE:
        glfwSetWindowShouldClose(window, GL_TRUE);
        break;
    case GLFW_KEY_E:
        gWireFrame = !gWireFrame;
        glPolygonMode(GL_FRONT_AND_BACK, gWireFrame ? GL_LINE : GL_FILL);
        break;
    default:
        break;
    }
}
// Cursor callback: dragging with the left button changes yaw/pitch, dragging
// with the right button changes the orbit radius. The previous cursor
// position is remembered between calls.
void glfw_onMouseMove(GLFWwindow *window, double posX, double posY)
{
    static glm::vec2 lastMousePos = glm::vec2(0, 0);

    const float curX = (float)posX;
    const float curY = (float)posY;

    const bool leftDown = glfwGetMouseButton(engine.getWindow(), GLFW_MOUSE_BUTTON_LEFT) == 1;
    if (leftDown)
    {
        gYaw -= (curX - lastMousePos.x) * MOUSE_SENSTIVITY;
        gPitch += (curY - lastMousePos.y) * MOUSE_SENSTIVITY;
    }

    const bool rightDown = glfwGetMouseButton(engine.getWindow(), GLFW_MOUSE_BUTTON_RIGHT) == 1;
    if (rightDown)
    {
        const float dx = 0.01f * (curX - lastMousePos.x);
        const float dy = 0.01f * (curY - lastMousePos.y);
        gRadius += dx - dy;
    }

    lastMousePos.x = curX;
    lastMousePos.y = curY;
}
This is the main code. Rest is just basic initializing code, nothing fancy.
I've tried changing the swap interval, but that doesn't seem to be the problem.
I can share code for the other classes if anyone wants to take a look. And I've also tried lowering the sub divisions.
Edit*
After increasing the value of far plane to 8000:
Still not crisp.
The edit with the second image tells you what is happening: if tampering with znear/zfar changes the output like that, it means your depth buffer has too low a bit width for the range you want to use.
However, increasing zfar should make things worse (for some reason you just don't see it; maybe it is cut off, or it is some numerical-accuracy singularity).
for me its usual to select the planes so:
zfar/znear < (2^depth_buffer_bitwidth)/2
check your depth_buffer_bitwidth
Try to use 24 bits (you might have 16 bits right now). That should work on all gfx cards these days. You can try 32 bits too but that will work only on newer cards. I am using this code to get the max I can:
What is the proper OpenGL initialisation on Intel HD 3000?
However you are using GLFW so you need to find how to do it in it ... probably there is some hint for this in it ...
increase znear as much as you can
tampering znear has much much more impact than zfar...
Use linear depth buffer
This is the best option for views with a large depth range, like terrains, that cover the whole depth range of the view. See:
How to correctly linearize depth in OpenGL ES in iOS?
however you need shaders and new api for this... I do not think this is doable in old api but luckily you are on new api already ...
if none of above is enough
You can stack several frustums together, at the cost of rendering the same geometry multiple times. For more info see:
Is it possible to make realistic n-body solar system simulation in matter of size and mass?
How do you initialize OpenGL?
Are you using GL_BLEND?
Using blending is nice to get anti-aliased polygon edges, however it also means your z-buffer gets updated even when a very translucent fragment is drawn. This prevents other opaque fragments with the same z-depth from being drawn, which might be what is causing those holes. You could try disabling GL_BLEND to see if the issue goes away.
What depth function are you using?
By default it is set to GL_LESS. You might want to try glDepthFunc(GL_LEQUAL); So fragments with the same z-depth will be drawn. However, due to rounding errors this might not solve your problem entirely.

Why Dear ImGui based renderer is so slow?

I have written a class which renders 2D objects, modelled on Dear ImGui's DrawList, because that design can draw many different kinds of objects thanks to its dynamic index and vertex arrays and still stay well optimized. Dear ImGui can render 30k unfilled rects at ~36 fps using ~70 MB in debug mode, without antialiasing (on my computer). My very limited version draws 30k unfilled rects at ~3 fps using ~130 MB in debug mode.
// Batches 2D outline geometry on the CPU and submits it in one draw call.
class Renderer
{
public:
    Renderer();
    ~Renderer();

    // Lifecycle of a frame: Clear, queue shapes, then Render.
    void Create();
    void Clear();
    void Render(float w, float h);

    // Shape submission.
    void DrawRect(float x, float y, float w, float h, GLuint color, float thickness);
    void CreatePolygon(const Vector2* vertices, const GLuint verticesCount, GLuint color, float thickness);
    void ReserveData(int numVertices, int numElements);

    // OpenGL object names.
    GLuint vao;
    GLuint vbo;
    GLuint ebo;
    GLShader shader;

    // Write cursors into vertexBuffer / elementBuffer, set by ReserveData.
    Vertex* mappedVertex = nullptr;
    GLuint* mappedElement = nullptr;
    // Index of the next vertex to be written (an integer, not a pointer).
    GLuint currentVertexIndex = 0;

    // CPU-side geometry uploaded by Render.
    std::vector<Vertex> vertexBuffer;
    std::vector<GLuint> elementBuffer;
    // Scratch list of polygon corners used by DrawRect.
    std::vector<Vector2> vertices;
};
// Vertex shader source: multiplies the position attribute by the
// `projection` uniform and forwards the colour attribute.
// NOTE(review): `v_position` is declared as an output but never written.
const char* vtx =
R"(
#version 460 core
layout(location = 0) in vec3 a_position;
layout(location = 1) in vec4 a_color;
out vec3 v_position;
out vec4 v_color;
uniform mat4 projection;
void main()
{
gl_Position = projection * vec4(a_position, 1.0);
v_color = a_color;
}
)";
// Fragment shader source: outputs the interpolated vertex colour unchanged.
const char* frag =
R"(
#version 460 core
layout (location = 0) out vec4 outColor;
in vec4 v_color;
void main()
{
outColor = v_color;
}
)";
// Discards all geometry queued for the current frame and resets the write
// cursors, ready for a new batch.
void Renderer::Clear()
{
    // Reset cursors first; they point into the buffers emptied below.
    currentVertexIndex = 0;
    mappedElement = nullptr;
    mappedVertex = nullptr;

    vertices.resize(0);
    elementBuffer.resize(0);
    vertexBuffer.resize(0);
}
// One-time setup: creates the vertex and element buffer objects and hands
// the two shader sources to the shader wrapper.
void Renderer::Create()
{
    // Buffer object names for vertex data and indices.
    glGenBuffers(1, &vbo);
    glGenBuffers(1, &ebo);

    // NOTE(review): the arguments are source strings, not paths, despite the
    // *FromFile names; confirm against GLShader.
    shader.VtxFromFile(vtx);
    shader.FragFromFile(frag);
}
void Renderer::DrawRect(float x, float y, float w, float h, GLuint color, float thickness)
{
// Add vertices
vertices.push_back({ x, y });
vertices.push_back(Vector2(x, y + w));
vertices.push_back(Vector2( x, y ) + Vector2(w, h));
vertices.push_back(Vector2(x + w, y));
// Create rect
CreatePolygon(vertices.data(), vertices.size(), color, thickness);
}
void Renderer::Render(float w, float h)
{
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
shader.UseProgram();
shader.UniformMatrix4fv("projection", glm::ortho(0.0f, w, 0.0f, h));
GLuint elemCount = elementBuffer.size();
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (const void*)offsetof(Vertex, position));
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (const void*)offsetof(Vertex, position));
glBufferData(GL_ARRAY_BUFFER, vertexBuffer.size() * sizeof(Vertex), vertexBuffer.data(), GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, elementBuffer.size() * sizeof(GLuint), elementBuffer.data(), GL_STREAM_DRAW);
const unsigned short* idxBufferOffset = 0;
glDrawElements(GL_TRIANGLES, elemCount, GL_UNSIGNED_INT, idxBufferOffset);
idxBufferOffset += elemCount;
glDeleteVertexArrays(1, &vao);
glDisable(GL_BLEND);
}
/**
 * Queues the outline of a closed polygon. Each edge becomes one thin quad
 * (4 vertices, 6 indices) centred on the edge.
 *
 * @param vertices      polygon corners, in order
 * @param verticesCount number of corners in @p vertices
 * @param color         packed colour stored in each generated vertex
 * @param thickness     outline thickness
 */
void Renderer::CreatePolygon(const Vector2* vertices, const GLuint verticesCount, GLuint color, float thickness)
{
    if (vertices == nullptr || verticesCount == 0)
    {
        this->vertices.clear();
        return;
    }

    // One quad per edge: 4 vertices and 6 indices each.
    ReserveData(verticesCount * 4, verticesCount * 6);

    const float halfThickness = thickness * 0.5f;
    for (GLuint i = 0; i < verticesCount; ++i)
    {
        // The last corner connects back to the first.
        const GLuint j = (i + 1 == verticesCount) ? 0 : i + 1;
        const Vector2& position1 = vertices[i];
        const Vector2& position2 = vertices[j];

        // Unit direction of the edge; a zero-length edge is left as is.
        Vector2 difference = position2 - position1;
        const float length = difference.Magnitude();
        if (length > 0)
            difference *= 1.0f / length;

        const float dx = difference.x * halfThickness;
        const float dy = difference.y * halfThickness;

        // Offset both end points perpendicular to the edge, to either side.
        mappedVertex[0].position = Vector2(position1.x + dy, position1.y - dx);
        mappedVertex[1].position = Vector2(position2.x + dy, position2.y - dx);
        mappedVertex[2].position = Vector2(position2.x - dy, position2.y + dx);
        mappedVertex[3].position = Vector2(position1.x - dy, position1.y + dx);
        mappedVertex[0].color = color;
        mappedVertex[1].color = color;
        mappedVertex[2].color = color;
        mappedVertex[3].color = color;
        mappedVertex += 4;

        // Two triangles: (0, 1, 2) and (2, 3, 0).
        mappedElement[0] = currentVertexIndex;
        mappedElement[1] = currentVertexIndex + 1;
        mappedElement[2] = currentVertexIndex + 2;
        mappedElement[3] = currentVertexIndex + 2;
        mappedElement[4] = currentVertexIndex + 3;
        mappedElement[5] = currentVertexIndex;
        mappedElement += 6;
        currentVertexIndex += 4;
    }
    this->vertices.clear();
}
// Grows the CPU-side vertex and element buffers by the requested amounts and
// points the write cursors at the first newly added slot of each.
void Renderer::ReserveData(int numVertices, int numElements)
{
    // New vertices are indexed starting at the current vertex count.
    const int firstNewVertex = vertexBuffer.size();
    currentVertexIndex = vertexBuffer.size();
    vertexBuffer.resize(firstNewVertex + numVertices);
    mappedVertex = vertexBuffer.data() + firstNewVertex;

    const int firstNewElement = elementBuffer.size();
    elementBuffer.resize(firstNewElement + numElements);
    mappedElement = elementBuffer.data() + firstNewElement;
}
// Benchmark driver skeleton: every frame it clears the renderer, queues
// 30000 identical unfilled rectangles and renders them. Window creation,
// event handling and buffer swapping are elided placeholders.
// NOTE(review): windowW and windowH are not declared in this snippet, and
// `quit` is never set to true here.
int main()
{
//Create window, init opengl, etc.
Renderer renderer;
renderer.Create();
bool quit=false;
while(!quit) {
//Events
//Clear color bit
renderer.Clear();
for(int i = 0; i < 30000; ++i)
renderer.DrawRect(100.0f, 100.0f, 50.0f, 50.0f, 0xffff0000, 1.5f);
renderer.Render(windowW, windowH);
//swap buffers
}
return 0;
}
Why is it that much slower?
How can I make it faster and less memory-consuming?
The biggest bottleneck in that code looks like your allocations are never amortized across frames, since you are clearing the buffers capacity instead of reusing them, leading you to lots of realloc/copies (probably Log2(n) reallocs/copies if your vector implementation grows by factor of 2). Try changing your .clear() call with .resize(0) and maybe you can have a more lazy/rare call to .clear() when things gets unused.
In debug or in release mode? Vectors are terribly slow in debug due to memory checking. Profiling should always be done in Release.
Profiling should be done both in Release and Debug/Unoptimized mode if you intend to ever use and work with your application in Debug/Unoptimized mode. The gross "zero-cost abstraction" lie of modern C++ is that it makes it a pain to work with a debugger because large applications don't run at correct frame-rate in "Debug" mode any more. Ideally you should always run all your applications in Debug mode. Do yourself a productivity favour and ALSO do some profiling/optimization for your worse case.
Good luck with your learning quest! :)
Solution
I do not use std::vector anymore. I use ImVector instead (it may be your own implementation as well),
I set position directly to a Vector2.x/.y

Why don't these GLSL shaders work?

I'm trying to render a spiral as all red using the shaders to override the colours. For some reason they compile and link but do nothing.
See the following code
fragmentshader.glsl
#version 430
// Fragment stage: writes the colour received from the vertex stage.
out vec4 outColor;
in vec4 color;
void main(){
outColor = color;
}
vertexshader.glsl
#version 430
// Vertex stage: places the 2D input position at z = 0, w = 1 and passes a
// constant opaque red colour to the fragment stage.
in layout(location=0) vec2 position;
out vec4 color;
void main(){
gl_Position = vec4(position, 0.0, 1.0);
color = vec4(1.0, 0.0f, 0.0f, 1.0f);
}
Window.cpp
#include <GL\glew.h>
#include <glm\glm.hpp>
#include <glm\gtc\matrix_transform.hpp>
#include "MeGLWindow.h"
#include <iostream>
#include <fstream>
// Mathematical constants used to generate the spiral.
#define Pi 3.14159265358979
#define E 2.718281828F
#define SCREEN_WIDTH 800
#define SCREEN_HEIGHT 600
// Number of vertices generated for the spiral.
#define MAX_SPIRAL 25000
// Helpers defined elsewhere; their definitions are not shown here.
std::string readFile(const char* relPath);
void installShaders();
bool checkShaderStatus(GLuint shaderID);
bool checkProgramStatus(GLuint programID);
// Handle of the linked shader program, assigned by installShaders.
GLuint programID;
void MeGLWindow::initializeGL() {
glewInit();
glEnable(GL_DEPTH_TEST);
installShaders();
const int verts_num = MAX_SPIRAL * 2;
GLfloat verts[50000];
GLuint myBufferID;
float a = 0.06f;
float b = 0.06f;
float cx = 0.0;
float cy = 0.0;
int z = 0;
for (int i = 0; i < MAX_SPIRAL; i++) {
float ang = (Pi / 720) * i;
float factor = pow(E, b * ang);
float x = cx + (a * (cos(ang)) * factor);
float y = cy - (a * (sin(ang)) * factor);
verts[2 * i] = x;
verts[(2 * i) + 1] = y;
}
glGenBuffers(1, &myBufferID);
glBindBuffer(GL_ARRAY_BUFFER, myBufferID);
glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);
}
void MeGLWindow::paintGL() {
glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
glViewport(0, 0, width(), height());
glDrawArrays(GL_POINTS, 0, 2*25000);
}
void installShaders() {
GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
const char* adapter[1];
std::string file = readFile("vertexshader.glsl");
const char* vertexFile = file.c_str();
adapter[0] = vertexFile;
glShaderSource(vertexShaderID, 1, adapter, 0);
file = readFile("fragmentshader.glsl");
const char* fragmentFile = file.c_str();
adapter[0] = fragmentFile;
glShaderSource(fragmentShaderID, 1, adapter, 0);
glCompileShader(vertexShaderID);
glCompileShader(fragmentShaderID);
if (!checkShaderStatus(vertexShaderID) || !checkShaderStatus(fragmentShaderID)) {
return;
}
programID = glCreateProgram();
glAttachShader(programID, vertexShaderID);
glAttachShader(programID, fragmentShaderID);
glLinkProgram(programID);
if (!checkProgramStatus(programID)) {
return;
}
glUseProgram(programID);
}
main.cpp
#include <Qt\qapplication.h>
#include "MeGLWindow.h"
// Qt entry point: shows the OpenGL window and runs the event loop until the
// application quits.
int main(int argc, char* argv[]) {
    QApplication app(argc, argv);

    MeGLWindow meWindow;
    meWindow.show();

    const int exitCode = app.exec();
    return exitCode;
}
The vertices in the spiral should be red according to the linked shaders! What am I doing wrong? Please help!
No error checking on the shader program. Or rather, you check for errors, but don't signal the application that the program creation failed in any way.
You don't provide the source for checkProgramStatus, but if you're checking GL_LINK_STATUS, make sure you're not doing so before you link the program.
It's not clear how you're initializing OpenGL from the code provided, but you're not setting a VAO, which is required for the core profile.

Instancing with OpenGL 3.3 seems very slow

I wrote a minimal code-sample in C++, which is rendering 10000 colored
quads on the screen. I am using "instancing" and so updating only
the model-matrix for each quad each frame. The data of the 6 vertices
is stored in an individual VBO and will be reused all the time.
The projection-matrix (orthographic) is injected once at program-start
via uniform. The model-matrix is calculated on the CPU with the library GLM.
I measured the rendering time and got an average of only 52 FPS.
I think this is far too low, but I cannot find the mistake/bottleneck in my little sample program.
After some analysis it seems, that the 3 calculations done with GLM
are very slow. Am I doing something wrong here? For example, If
I remove the rotating-calculation, I get an FPS-boost of 10 FPS!
Maybe you can help me to find out, what I can do better here and how
can I optimize my sample. It is important for me, that each quad is individual configurable during runtime, so I decided to use instancing.
Moving the matrix calculations to the GPU seems to be another option, but I am really confused why the CPU has so many problems calculating the 10000
model matrices! OK, my CPU is very weak (Athlon 2 Core-Duo M300, GPU is ATI Mobility Radeon 4100), but it should do this task in no measurable time, shouldn't it?
Here is minimal, fully working, compilable example (If u have GLFW and GLM).
Maybe someone have some time and can help me out here :)
#define GLEW_STATIC
#define GLM_FORCE_INLINE
#define GLM_FORCE_SSE2
#include "glew.h"
#include "glfw3.h"
#include "glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include <conio.h>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>
// Compiles a single shader stage from GLSL source.
// Returns the shader object id, or 0 after writing the driver's info log to
// std::cerr when the shader could not be created or compiled.
static GLuint compileShaderStage(GLenum eStage, const std::string &strSource, const char *szLabel)
{
    const GLuint gluiShaderId = glCreateShader(eStage);
    if (gluiShaderId == 0)
    {
        std::cerr << "glCreateShader failed for the " << szLabel << " shader" << std::endl;
        return 0;
    }

    const char *szSource = strSource.c_str();
    glShaderSource(gluiShaderId, 1, &szSource, NULL);
    glCompileShader(gluiShaderId);

    GLint iCompiled = GL_FALSE;
    glGetShaderiv(gluiShaderId, GL_COMPILE_STATUS, &iCompiled);
    if (iCompiled != GL_TRUE)
    {
        GLint iLogLength = 0;
        glGetShaderiv(gluiShaderId, GL_INFO_LOG_LENGTH, &iLogLength);
        // Keep at least one byte so &strLog[0] is always a valid buffer.
        std::string strLog(static_cast<std::string::size_type>(iLogLength > 0 ? iLogLength : 1), '\0');
        glGetShaderInfoLog(gluiShaderId, static_cast<GLsizei>(strLog.size()), NULL, &strLog[0]);
        std::cerr << "Failed to compile the " << szLabel << " shader:\n" << strLog.c_str() << std::endl;
        glDeleteShader(gluiShaderId);
        return 0;
    }
    return gluiShaderId;
}

// Builds the shader program used to draw the instanced quads.
//
// The vertex shader transforms `vertexPosition` by the per-instance
// `modelMatrix` attribute and the `projectionMatrix` uniform; the fragment
// shader outputs the clip-space position as a colour with alpha 0.5.
//
// Returns the id of the linked program, or 0 if compiling or linking failed
// (the reason is written to std::cerr). A current OpenGL context with loaded
// function pointers is required.
GLuint buildShader()
{
    const std::string strVSCode =
        "#version 330 core\n"
        "in vec3 vertexPosition;\n"
        "in mat4 modelMatrix;\n"
        "uniform mat4 projectionMatrix;\n"
        "out vec4 m_color;\n"
        "void main() {\n"
        " vec4 vecVertex = vec4(vertexPosition, 1);\n"
        " gl_Position = projectionMatrix * modelMatrix * vecVertex;\n"
        " m_color = gl_Position;\n"
        "}\n";
    const std::string strFSCode =
        "#version 330 core\n"
        "out vec4 frag_colour;\n"
        "in vec4 m_color;\n"
        "void main() {\n"
        " frag_colour = vec4(m_color.x, m_color.y, m_color.z, 0.5f);\n"
        "}\n";

    const GLuint gluiVertexShaderId = compileShaderStage(GL_VERTEX_SHADER, strVSCode, "vertex");
    const GLuint gluiFragmentShaderId = compileShaderStage(GL_FRAGMENT_SHADER, strFSCode, "fragment");
    if (gluiVertexShaderId == 0 || gluiFragmentShaderId == 0)
    {
        // glDeleteShader silently ignores the value 0.
        glDeleteShader(gluiVertexShaderId);
        glDeleteShader(gluiFragmentShaderId);
        return 0;
    }

    const GLuint gluiProgramId = glCreateProgram();
    if (gluiProgramId == 0)
    {
        std::cerr << "glCreateProgram failed" << std::endl;
        glDeleteShader(gluiVertexShaderId);
        glDeleteShader(gluiFragmentShaderId);
        return 0;
    }
    glAttachShader(gluiProgramId, gluiVertexShaderId);
    glAttachShader(gluiProgramId, gluiFragmentShaderId);
    glLinkProgram(gluiProgramId);

    // The shader objects are not needed once linking has been attempted;
    // detaching lets glDeleteShader release them immediately.
    glDetachShader(gluiProgramId, gluiVertexShaderId);
    glDetachShader(gluiProgramId, gluiFragmentShaderId);
    glDeleteShader(gluiVertexShaderId);
    glDeleteShader(gluiFragmentShaderId);

    GLint iLinked = GL_FALSE;
    glGetProgramiv(gluiProgramId, GL_LINK_STATUS, &iLinked);
    if (iLinked != GL_TRUE)
    {
        GLint iLogLength = 0;
        glGetProgramiv(gluiProgramId, GL_INFO_LOG_LENGTH, &iLogLength);
        std::string strLog(static_cast<std::string::size_type>(iLogLength > 0 ? iLogLength : 1), '\0');
        glGetProgramInfoLog(gluiProgramId, static_cast<GLsizei>(strLog.size()), NULL, &strLog[0]);
        std::cerr << "Failed to link the shader program:\n" << strLog.c_str() << std::endl;
        glDeleteProgram(gluiProgramId);
        return 0;
    }
    return gluiProgramId;
}
// Animation state of one quad; main() turns it into a model matrix each frame.
struct Sprite
{
    glm::vec3 position;   // translation applied to the quad (offset by half the dimension)
    glm::vec3 dimension;  // scale factors applied to the unit quad
    float speed;          // multiplied by the frame's delta time to advance position and rotation
    float rotation;       // angle passed to glm::rotate around the z axis
    float rx;             // x direction flag: 1 moves towards +x, 0 towards -x
    float ry;             // y direction flag: 1 moves towards +y, 0 towards -y
};
// One vertex position, laid out as three consecutive floats so that an array
// of Vertex can be uploaded directly as a GL_FLOAT x 3 attribute.
struct Vertex
{
    float x, y, z;
    // Default-constructs at the origin so no component is ever indeterminate.
    Vertex() : x(0.0f), y(0.0f), z(0.0f) {}
    // Constructs a vertex at the given coordinates.
    Vertex(float x, float y, float z) : x(x), y(y), z(z) {}
};
int main(int arc, char **argv)
{
// GLFW init
int displayResWith = 1366; //modify this here
int displayResHeight = 768; //modify this here
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, 1);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RED_BITS, 8);
glfwWindowHint(GLFW_GREEN_BITS, 8);
glfwWindowHint(GLFW_BLUE_BITS, 8);
glfwWindowHint(GLFW_ALPHA_BITS, 8);
glfwWindowHint(GLFW_DEPTH_BITS, 32);
glfwWindowHint(GLFW_STENCIL_BITS, 32);
GLFWwindow* window = glfwCreateWindow(displayResWith, displayResHeight,"Instancing", glfwGetPrimaryMonitor(),NULL);
int width, height;
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glfwGetFramebufferSize(window, &width, &height);
//GLEW init
glewExperimental = GL_TRUE;
glewInit();
const GLubyte* renderer = glGetString(GL_RENDERER);
const GLubyte* version = glGetString(GL_VERSION);
std::cout << "Renderer: " << renderer << std::endl;
std::cout << "OpenGL supported version: " << version << std::endl;
//OpenGL init
glEnable(GL_CULL_FACE);
glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LESS);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glClearColor(255.0f, 255.0f, 255.0f, 255.0f);
//Shader
GLuint programID = buildShader();
//VBO vertexBuffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
Vertex VertexBufferData[6];
VertexBufferData[0] = Vertex(-0.5f, 0.5f, 0.0f); //Links oben
VertexBufferData[1] = Vertex(-0.5f, -0.5f, 0.0f); //Links unten
VertexBufferData[2] = Vertex(0.5f, -0.5f, 0.0f); //Rechts unten
VertexBufferData[3] = VertexBufferData[2]; //Rechts unten
VertexBufferData[4] = Vertex(0.5f, 0.5f, 0.0f); //Rechts oben
VertexBufferData[5] = VertexBufferData[0]; //Links oben
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex)*6, VertexBufferData, GL_STATIC_DRAW);
//VBO instanceBuffer
GLuint instanceBuffer;
glGenBuffers(1, &instanceBuffer);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
int iMaxInstanceCount = 30000;
glm::mat4 *ptrInstanceBufferData = new glm::mat4[iMaxInstanceCount];
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW);
//VAO - Start
GLuint vertexArrayObject;
glGenVertexArrays(1, &vertexArrayObject);
glBindVertexArray(vertexArrayObject);
//For VBO vertexbuffer
glEnableVertexAttribArray(glGetAttribLocation(programID, "vertexPosition"));
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glVertexAttribPointer(
glGetAttribLocation(programID, "vertexPosition"),
3,
GL_FLOAT,
GL_FALSE,
sizeof(Vertex),
(void*)0
);
glVertexAttribDivisor(0, 0);
//For VBO instanceBuffer
int pos = glGetAttribLocation(programID, "modelMatrix");
int pos1 = pos + 0;
int pos2 = pos + 1;
int pos3 = pos + 2;
int pos4 = pos + 3;
glEnableVertexAttribArray(pos1);
glEnableVertexAttribArray(pos2);
glEnableVertexAttribArray(pos3);
glEnableVertexAttribArray(pos4);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glVertexAttribPointer(pos1, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(0));
glVertexAttribPointer(pos2, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 4));
glVertexAttribPointer(pos3, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 8));
glVertexAttribPointer(pos4, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 12));
glVertexAttribDivisor(pos1, 1);
glVertexAttribDivisor(pos2, 1);
glVertexAttribDivisor(pos3, 1);
glVertexAttribDivisor(pos4, 1);
glBindVertexArray(0); //VAO - End
//Matrix vars
glm::mat4 Projection, Rotating, Scaling, Translation, Identity;
glm::vec3 ZRotateVec(0.0f, 0.0f, 1.0f);
//Calc projection-matrix and put shader (uniform)
Projection = glm::ortho(0.0f, (float)width, 0.0f, (float)height, 0.0f, 1.0f);
glUseProgram(programID);
glUniformMatrix4fv(glGetUniformLocation(programID, "projectionMatrix"), 1, GL_FALSE, &Projection[0][0]);
//Creating sprites
std::srand(static_cast<unsigned int>(std::time(0)));
int iActInstanceCount = 10000;
Sprite *ptrSprites = new Sprite[iActInstanceCount];
for (int i = 0; i < iActInstanceCount; ++i)
{
ptrSprites[i].dimension = glm::vec3(16, 16, 1.0f);
ptrSprites[i].position = glm::vec3(std::rand()%(width-32),std::rand()%(height-32),-1.0f *((std::rand()%256)/256.0f));
ptrSprites[i].rotation = rand() % 360 + 0.0f;
ptrSprites[i].rx = static_cast<float>(std::rand() % 2);
ptrSprites[i].ry = static_cast<float>(std::rand() % 2);
ptrSprites[i].speed = (std::rand() % 100) + 1.0f;
if (ptrSprites[i].speed < 1.0f) ptrSprites[i].speed = 1.0f;
}
//FPS init
double fFramesRendered = 0.0f;
double fFrameMeasurementStart = 0.0f;
double fFPS = 0.0f;
double fCurrentTime = 0.0f;
glfwSetTime(0);
//Main-loop (also renderloop)
while (!glfwWindowShouldClose(window))
{
//application-logic
if (glfwGetKey(window, GLFW_KEY_ESCAPE)== GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
const double fNewTime = glfwGetTime();
double fDeltaTime = fNewTime - fCurrentTime;
fCurrentTime = fNewTime;
for (int i = 0; i < iActInstanceCount; ++i)
{
float fSpeed = ptrSprites[i].speed * static_cast<float>(fDeltaTime);
ptrSprites[i].rotation += fSpeed;
if (ptrSprites[i].rotation >= 360.0f) ptrSprites[i].rotation = 0.0f;
if (ptrSprites[i].rx == 1) ptrSprites[i].position.x = ptrSprites[i].position.x + fSpeed;
if (ptrSprites[i].rx == 0) ptrSprites[i].position.x = ptrSprites[i].position.x - fSpeed;
if (ptrSprites[i].ry == 1) ptrSprites[i].position.y = ptrSprites[i].position.y + fSpeed;
if (ptrSprites[i].ry == 0) ptrSprites[i].position.y = ptrSprites[i].position.y - fSpeed;
if (ptrSprites[i].position.x <= 0) ptrSprites[i].rx = 1;
if (ptrSprites[i].position.x + ptrSprites[i].dimension.x >= width) ptrSprites[i].rx = 0;
if (ptrSprites[i].position.y <= 0) ptrSprites[i].ry = 1;
if (ptrSprites[i].position.y + ptrSprites[i].dimension.y >= height) ptrSprites[i].ry = 0;
//matrix-calculations (saved in local buffer)
Translation = glm::translate(Identity, ptrSprites[i].position + glm::vec3(ptrSprites[i].dimension.x / 2.0f, ptrSprites[i].dimension.y / 2.0f, 0.0f));
Scaling = glm::scale(Translation, ptrSprites[i].dimension);
ptrInstanceBufferData[i] = glm::rotate(Scaling, ptrSprites[i].rotation, ZRotateVec);
}
//render-call
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glUseProgram(programID);
glBindVertexArray(vertexArrayObject);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW); // Buffer orphaning
glBufferSubData(GL_ARRAY_BUFFER, 0, iActInstanceCount * sizeof(glm::mat4), ptrInstanceBufferData);
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, iActInstanceCount);
glBindVertexArray(0);
glfwSwapBuffers(window);
glfwPollEvents();
//FPS-stuff
++fFramesRendered;
if ((fCurrentTime*1000.0f) >= (fFrameMeasurementStart*1000.0f) + 1000.0f)
{
fFPS = ((fCurrentTime*1000.0f) - (fFrameMeasurementStart*1000.0f)) / 1000.0f * fFramesRendered;
fFrameMeasurementStart = fCurrentTime;
fFramesRendered = 0;
std::cout << "FPS: " << fFPS << std::endl;
}
}
//Termination and cleanup
glDeleteBuffers(1, &vertexBuffer);
glDeleteBuffers(1, &instanceBuffer);
glDeleteVertexArrays(1, &vertexArrayObject);
glDeleteProgram(programID);
glfwDestroyWindow(window);
glfwTerminate();
return _getch();
}
Well, after testing it on my machine, it is definitely CPU limited, so nothing you do with OGL is going to make much difference. I get about ~300fps with GCC on at least -O1, but only ~80 with -O0. My CPU is very fast (i7 2600k, 4.7ghz), but my GPU is rather slow (GT 520). I'm also on Ubuntu.
Some quick ideas for things that might speed it up a little:
Put the vertex positions in an array in the vertex shader and use gl_VertexID to access them
Use GL_TRIANGLE_STRIP instead of GL_TRIANGLES
Use radians for angles, as otherwise GLM has to convert them
None of these are likely to make much of any impact, really. Just make sure your compiler is set up right, and there probably isn't much more to do.