Using VBOs and CPU usage is very high - C++

I'm really not sure what to do anymore. I've made my application use VBOs and my CPU usage still goes into the 70s and 80s. My render procedure works like this:
Set the camera transformation
If the shape has not been tessellated, tessellate it
Create its VBO
If it has a VBO, use it
You will notice I have display lists too; I might use those if VBOs are not supported. I went and found an OpenGL demo that renders a 32,000-poly mesh at 60 fps on my PC and uses 4% CPU. I'm rendering about 10,000 polys at 60 fps using VBOs and it's using 70-80%.
Here is my render proc:
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();

    POINT hh = controls.MainGlFrame.GetMousePos();
    POINTFLOAT S;
    S.x = static_cast<float>(hh.x);
    S.y = static_cast<float>(hh.y);
    POINTFLOAT t;
    t.x = 256;
    t.y = 256;
    POINT dimensions;
    dimensions.x = 512;
    dimensions.y = 512;

    glDeleteTextures(1, &texName);
    texName = functions.CreateGradient(col, t, S, 512, 512, true);
    itt = true;
}

HDC hdc;
PAINTSTRUCT ps;

glEnable(GL_MULTISAMPLE_ARB);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

hdc = BeginPaint(controls.MainGlContext.mhWnd, &ps);

//start OGL code
glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
if(!current.isdrawing)
    glClear(GL_COLOR_BUFFER_BIT);

glPushMatrix();
glTranslatef(controls.MainGlFrame.GetCameraX(),
             controls.MainGlFrame.GetCameraY(), 0);
//glTranslatef(current.ScalePoint.x,current.ScalePoint.y,0);
glScalef(current.ScaleFactor, current.ScaleFactor, current.ScaleFactor);
//glTranslatef(-current.ScalePoint.x,-current.ScalePoint.y,0);

if(!current.isdrawing)
{
    for(unsigned int currentlayer = 0; currentlayer < layer.size(); ++currentlayer)
    {
        PolygonTesselator.Init();
        for(unsigned int i = 0; i < layer[currentlayer].Shapes.size(); i++)
        {
            if(layer[currentlayer].Shapes[i].DisplayListInt == -999)
            {
                gluTessNormal(PolygonTesselator.tobj, 0, 0, 1);
                PolygonTesselator.Set_Winding_Rule(layer[currentlayer].Shapes[i].WindingRule);

                glEnable(GL_TEXTURE_2D);
                glBindTexture(GL_TEXTURE_2D, texName);

                layer[currentlayer].Shapes[i].DisplayListInt = glGenLists(1);
                glNewList(layer[currentlayer].Shapes[i].DisplayListInt, GL_COMPILE);

                PolygonTesselator.SetDimensions(layer[currentlayer].Shapes[i].Dimensions, layer[currentlayer].Shapes[i].minima);
                PolygonTesselator.Begin_Polygon();
                for(unsigned int c = 0; c < layer[currentlayer].Shapes[i].Contour.size(); ++c)
                {
                    if(layer[currentlayer].Shapes[i].Color.a != 0)
                    {
                        PolygonTesselator.Begin_Contour();
                        for(unsigned int j = 0; j < layer[currentlayer].Shapes[i].Contour[c].DrawingPoints.size(); ++j)
                        {
                            gluTessVertex(PolygonTesselator.tobj, &layer[currentlayer].Shapes[i].Contour[c].DrawingPoints[j][0],
                                          &layer[currentlayer].Shapes[i].Contour[c].DrawingPoints[j][0]);
                        }
                        PolygonTesselator.End_Contour();
                    }
                }
                PolygonTesselator.End_Polygon();
                glEndList();

                PolygonTesselator.TransferVerticies(layer[currentlayer].Shapes[i].OutPoints);

                glGenBuffersARB(1, &layer[currentlayer].Shapes[i].VBOInt);
                glBindBufferARB(GL_ARRAY_BUFFER_ARB, layer[currentlayer].Shapes[i].VBOInt);
                glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * layer[currentlayer].Shapes[i].OutPoints.size(),
                                &layer[currentlayer].Shapes[i].OutPoints[0], GL_STATIC_DRAW_ARB);

                InvalidateRect(controls.MainGlFrame.framehWnd, NULL, false);
            }
            else //run vbo
            {
                //glEnable(GL_TEXTURE_2D);
                //glDisable(GL_TEXTURE_2D);
                //glBindTexture(GL_TEXTURE_2D, texName);
                glColor4f(layer[currentlayer].Shapes[i].Color.r,
                          layer[currentlayer].Shapes[i].Color.g,
                          layer[currentlayer].Shapes[i].Color.b,
                          layer[currentlayer].Shapes[i].Color.a);
                //glColor4f(1,1,1,1);

                glBindBufferARB(GL_ARRAY_BUFFER_ARB, layer[currentlayer].Shapes[i].VBOInt);
                //glCallList(layer[currentlayer].Shapes[i].DisplayListInt);
                glEnableClientState(GL_VERTEX_ARRAY);
                glVertexPointer(2, GL_FLOAT, 0, 0);
                glDrawArrays(GL_TRIANGLES, 0, layer[currentlayer].Shapes[i].OutPoints.size() / 2);
                glDisableClientState(GL_VERTEX_ARRAY);
                glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
            }
            glDisable(GL_TEXTURE_2D);

            //Draw outlines
            if(layer[currentlayer].Shapes[i].Outline.OutlinePoints.size() > 4)
            {
                glColor4f(layer[currentlayer].Shapes[i].Outline.OutlineColor.r
                         ,layer[currentlayer].Shapes[i].Outline.OutlineColor.g
                         ,layer[currentlayer].Shapes[i].Outline.OutlineColor.b
                         ,layer[currentlayer].Shapes[i].Outline.OutlineColor.a);
            }
        }
        PolygonTesselator.End();
    }
}
glPopMatrix();
//end OGL code
glFlush();
SwapBuffers(hdc);
glDisable(GL_MULTISAMPLE_ARB);

EndPaint(controls.MainGlContext.mhWnd, &ps);
}
Why could I be getting such high CPU usage?

Under what conditions is that first bit of code run? There are a couple of suspicious-looking lines in there:
glDeleteTextures(1,&texName);
texName = functions.CreateGradient(col,t,S,512,512,true);
If you're deleting and recreating a texture every time you paint, that could get expensive. I couldn't say how expensive the OpenGL parts would be -- I'd expect uploading texture data to be reasonably efficient, even if deleting and creating texture names might be less so -- but perhaps CreateGradient is inherently slow. Or maybe you're accidentally hitting some kind of slow path for your graphics card. Or the function is creating all the mipmap levels. And so on.
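If the gradient only depends on the mouse position, one way around that is to keep the texture alive and only rebuild it when the inputs actually change. A minimal sketch, reusing col, t, texName and CreateGradient from the snippet above; the cached-state variables (lastMouse, haveGradient) are hypothetical:
// Hypothetical cached state; lives alongside texName.
static POINTFLOAT lastMouse = { -1.0f, -1.0f };
static bool haveGradient = false;

POINT hh = controls.MainGlFrame.GetMousePos();
POINTFLOAT S;
S.x = static_cast<float>(hh.x);
S.y = static_cast<float>(hh.y);

// Only delete and recreate the texture when the mouse position changed.
if (!haveGradient || S.x != lastMouse.x || S.y != lastMouse.y)
{
    if (haveGradient)
        glDeleteTextures(1, &texName);
    texName = functions.CreateGradient(col, t, S, 512, 512, true);
    lastMouse = S;
    haveGradient = true;
}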
Aside from that, some random ideas:
What is the present interval? If the buffer swap is set to sync with the monitor, you may incur a delay because of that. (You can use the WGL_EXT_swap_control extension to tweak this value; see the sketch after this list.)
If all of this is being run in response to a WM_PAINT, check that you aren't getting unexpected extra WM_PAINTs for some reason.
Check that the polygon tesselator Init and End functions aren't doing anything, since they're being called every time, even if there's no tesselating to be done.
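On the swap-interval point above, a minimal sketch of adjusting the present interval via WGL_EXT_swap_control, assuming an OpenGL context is already current (the typedef matches the extension's entry point):
// Query the extension's entry point once, after the context is current.
typedef BOOL (WINAPI *PFNWGLSWAPINTERVALEXTPROC)(int interval);
PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT =
    (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT");

if (wglSwapIntervalEXT)
{
    wglSwapIntervalEXT(0); // 0 = swap immediately, 1 = wait for one vertical blank
}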

Based on the code snippet you have provided, you have (at one point) loops nested four layers deep. You may be seeing high CPU load due to running each of these loops an extremely large number of times. Can you give us any idea how many iterations these loops are having to run through?
Try grabbing a timestamp inside each loop iteration and compare it against the previous to see how long it is taking to run one iteration of each particular loop. This should help you determine what part of the function is taking up the bulk of your CPU time.
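A minimal sketch of that kind of timing using std::chrono; the printed label and the measured block are placeholders for your own loop bodies:
#include <chrono>
#include <cstdio>

// Wrap any suspect block with this to see how long it takes.
auto start = std::chrono::steady_clock::now();

// ... one iteration of the loop you want to measure ...

auto end = std::chrono::steady_clock::now();
double ms = std::chrono::duration<double, std::milli>(end - start).count();
std::printf("iteration took %.3f ms\n", ms);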

Related

Rendering surfaces in OpenGL with depth test correctly

I am wondering how to render surfaces with the depth test correctly. In my case it is not working even though it has been enabled. I have tried many combinations but cannot figure out what is being done wrong; it might be the ordering of the OpenGL commands, or it might be something I am missing completely.
I have this code that uses OpenGL to render a 2D game I am working on. I want to enable z-buffering and the depth test to simplify things in the code. I read a number of tutorials online and made changes as instructed, but cannot figure out why it is not working.
The code of the main function is shown below. I am setting the z values of the two squares to -10 and -25 and swapping them later on, but I always get the first square rendered over the second one no matter what values I use:
void MainGame::RenderTestUI()
{
    glEnable(GL_DEPTH_TEST);
    glDepthMask(GL_TRUE);
    glDepthFunc(GL_LESS);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    GLSLProgram *ActiveShader = nullptr;
    ActiveShader = &ColorShader;
    ActiveShader->Use();

    GLint Location1 = ActiveShader->GetUniformLocation("cam");
    glm::mat4 tmp = Camera.GetCameraMatrix();
    glUniformMatrix4fv(Location1, 1, GL_FALSE, &tmp[0][0]);

    glActiveTexture(GL_TEXTURE0);
    GLint Location2 = ActiveShader->GetUniformLocation("basic");
    glUniform1f(Location2, 0);
    glBindTexture(GL_TEXTURE_2D, GameTextures.ID);
    CurrentBoundTexture = GameTextures.ID;

    RenderingBatch.StartAddingVerticies();

    this->GameMap.TileList[1].FillSixVerticies(RenderingBatch.VertexListPtr, 0, 0);
    RenderingBatch.VertexCount += 6;
    for (int i = 0; i < 6; i++)
        RenderingBatch.VertexListPtr[i].z = -10; // first face

    this->GameMap.TileList[2].FillSixVerticies(&RenderingBatch.VertexListPtr[RenderingBatch.VertexCount], 8, 8);
    RenderingBatch.VertexCount += 6;
    for (int i = 0; i < 6; i++)
        RenderingBatch.VertexListPtr[i+6].z = -25; // second face

    RenderingBatch.EndAddingVerticies();
    RenderingBatch.CreateVBO();
    RenderingBatch.Render();

    ActiveShader->Unuse();

    // swap buffers
    SDL_GL_SwapWindow(GameWindow);
}
The end result is always the same regardless of the z values I assign to the two faces; the result can be seen here:
Any advice is highly appreciated.
When setting up the SDL surface to draw on, did you ask for a depth buffer prior to calling SDL_CreateWindow?
SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
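For reference, a minimal sketch of the ordering, assuming SDL 2 as used in the question (window title, size, and variable names are placeholders):
// Attributes must be set before the window and context are created.
SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

SDL_Window *win = SDL_CreateWindow("Game", SDL_WINDOWPOS_CENTERED,
                                   SDL_WINDOWPOS_CENTERED, 800, 600,
                                   SDL_WINDOW_OPENGL);
SDL_GLContext ctx = SDL_GL_CreateContext(win);

glEnable(GL_DEPTH_TEST);   // and clear GL_DEPTH_BUFFER_BIT every frame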

Add To Scene OpenGL

I am trying to create a simple function that adds OpenGL code into my MainLoop.
For example, using win->AddToScene("glBegin(GL_TRIANGLES); glVertex3f( 0.0f, 1.0f, 0.0f); glVertex3f( 1.0f,-1.0f, 0.0f); glVertex3f(-1.0f,-1.0f, 0.0f); glEnd(); ");
I am trying to do this using arrays, but it isn't working with what I have so far.
Code:
std::vector< int > arr;

void Window::MainLoop()
{
    do
    {
        glfwMakeContextCurrent(window);
        glFlush();
        glfwPollEvents();

        for(int i = 0; i < arr.size(); i++)
        {
            arr[i];
        }

        glfwSwapBuffers(window);
    }
    while(running && !glfwWindowShouldClose(window));
}

void Window::AddToScene(char child[100]){
    for(unsigned int i = 0; i < arr.size(); i++){
        arr.push_back(*child);
    }
}
Are arrays a way to do it, or do I have to do something else completely?
Thanks
Props for creativity. Just FYI, what you want to do wouldn't work that way in interpreted languages either; in those you'd have to use some kind of eval statement.
It should be noted that it's perfectly possible to implement an interpreter for a small custom toy language (or rather a command execution list) in C that does exactly what you envision.
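If you really want such a command list in C++, the usual approach is to store callables rather than source text. A minimal sketch, reusing the immediate-mode calls from the question; the commands vector and the AddToScene signature are hypothetical replacements for the int array:
#include <functional>
#include <vector>

std::vector<std::function<void()>> commands;

void Window::AddToScene(std::function<void()> cmd)
{
    commands.push_back(std::move(cmd));   // record the callable
}

// In MainLoop(), instead of iterating over ints:
for (const auto &cmd : commands)
    cmd();   // replay the recorded GL calls

// Usage:
win->AddToScene([]{
    glBegin(GL_TRIANGLES);
    glVertex3f( 0.0f,  1.0f, 0.0f);
    glVertex3f( 1.0f, -1.0f, 0.0f);
    glVertex3f(-1.0f, -1.0f, 0.0f);
    glEnd();
});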
However, as far as adding geometry to your scene goes, you're barking up the completely wrong tree! A scene consists solely of what is drawn, not how it is drawn. Your mistake is thinking that the glVertex calls are what matters; what actually matters are the numbers that go into them.
Think about something like this:
glBegin(…);
for(size_t i = 0; i < vertices.length(); ++i){
glVertex3f(vertices[i].x, vertices[i].y, vertices[i].z);
}
glEnd();
However, all these function calls are super inefficient. What you actually want to do instead is use vertex arrays:
glVertexPointer(3, GL_FLOAT, 0, &vertices[0]);
glDrawArrays(…, 0, vertices.length());
The last step would be dropping the old-style OpenGL functions and transitioning to generic vertex attributes and vertex buffer objects, but that is easy enough once you master old-style vertex arrays.
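For completeness, a minimal sketch of that last step, assuming a shader with a vec3 position attribute at location 0 and a std::vector<Vertex> of x/y/z floats (these names are assumptions, not part of the original code):
GLuint vbo;
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(Vertex),
             vertices.data(), GL_STATIC_DRAW);

// Generic attribute 0 replaces glVertexPointer.
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void *)0);

glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertices.size());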

Why OpenGL state changes very slow on VirtualBox?

I'm on Linux host and using a Windows guest.
I'm creating the OpenGL context with SDL, and I just draw 1000 objects in different positions; each object is only 6 vertices and 3 lines, a 3D cross.
I'm using vertex and index buffers and GLSL shaders. It doesn't do anything special: it binds the buffers, sets the vertex attrib pointers, sets the matrix, and draws the elements.
It renders the scene in 2 seconds. If I hoist the buffer binding and attribute setting outside the loop, it renders in 200 ms; if I also remove the glUniformMatrix4fv call that sets the matrix for the updated positions, it renders in about 10 ms, though then I only see one object and the thousand others are just overdrawn on it.
On a Linux or Windows host the same thing renders at a stable 60 FPS.
OpenGL games like OpenArena run at 60 FPS in VirtualBox...
Is buffer binding and uniform setting a slow operation in OpenGL in general?
Does anyone have experience testing 3D programs on VirtualBox?
Update: added some code, error checking removed for clarity:
void drawStuff()
{
    GLfloat projectView[16];
    int ms;
    RenderingContext rc; /*< Nothing special; contains the currently bound render object. */

    glClearColor(0, 0, 0, 1);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    createViewMatrix(
        viewMatrix,
        &viewedObject->attitude.k,
        &viewedObject->attitude.j,
        &viewedObject->pos
    );
    multiplyMatrix(projectView, viewMatrix, projMatrix); /*< Combine projection and view matrices. */

    /* Draw 10×10×10 grid of 3D crosses. One cross is 6 vertices and 3 lines. */
    bindRenderObject(&rc, &cross); /*< This binds buffers and sets up vertex attrib arrays. It's very slow if I put it into the loop. */
    {
        int i, j, k;
        for (i = -5; i < 5; i++)
        {
            for (j = -5; j < 5; j++)
            {
                for (k = -5; k < 5; k++)
                {
                    createTranslationMatrix(modelMatrix, i * 10, j * 10, k * 10);
                    multiplyMatrix(combinedMatrix, modelMatrix, projectView);
                    glUniformMatrix4fv(renderingMatrixId, 1, GL_FALSE, combinedMatrix); /*< This is slow for some reason. */
                    drawRenderObject(&rc); /*< This is just a call to glDrawElements. No performance bottleneck here at all. */
                }
            }
        }
    }

    /* Draw some UI. */
    glDisable(GL_DEPTH_TEST);
    /* ... */
    glEnable(GL_DEPTH_TEST);

    SDL_GL_SwapBuffers();
}

glDeleteBuffers crashes during destructor call

Hi, I am using a VBO to load an image texture and then draw it in C++. The VBO id generation, binding, and drawing happen here:
void ViewManager::render(){
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glEnable(GL_TEXTURE_2D);
    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);
    glEnable(GL_BLEND);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

    if(decompressTileImage->tileTexure == 0)
    {
        loadTexture(decompressTileImage);

        glGenBuffers(1, &decompressTileImage->VBOId);
        glBindBuffer(GL_ARRAY_BUFFER, decompressTileImage->VBOId);
        glBufferData(GL_ARRAY_BUFFER, sizeof(*(this->tileCoordList)) + sizeof(*(this->tileTextureCoordList)), 0, GL_STATIC_DRAW);
        glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(*(this->tileCoordList)), this->tileCoordList);
        glBufferSubData(GL_ARRAY_BUFFER, sizeof(*(this->tileCoordList)), sizeof(*(this->tileTextureCoordList)), this->tileTextureCoordList);
    }
    else
    {
        glBindBuffer(GL_ARRAY_BUFFER, decompressTileImage->VBOId);
        glBindTexture(GL_TEXTURE_2D, decompressTileImage->tileTexure);
    }

    glColor4f(1.0f, 1.0f, 1.0f, textureAlpha);

    if(textureAlpha < 1.0)
    {
        textureAlpha = textureAlpha + .03;
        this->tiledMapView->renderNow();
    }

    glTexCoordPointer(3, GL_FLOAT, 0, (void*)sizeof(*(this->tileCoordList)));
    glVertexPointer(3, GL_FLOAT, 0, 0);

    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glDisable(GL_BLEND);
    glDisableClientState(GL_VERTEX_ARRAY);
    glDisableClientState(GL_TEXTURE_COORD_ARRAY);
    glDisable(GL_TEXTURE_2D);
}
This function is in a class named MapTile. MapTile is created 35 times, one for each of 35 images downloaded from the internet, and a thread then calls this method for all 35 MapTile objects and keeps doing so. That is why I first check whether the method is being called for the first time, so that I load the data and generate the VBO id only once per MapTile object. I check this with the if(decompressTileImage->tileTexure == 0) line; after that, each time I just bind the VBO id to draw. There is no need to load the data again.
Here decompressTileImage is a TextureImageInfo object. The implementation is:
#include "TextureImageInfo.h"
TextureImageInfo::TextureImageInfo(unsigned char * image,GLuint format,int texWidth,int texHeight,int imageWidth,int imageHeight,float tex_x,float tex_y)
{
// TODO Auto-generated constructor stub
this->format = format;
this->image = image;
this->imageHeight = imageHeight;
this->imageWidth = imageWidth;
this->texHeight = texHeight;
this->texWidth = texWidth;
this->tileTexure = 0;
this->VBOId = 0;
this->time = 0;
}
TextureImageInfo::~TextureImageInfo()
{
if(VBOId!=0)
glDeleteBuffers(1,&VBOId);
}
It draws and does everything fine, but crashes when I try to clean up the memory in the destructor of the TextureImageInfo class, shown above. I don't understand why. I also check in the destructor, with the if condition, that the VBOId was actually generated.
As indicated in the comments, OpenGL ES commands should be submitted from the same thread where the context was created.
From the Blackberry docs, Parallel processing with OpenGL ES:
It is important to note that each OpenGL ES rendering context targets a single thread of execution.
If you want to render multiple scenes, you can separate each scene into its own thread, making sure each thread has its own context.
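A common workaround for the destructor crash is to not call glDeleteBuffers from whatever thread happens to destroy the object, but to queue the id and let the thread that owns the context delete it on its next pass. A minimal sketch under that assumption; the queue, mutex, and flushDeadBuffers name are hypothetical:
#include <mutex>
#include <vector>

std::mutex deadBufferMutex;
std::vector<GLuint> deadBuffers;   // ids waiting to be deleted on the GL thread

TextureImageInfo::~TextureImageInfo()
{
    if (VBOId != 0)
    {
        std::lock_guard<std::mutex> lock(deadBufferMutex);
        deadBuffers.push_back(VBOId);   // defer; do NOT touch GL here
    }
}

// Called once per frame on the thread that owns the OpenGL ES context.
void flushDeadBuffers()
{
    std::lock_guard<std::mutex> lock(deadBufferMutex);
    if (!deadBuffers.empty())
    {
        glDeleteBuffers((GLsizei)deadBuffers.size(), deadBuffers.data());
        deadBuffers.clear();
    }
}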

Rendering mesh polygons in OpenGL - very slow

I recently switched from immediate mode and have a new rendering process. There must be something I am not understanding; I think it has something to do with the indices.
Here is my diagram: Region -> Mesh -> Polygon Array -> 3 vertex indices, which reference the master list of vertices.
Here is my render code:
// Render the mesh
void WLD::render(GLuint* textures, long curRegion, CFrustum cfrustum)
{
    int num = 0;

    // Set up rendering states
    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);

    // Set up my indices
    GLuint indices[3];

    // Cycle through the PVS
    while(num < regions[curRegion].visibility.size())
    {
        int i = regions[curRegion].visibility[num];

        // Make sure the region is not "dead"
        if(!regions[i].dead && regions[i].meshptr != NULL)
        {
            // Check to see if the mesh is in the frustum
            if(cfrustum.BoxInFrustum(regions[i].meshptr->min[0], regions[i].meshptr->min[2], regions[i].meshptr->min[1], regions[i].meshptr->max[0], regions[i].meshptr->max[2], regions[i].meshptr->max[1]))
            {
                // Cycle through every polygon in the mesh and render it
                for(int j = 0; j < regions[i].meshptr->polygonCount; j++)
                {
                    // Assign the index for the polygon to the index in the huge vertex array
                    // This, I think, is redundant
                    indices[0] = regions[i].meshptr->poly[j].vertIndex[0];
                    indices[1] = regions[i].meshptr->poly[j].vertIndex[1];
                    indices[2] = regions[i].meshptr->poly[j].vertIndex[2];

                    // Enable texturing and bind the appropriate texture
                    glEnable(GL_TEXTURE_2D);
                    glBindTexture(GL_TEXTURE_2D, textures[regions[i].meshptr->poly[j].tex]);

                    glVertexPointer(3, GL_FLOAT, sizeof(Vertex), &vertices[0].x);
                    glTexCoordPointer(2, GL_FLOAT, sizeof(Vertex), &vertices[0].u);

                    // Draw
                    glDrawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, indices);
                }
            }
        }
        num++;
    }

    // End of rendering - disable states
    glDisableClientState(GL_VERTEX_ARRAY);
    glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
Sorry if I left anything out. And I really appreciate feedback and help with this. I would even consider paying someone who is good with OpenGL and optimization to help me with this.
There is no point in using array rendering if you're only rendering 3 vertices at a time. The idea is to send thousands through with a single call. That is, you render a single "Polygon Array" or "Mesh" with one call.
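In other words, gather every triangle of a visible mesh into one index array and submit it with a single call. A minimal sketch, reusing the poly/vertIndex fields and the master vertices array from the question; grouping by texture is left out for brevity, and ideally the index list is built once rather than every frame:
// Build one big index list for the whole mesh.
std::vector<GLuint> meshIndices;
meshIndices.reserve(regions[i].meshptr->polygonCount * 3);
for (int j = 0; j < regions[i].meshptr->polygonCount; j++)
{
    meshIndices.push_back(regions[i].meshptr->poly[j].vertIndex[0]);
    meshIndices.push_back(regions[i].meshptr->poly[j].vertIndex[1]);
    meshIndices.push_back(regions[i].meshptr->poly[j].vertIndex[2]);
}

// Point at the master vertex list once, then draw everything in one call.
glVertexPointer(3, GL_FLOAT, sizeof(Vertex), &vertices[0].x);
glTexCoordPointer(2, GL_FLOAT, sizeof(Vertex), &vertices[0].u);
glDrawElements(GL_TRIANGLES, (GLsizei)meshIndices.size(), GL_UNSIGNED_INT, meshIndices.data());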