Follow up: Asynchronous off-screen query performance

Follow up: Asynchronous off-screen query performance - c++

I recently asked this question:
How to perform asynchronous off-screen queries?
What I've heard, but haven't been able to confirm yet, is that rendering to the window is more expensive than rendering to a framebuffer. First of all, can anyone comment on this? Can I draw multiple scenes to framebuffers faster than I can to the window? Are there other options, e.g., pbuffers or PBOs?
I have started playing around with framebuffers, but I have not been able to get the query to work. Here's some psuedo code for what I have set up so far:
glfwWindowHint(GLFW_VISIBLE, GL_FALSE);
window = glfwCreateWindow(1, 1, "OpenGL", NULL, NULL);
glfwMakeContextCurrent(window);
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
glGenRenderbuffers(1, &rbo);
glBindRenderbuffer(GL_RENDERBUFFER, rbo);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, size, size);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rbo);
glEnable(GL_DEPTH_TEST);
glGenQueries(numberOfQueries, queries);
for (scene in scenesToRender)
{
glClear(GL_DEPTH_BUFFER_BIT);
glDepthFunc(GL_LESS);
drawShadingObjects(scene);
glBeginQuery(GL_SAMPLES_PASSED, queries[index]);
glDepthFunc(GL_LEQUAL);
drawShadedObject(scene);
glEndQuery(GL_SAMPLES_PASSED);
}
collectQueryResults();
deleteBuffers();
So far everything runs, but all of the queries return "0". Is there something about querying when drawing to a framebuffer that is different than when drawing to the window buffer?
Again, my two questions are:
Can I draw multiple scenes to framebuffers faster than I can to the window? Are there other options, e.g., pbuffers or PBOs?
Is there something about querying when drawing to a framebuffer that is different than when drawing to the window buffer?

Try something like this:
#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include <iostream>
#include <vector>
using namespace std;
const unsigned int sz = 1024;
void drawScene( unsigned int multiplier )
{
glViewport( 0, 0, sz, sz );
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glRotatef( (float)glfwGetTime() * 50.f * multiplier, 0.f, 0.f, 1.f);
glBegin(GL_TRIANGLES);
glColor3f(1.f, 0.f, 0.f);
glVertex3f(-0.6f, -0.4f, 0.f);
glColor3f(0.f, 1.f, 0.f);
glVertex3f(0.6f, -0.4f, 0.f);
glColor3f(0.f, 0.f, 1.f);
glVertex3f(0.f, 0.6f, 0.f);
glEnd();
}
bool available( const vector< GLuint >& queries )
{
for( size_t i = 0; i < queries.size(); ++i )
{
GLuint available = 0;
glGetQueryObjectuiv( queries[i], GL_QUERY_RESULT_AVAILABLE, &available );
if( GL_FALSE == available )
return false;
}
return true;
}
int main()
{
glfwInit();
GLFWwindow* window = glfwCreateWindow( 400, 400, "Simple example", NULL, NULL );
glfwMakeContextCurrent( window );
glewInit();
if( !glewIsSupported( "GL_VERSION_2_1" ) )
return -1;
if( !glewIsSupported( "GL_EXT_framebuffer_object" ) )
return -1;
GLuint fbo = 0;
glGenFramebuffersEXT( 1, &fbo );
glBindFramebufferEXT( GL_DRAW_FRAMEBUFFER_EXT, fbo );
GLuint rbo0 = 0;
glGenRenderbuffersEXT( 1, &rbo0 );
glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, rbo0 );
glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, GL_RGBA, sz, sz );
glFramebufferRenderbufferEXT( GL_DRAW_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, rbo0 );
GLuint rbo1 = 0;
glGenRenderbuffersEXT( 1, &rbo1 );
glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, rbo1 );
glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT24, sz, sz );
glFramebufferRenderbufferEXT( GL_DRAW_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, rbo1 );
GLenum status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT );
if( status != GL_FRAMEBUFFER_COMPLETE_EXT )
return -1;
glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 );
vector< GLuint > queries( 10 );
glGenQueries( queries.size(), &queries[0] );
glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, fbo );
for( size_t i = 0; i < queries.size(); ++i )
{
glBeginQuery( GL_SAMPLES_PASSED, queries[i] );
drawScene( i + 1 );
glEndQuery( GL_SAMPLES_PASSED );
}
glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 );
// wait for queries to become available
unsigned int cnt = 0;
while( !available( queries ) )
{
cnt++;
}
// all queries available, display query results
cout << "cnt: " << cnt << endl;
for( size_t i = 0; i < queries.size(); ++i )
{
GLuint samples = 0;
glGetQueryObjectuiv( queries[i], GL_QUERY_RESULT, &samples );
cout << i << ": " << samples << endl;
}
cout << endl;
glfwDestroyWindow( window );
glfwTerminate();
return 0;
}
Representative output on my system:
cnt: 1884
0: 157288
1: 157288
2: 157289
3: 157288
4: 157287
5: 157286
6: 157292
7: 157286
8: 157289
9: 157288

Related

How to draw points efficiently

My program receives PCL pointcloud and plot each point one by one using:
glBegin(GL_POINTS);
glVertex3f(point.x, point.y, point].z);
glEnd();
It works but due to the large number of points the program is pretty slow. Is there a more efficient way to do this?

Jam all the points into a big VBO when the point-cloud changes & draw 'em all in one go using a single glDrawArrays() call. That way OpenGL can shift all the vertex data to GPU once instead of you spoon-feeding the driver geometry one glVertex() at a time every frame.
Heck, even vertex arrays will buy you a huge speed-up by avoiding hundreds of thousands of function-calls into the GL driver.
EDIT: Comparison:
10 million random points, using vertex buffer objects:
Vertex arrays:
Display lists:
And using immediate-mode:
Code (hit 'n' to cycle between drawing methods):
// http://glew.sourceforge.net/
#include <GL/glew.h>
// http://freeglut.sourceforge.net/
#include <GL/freeglut.h>
// http://glm.g-truc.net/
#include <glm/glm.hpp>
#include <glm/gtc/random.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <sstream>
#include <chrono>
#include <cstddef>
struct Vertex
{
glm::vec4 pos;
glm::vec4 color;
};
std::vector< Vertex > verts;
GLuint vbo = 0;
GLuint dlist = 0;
void init()
{
// init geometry
for( size_t i = 0; i < 10000000; i++ )
{
Vertex vert;
vert.pos = glm::vec4( glm::linearRand( glm::vec3( -1.0f, -1.0f, -1.0f ), glm::vec3( 1.0f, 1.0f, 1.0f ) ), 1.0f );
vert.color = glm::vec4( glm::linearRand( glm::vec3( 0.00f, 0.0f, 0.0f ), glm::vec3( 1.0f, 1.0f, 1.0f ) ), 1.0f );
verts.push_back( vert );
}
// create display list
dlist = glGenLists( 1 );
glNewList( dlist, GL_COMPILE );
glBegin( GL_POINTS );
for( size_t i = 0; i < verts.size(); ++i )
{
glColor4fv( glm::value_ptr( verts[i].color) );
glVertex4fv( glm::value_ptr( verts[i].pos) );
}
glEnd();
glEndList();
// create VBO
glGenBuffers( 1, &vbo );
glBindBuffer( GL_ARRAY_BUFFER, vbo );
glBufferData( GL_ARRAY_BUFFER, sizeof( Vertex ) * verts.size(), verts.data(), GL_STATIC_DRAW );
}
unsigned int method = 0;
void keyboard( unsigned char key, int x, int y )
{
if( 'n' == key )
{
method++;
if( method > 3 ) method = 0;
}
}
void display()
{
// timekeeping
static std::chrono::steady_clock::time_point prv = std::chrono::steady_clock::now();
std::chrono::steady_clock::time_point cur = std::chrono::steady_clock::now();
const float dt = std::chrono::duration< float >( cur - prv ).count();
prv = cur;
glClearColor( 0, 0, 0, 1 );
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
glMatrixMode( GL_PROJECTION );
glLoadIdentity();
double w = glutGet( GLUT_WINDOW_WIDTH );
double h = glutGet( GLUT_WINDOW_HEIGHT );
gluPerspective( 60.0, w / h, 0.1, 10.0 );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
gluLookAt( 2, 2, 2, 0, 0, 0, 0, 0, 1 );
static float angle = 0.0f;
angle += dt * 6.0f;
glRotatef( angle, 0, 0, 1 );
// render
switch( method )
{
case 0:
// VBO
glBindBuffer( GL_ARRAY_BUFFER, vbo );
glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_COLOR_ARRAY );
glVertexPointer( 4, GL_FLOAT, sizeof( Vertex ), (void*)offsetof( Vertex, pos ) );
glColorPointer( 4, GL_FLOAT, sizeof( Vertex ), (void*)offsetof( Vertex, color ) );
glDrawArrays( GL_POINTS, 0, verts.size() );
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_COLOR_ARRAY );
glBindBuffer( GL_ARRAY_BUFFER, 0 );
break;
case 1:
// vertex array
glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_COLOR_ARRAY );
glVertexPointer( 4, GL_FLOAT, sizeof( Vertex ), &verts[0].pos );
glColorPointer( 4, GL_FLOAT, sizeof( Vertex ), &verts[0].color );
glDrawArrays( GL_POINTS, 0, verts.size() );
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_COLOR_ARRAY );
break;
case 2:
// display list
glCallList( dlist );
break;
case 3:
// immediate mode
glBegin( GL_POINTS );
for( size_t i = 0; i < verts.size(); ++i )
{
glColor4fv( glm::value_ptr( verts[i].color) );
glVertex4fv( glm::value_ptr( verts[i].pos) );
}
glEnd();
break;
}
// info/frame time output
std::stringstream msg;
msg << "Using ";
switch( method )
{
case 0: msg << "vertex buffer object"; break;
case 1: msg << "vertex array"; break;
case 2: msg << "display list"; break;
case 3: msg << "immediate mode"; break;
}
msg << std::endl;
msg << "Frame time: " << (dt * 1000.0f) << " ms";
glColor3ub( 255, 255, 0 );
glWindowPos2i( 10, 25 );
glutBitmapString( GLUT_BITMAP_9_BY_15, (unsigned const char*)( msg.str().c_str() ) );
glutSwapBuffers();
}
int main(int argc, char **argv)
{
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_RGBA | GLUT_DOUBLE );
glutInitWindowSize( 600, 600 );
glutCreateWindow( "GLUT" );
glewInit();
init();
glutDisplayFunc( display );
glutKeyboardFunc( keyboard );
glutIdleFunc( display );
glutMainLoop();
return 0;
}

Yes definitely, the code you are showing is from a quite old version of OpenGL.
In more recent versions you can pack your data together and send it to the GPU in one call. The code becomes a little bit more complex but it is worth it.
I suggest you to look at this website : https://learnopengl.com/
It gathers everything you need to start using modern opengl.
Hope it helped.

Load image with GDAL Libraries (VC++)

I have a problem when I try to load an image with GDAL Libraries, and implements it(image) to the OpenGL Control. The problem is on the color as you can see on the picture.
And this is the functions to generate texture from the image:
GLuint COpenGLControl::ReadGDALData(CString filename)
{
BYTE* tempReturn;
GLuint texture;
GDALDataset *poDataset;
GDALAllRegister();
poDataset = (GDALDataset *) GDALOpen((const char *)(CStringA)filename, GA_ReadOnly);
int Height = poDataset->GetRasterXSize(), Width = poDataset->GetRasterYSize();
LONG LineBytes = (Width*8+31)/32*4;
BYTE * pData = (BYTE *)new char[ LineBytes * Height * 3];
if (poDataset == NULL)
{
AfxMessageBox("Couldn't open selected file!");
return NULL;
}
nBands = poDataset->GetRasterCount();
GDALRasterBand **poBand;
poBand = new GDALRasterBand *[nBands];
if (poBand == NULL)
{
AfxMessageBox("Couldn't open the bands!", MB_ICONWARNING);
return NULL;
}
for (int i=0; i<nBands; i++)
{
poBand[i] = poDataset->GetRasterBand(i+1);
if (poBand[i] == NULL)
{
AfxMessageBox("Couldn't open selected bands", MB_ICONWARNING);
return NULL;
}
}
int BandChoice = 2;
nXsize = poBand[BandChoice]->GetXSize();
nYsize = poBand[BandChoice]->GetYSize();
if (BandChoice == 1)
{
poBandBlock_Gray = (BYTE*)CPLMalloc(sizeof(BYTE)*(nXsize*nYsize));
poBand[BandChoice]->RasterIO(GF_Read, 0, 0, nXsize, nYsize, poBandBlock_Gray, nXsize, nYsize, poBand[BandChoice]->GetRasterDataType(), 0, 0);
}
else
{
int nXsize_R, nXsize_G, nXsize_B;
int nYsize_R, nYsize_G, nYsize_B;
int BandChoiceR = 0;
int BandChoiceG = 1;
int BandChoiceB = 2;
nXsize_R = poBand[BandChoiceR]->GetXSize();
nXsize_G = poBand[BandChoiceG]->GetXSize();
nXsize_B = poBand[BandChoiceB]->GetXSize();
nYsize_R = poBand[BandChoiceR]->GetYSize();
nYsize_G = poBand[BandChoiceG]->GetYSize();
nYsize_B = poBand[BandChoiceB]->GetYSize();
nXsize = nXsize_R;
nYsize = nYsize_R;
poBandBlock_R = (BYTE*)CPLMalloc(sizeof(BYTE)*(nXsize_R*nYsize_R));
poBandBlock_G = (BYTE*)CPLMalloc(sizeof(BYTE)*(nXsize_G*nYsize_G));
poBandBlock_B = (BYTE*)CPLMalloc(sizeof(BYTE)*(nXsize_B*nYsize_B));
poBand[BandChoiceR]->RasterIO(GF_Read, 0, 0, nXsize_R, nYsize_R, poBandBlock_R, nXsize_R, nYsize_R, poBand[BandChoiceR]->GetRasterDataType(), 0, 0);
poBand[BandChoiceG]->RasterIO(GF_Read, 0, 0, nXsize_G, nYsize_G, poBandBlock_G, nXsize_G, nYsize_G, poBand[BandChoiceG]->GetRasterDataType(), 0, 0);
poBand[BandChoiceB]->RasterIO(GF_Read, 0, 0, nXsize_B, nYsize_B, poBandBlock_B, nXsize_B, nYsize_B, poBand[BandChoiceB]->GetRasterDataType(), 0, 0);
delete poDataset;
}
if (BandChoice == 1)
{
for ( int i=0; i < Height; i++)
{
for ( int j=0; j < Width; j++)
{
pData[(Height-i-1) * LineBytes + j] = poBandBlock_Gray[i*Width + j];
}
}
CPLFree(poBandBlock_Gray);
}
else
{
int j2 ;
for ( int i=0; i<Height; i++)
{
for ( int j=0, j2=0; j < Width, j2 < 3 * Width; j++, j2+=3)
{
pData[(Height-i-1)*LineBytes + j2+2] = poBandBlock_R[i*Width + j];
pData[(Height-i-1)*LineBytes + j2+1] = poBandBlock_G[i*Width + j];
pData[(Height-i-1)*LineBytes + j2] = poBandBlock_B[i*Width + j];
}
}
CPLFree(poBandBlock_B);
CPLFree(poBandBlock_R);
CPLFree(poBandBlock_G);
}
// allocate a texture name
glGenTextures( 1, &texture );
// select our current texture
glBindTexture( GL_TEXTURE_2D, texture );
// select modulate to mix texture with color for shading
glTexEnvf( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE );
// when texture area is small, bilinear filter the closest mipmap
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
GL_LINEAR_MIPMAP_NEAREST );
// when texture area is large, bilinear filter the first mipmap
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
// if wrap is true, the texture wraps over at the edges (repeat)
// ... false, the texture ends at the edges (clamp)
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, FALSE );
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, FALSE );
// build our texture mipmaps
gluBuild2DMipmaps( GL_TEXTURE_2D, 3, Width, Height, GL_RGB, GL_UNSIGNED_BYTE, pData );
// free buffer
free( pData );
return texture;
}
This is the Draw function:
void COpenGLControl::OnDraw(CDC *pDC)
{
// TODO: Camera controls
wglMakeCurrent(hdc,hrc);
// Set color to use when clearing the background.
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
glClearDepth(1.0f);
// Turn on backface culling
glFrontFace(GL_CCW);
glCullFace(GL_FRONT_AND_BACK);
// Turn on depth testing
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear all objects
glEnable( GL_TEXTURE_2D ); // enable texture for 2 dimensions
glPushMatrix();
if (filename.IsEmpty() == false)
{
imgData = ReadGDALData( filename );
glBindTexture( GL_TEXTURE_2D, imgData );
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear all objects
glLoadIdentity ();
gluLookAt (0,0,1,0,0,0,0,1,0);
glTranslatef (m_fPosX, m_fPosY, 0.0f);
glScalef (m_fZoom,m_fZoom,1.0);
glBegin( GL_QUADS ); // apply loaded texture to viewport
glTexCoord2d(0.0,0.0); glVertex2d(-1.0,-1.0);
glTexCoord2d(1.0,0.0); glVertex2d(+1.0,-1.0);
glTexCoord2d(1.0,1.0); glVertex2d(+1.0,+1.0);
glTexCoord2d(0.0,1.0); glVertex2d(-1.0,+1.0);
glEnd();
}
glPopMatrix();
glDisable( GL_TEXTURE_2D );
glFlush();
// Swap buffers
SwapBuffers(hdc);
wglMakeCurrent(NULL, NULL);
}

The problem is not so much in the color, but (from what I can tell from the sample) in the way your data is packed. Look into what byte ordering / row padding / color packing your OpenGL buffer expects, and in your GDAL loader what it provides. Just a hunch here, but it seems like your OpenGL expects a 4th (alpha) component in your RGB structs, but your GDAL code doesn't supply that. Also your GDAL loader aligns on 32 bit boundaries it seems, check if your OpenGL texture calls require that, too. Did you copy/paste the GDAL loader from a sample where somebody uses it to draw with BitBlt()? It looks that way.

Is It More Efficient to Use GL_TRIANGLE_STRIP or Indexed GL_TRIANGLES to a Draw a Dynamic Number of Quads

I'm developing a simple sprite-based 2D game in C++ that uses OpenGL for hardware-accelerated rendering, and SDL for window management and user input handling. Since it's a 2D game, I'm only ever going to need to draw quads, but because the number of sprites is dynamic, I can never rely on there being a constant number of quads. Consequently, I need to rebuffer all of the vertex data via my VBO each frame (since there may be more or fewer quads than there were in the last frame, and thus the buffer may be a different size).
The prototype program I have so far creates a window and allows the user to add and remove quads in a diagonal row by using the up and down arrow keys. Right now the quads I'm drawing are simple, untextured white squares. Here is the code I'm working with (compiles and works correctly under OS X 10.6.8 and Ubuntu 12.04 with OpenGL 2.1):
#if defined(__APPLE__)
#include <OpenGL/OpenGL.h>
#endif
#if defined(__linux__)
#define GL_GLEXT_PROTOTYPES
#include <GL/glx.h>
#endif
#include <GL/gl.h>
#include <SDL.h>
#include <iostream>
#include <vector>
#include <string>
struct Vertex
{
//vertex coordinates
GLint x;
GLint y;
};
//Constants
const int SCREEN_WIDTH = 1024;
const int SCREEN_HEIGHT = 768;
const int FPS = 60; //our framerate
//Globals
SDL_Surface *screen; //the screen
std::vector<Vertex> vertices; //the actual vertices for the quads
std::vector<GLint> startingElements; //the index where the 4 vertices of each quad begin in the 'vertices' vector
std::vector<GLint> counts; //the number of vertices for each quad
GLuint VBO = 0; //the handle to the vertex buffer
void createVertex(GLint x, GLint y)
{
Vertex vertex;
vertex.x = x;
vertex.y = y;
vertices.push_back(vertex);
}
//creates a quad at position x,y, with a width of w and a height of h (in pixels)
void createQuad(GLint x, GLint y, GLint w, GLint h)
{
//Since we're drawing the quads using GL_TRIANGLE_STRIP, the vertex drawing
//order is from top to bottom, left to right, like so:
//
// 1-----3
// | |
// | |
// 2-----4
createVertex(x, y); //top-left vertex
createVertex(x, y+h); //bottom-left vertex
createVertex(x+w, y); //top-right vertex
createVertex(x+w, y+h); //bottom-right vertex
counts.push_back(4); //each quad will always have exactly 4 vertices
startingElements.push_back(startingElements.size()*4);
std::cout << "Number of Quads: " << counts.size() << std::endl; //print out the current number of quads
}
//removes the most recently created quad
void removeQuad()
{
if (counts.size() > 0) //we don't want to remove a quad if there aren't any to remove
{
for (int i=0; i<4; i++)
{
vertices.pop_back();
}
startingElements.pop_back();
counts.pop_back();
std::cout << "Number of Quads: " << counts.size() << std::endl;
}
else
{
std::cout << "Sorry, you can't remove a quad if there are no quads to remove!" << std::endl;
}
}
void init()
{
//initialize SDL
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER);
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
#if defined(__APPLE__)
//Enable vsync so that we don't get tearing when rendering
GLint swapInterval = 1;
CGLSetParameter(CGLGetCurrentContext(), kCGLCPSwapInterval, &swapInterval);
#endif
//Disable depth testing, lighting, and dithering, since we're going to be doing 2D rendering only
glDisable(GL_DEPTH_TEST);
glDisable(GL_LIGHTING);
glDisable(GL_DITHER);
glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_LIGHTING_BIT);
//Set the projection matrix
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, SCREEN_WIDTH, SCREEN_HEIGHT, 0, -1.0, 1.0);
//Set the modelview matrix
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
//Create VBO
glGenBuffers(1, &VBO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
}
void gameLoop()
{
int frameDuration = 1000/FPS; //the set duration (in milliseconds) of a single frame
int currentTicks;
int pastTicks = SDL_GetTicks();
bool done = false;
SDL_Event event;
while(!done)
{
//handle user input
while(SDL_PollEvent(&event))
{
switch(event.type)
{
case SDL_KEYDOWN:
switch (event.key.keysym.sym)
{
case SDLK_UP: //create a new quad every time the up arrow key is pressed
createQuad(64*counts.size(), 64*counts.size(), 64, 64);
break;
case SDLK_DOWN: //remove the most recently created quad every time the down arrow key is pressed
removeQuad();
break;
default:
break;
}
break;
case SDL_QUIT:
done = true;
break;
default:
break;
}
}
//Clear the color buffer
glClear(GL_COLOR_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
//replace the current contents of the VBO with a completely new set of data (possibly including either more or fewer quads)
glBufferData(GL_ARRAY_BUFFER, vertices.size()*sizeof(Vertex), &vertices.front(), GL_DYNAMIC_DRAW);
glEnableClientState(GL_VERTEX_ARRAY);
//Set vertex data
glVertexPointer(2, GL_INT, sizeof(Vertex), 0);
//Draw the quads
glMultiDrawArrays(GL_TRIANGLE_STRIP, &startingElements.front(), &counts.front(), counts.size());
glDisableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
//Check to see if we need to delay the duration of the current frame to match the set framerate
currentTicks = SDL_GetTicks();
int currentDuration = (currentTicks - pastTicks); //the duration of the frame so far
if (currentDuration < frameDuration)
{
SDL_Delay(frameDuration - currentDuration);
}
pastTicks = SDL_GetTicks();
// flip the buffers
SDL_GL_SwapBuffers();
}
}
void cleanUp()
{
glDeleteBuffers(1, &VBO);
SDL_FreeSurface(screen);
SDL_Quit();
}
int main(int argc, char *argv[])
{
std::cout << "To create a quad, press the up arrow. To remove the most recently created quad, press the down arrow." << std::endl;
init();
gameLoop();
cleanUp();
return 0;
}
At the moment I'm using GL_TRIANGLE_STRIPS with glMultiDrawArrays() to render my quads. This works, and seems do be pretty decent in terms of performance, but I have to wonder whether using GL_TRIANGLES in conjunction with an IBO to avoid duplicate vertices would be a more efficient way to render? I've done some research, and some people suggest that indexed GL_TRIANGLES generally outperform GL_TRIANGLE_STRIPS, but they also seem to assume that the number of quads would remain constant, and thus the size of the VBO and IBO would not have to be rebuffered each frame. That's my biggest hesitation with indexed GL_TRIANGLES: if I did implement indexed GL_TRIANGLES, I would have to rebuffer the entire index buffer each frame in addition to rebuffering the entire VBO each frame, again because of the dynamic number of quads.
So basically, my question is this: Given that I have to rebuffer all of my vertex data to the GPU each frame due to the dynamic number of quads, would it be more efficient to switch to indexed GL_TRIANGLES to draw the quads, or should I stick with my current GL_TRIANGLE_STRIP implementation?

You'll probably be fine using un-indexed GL_QUADS/GL_TRIANGLES and a glDrawArrays() call.
SDL_Surface *screen;
...
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
...
SDL_FreeSurface(screen);
Don't do that:
The returned surface is freed by SDL_Quit and must not be freed by the caller. This rule also includes consecutive calls to SDL_SetVideoMode (i.e. resize or resolution change) because the existing surface will be released automatically.
EDIT: Simple vertex array demo:
// g++ main.cpp -lglut -lGL
#include <GL/glut.h>
#include <vector>
using namespace std;
// OpenGL Mathematics (GLM): http://glm.g-truc.net/
#include <glm/glm.hpp>
#include <glm/gtc/random.hpp>
using namespace glm;
struct SpriteWrangler
{
SpriteWrangler( unsigned int aSpriteCount )
{
verts.resize( aSpriteCount * 6 );
states.resize( aSpriteCount );
for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos = linearRand( vec2( -400, -400 ), vec2( 400, 400 ) );
states[i].vel = linearRand( vec2( -30, -30 ), vec2( 30, 30 ) );
Vertex vert;
vert.r = (unsigned char)linearRand( 64.0f, 255.0f );
vert.g = (unsigned char)linearRand( 64.0f, 255.0f );
vert.b = (unsigned char)linearRand( 64.0f, 255.0f );
vert.a = 255;
verts[i*6 + 0] = verts[i*6 + 1] = verts[i*6 + 2] =
verts[i*6 + 3] = verts[i*6 + 4] = verts[i*6 + 5] = vert;
}
}
void wrap( const float minVal, float& val, const float maxVal )
{
if( val < minVal )
val = maxVal - fmod( maxVal - val, maxVal - minVal );
else
val = minVal + fmod( val - minVal, maxVal - minVal );
}
void Update( float dt )
{
for( size_t i = 0; i < states.size(); ++i )
{
states[i].pos += states[i].vel * dt;
wrap( -400.0f, states[i].pos.x, 400.0f );
wrap( -400.0f, states[i].pos.y, 400.0f );
float size = 20.0f;
verts[i*6 + 0].pos = states[i].pos + vec2( -size, -size );
verts[i*6 + 1].pos = states[i].pos + vec2( size, -size );
verts[i*6 + 2].pos = states[i].pos + vec2( size, size );
verts[i*6 + 3].pos = states[i].pos + vec2( size, size );
verts[i*6 + 4].pos = states[i].pos + vec2( -size, size );
verts[i*6 + 5].pos = states[i].pos + vec2( -size, -size );
}
}
struct Vertex
{
vec2 pos;
unsigned char r, g, b, a;
};
struct State
{
vec2 pos;
vec2 vel; // units per second
};
vector< Vertex > verts;
vector< State > states;
};
void display()
{
// timekeeping
static int prvTime = glutGet(GLUT_ELAPSED_TIME);
const int curTime = glutGet(GLUT_ELAPSED_TIME);
const float dt = ( curTime - prvTime ) / 1000.0f;
prvTime = curTime;
// sprite updates
static SpriteWrangler wrangler( 2000 );
wrangler.Update( dt );
vector< SpriteWrangler::Vertex >& verts = wrangler.verts;
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
// set up projection and camera
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
double w = glutGet( GLUT_WINDOW_WIDTH );
double h = glutGet( GLUT_WINDOW_HEIGHT );
double ar = w / h;
glOrtho( -400 * ar, 400 * ar, -400, 400, -1, 1);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_COLOR_ARRAY );
glVertexPointer( 2, GL_FLOAT, sizeof( SpriteWrangler::Vertex ), &verts[0].pos.x );
glColorPointer( 4, GL_UNSIGNED_BYTE, sizeof( SpriteWrangler::Vertex ), &verts[0].r );
glDrawArrays( GL_TRIANGLES, 0, verts.size() );
glDisableClientState( GL_VERTEX_ARRAY );
glDisableClientState( GL_COLOR_ARRAY );
glutSwapBuffers();
}
// run display() every 16ms or so
void timer( int extra )
{
glutTimerFunc( 16, timer, 0 );
glutPostRedisplay();
}
int main(int argc, char **argv)
{
glutInit( &argc, argv );
glutInitWindowSize( 600, 600 );
glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE );
glutCreateWindow( "Sprites" );
glutDisplayFunc( display );
glutTimerFunc( 0, timer, 0 );
glutMainLoop();
return 0;
}
You can get decent performance with just vertex arrays.
Ideally most/all of your dts should be <= 16 milliseconds.

Tile Map Usage Much CPU With OpenGL and SDL

I been working in a method to draw a map based on tiles with OpenGL and SDL. And I finally coded but when I execute the basic program where it draw a tile map of 25x16, and I check the use of CPU, it says that consume 25% but without drawing the map consume by much 1% of CPU.
So exists another method to draw the map or why is the use of CPU so high.
This is the code for drawing the map.
void CMapManager::drawMap(Map *map)
{
vector<ImagePtr> tempImages = CGameApplication::getInstance()->getGameApp()->getImages();
GLuint texture = tempImages.at(1)->getTexture();
glColor3f(1.0f, 1.0f, 1.0f);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glBindTexture( GL_TEXTURE_2D, texture );
glBegin( GL_QUADS );
for (int i = 0; i < map->getHeight(); i++)
{
for (int j = 0; j < map->getWidth(); j++)
{
ImagePtr imgDraw = tempImages.at(map->getMapTiles()[i][j]->getTypeTile());
glTexCoord2i( 0, 0 );
glVertex3f( imgDraw->getPosX() + (imgDraw->getWidth()*j), imgDraw->getPosY() + (imgDraw->getHeight()*i), 0.f );
//Bottom-left vertex (corner)
glTexCoord2i( 1, 0 );
glVertex3f( imgDraw->getOffsetX() + (imgDraw->getWidth()*j), imgDraw->getPosY() + (imgDraw->getHeight()*i), 0.f );
//Bottom-right vertex (corner)
glTexCoord2i( 1, 1 );
glVertex3f( imgDraw->getOffsetX() + (imgDraw->getWidth()*j), imgDraw->getOffsetY() + (imgDraw->getHeight()*i), 0.f );
//Top-right vertex (corner)
glTexCoord2i( 0, 1 );
glVertex3f( imgDraw->getPosX() + (imgDraw->getWidth()*j), imgDraw->getOffsetY() + (imgDraw->getHeight()*i), 0.f );
}
}
glEnd();
glDisable(GL_BLEND);
}
And in this method I call the function:
void CGameApplication::renderApplication()
{
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
glEnable(GL_TEXTURE_2D);
vector<ImagePtr> tempImages = GApp->getImages();
vector<ImagePtr>::iterator iterImage;
for (iterImage = tempImages.begin(); iterImage != tempImages.end(); ++iterImage)
{
CImageM->drawSprites( (*iterImage)->getTexture(), (*iterImage)->getPosX(), (*iterImage)->getPosY(),
(*iterImage)->getOffsetX(), (*iterImage)->getOffsetY() );
}
vector<TextPtr> tempTexts = GApp->getTexts();
vector<TextPtr>::iterator iterText;
for (iterText = tempTexts.begin(); iterText != tempTexts.end(); ++iterText)
{
CTextM->drawFonts( (*iterText) );
}
CMapM->drawMap(GApp->getCurrentMap());
glDisable(GL_TEXTURE_2D);
}
I already set a Timer that after this function:
GameApplication->getCKeyboardHandler()->inputLogic();
GameApplication->renderApplication();
SDL_GL_SwapBuffers();
GameApplication->getGameApp()->getTimer()->delay();
And the delay function is:
void Timer::delay()
{
if( this->getTicks() < 1000 / FRAMES_PER_SECOND )
{
SDL_Delay( ( 1000 / FRAMES_PER_SECOND ) - this->getTicks() );
}
}
The const FRAMES_PER_SECOND it's 5 in this moment.
And the function for convert image to GL texture is:
GLuint CImageManager::imageToGLTexture(std::string name)
{
GLuint texture;
SDL_Surface *surface;
GLenum texture_format;
GLint nOfColors;
if ( (surface = IMG_Load(name.c_str())) ) {
// Check that the image's width is a power of 2
if ( (surface->w & (surface->w - 1)) != 0 ) {
printf("warning: image.bmp's width is not a power of 2\n");
}
// Also check if the height is a power of 2
if ( (surface->h & (surface->h - 1)) != 0 ) {
printf("warning: image.bmp's height is not a power of 2\n");
}
// get the number of channels in the SDL surface
nOfColors = surface->format->BytesPerPixel;
if (nOfColors == 4) // contains an alpha channel
{
if (surface->format->Rmask == 0x000000ff)
texture_format = GL_RGBA;
else
texture_format = GL_BGRA_EXT;
}
else if (nOfColors == 3) // no alpha channel
{
if (surface->format->Rmask == 0x000000ff)
texture_format = GL_RGB;
else
texture_format = GL_BGR_EXT;
}
else {
printf("warning: the image is not truecolor.. this will probably break\n");
// this error should not go unhandled
}
SDL_SetAlpha(surface, 0, 0);
// Have OpenGL generate a texture object handle for us
glGenTextures( 1, &texture );
// Bind the texture object
glBindTexture( GL_TEXTURE_2D, texture );
// Set the texture's stretching properties
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
// Edit the texture object's image data using the information SDL_Surface gives us
glTexImage2D( GL_TEXTURE_2D, 0, nOfColors, surface->w, surface->h, 0,
texture_format, GL_UNSIGNED_BYTE, surface->pixels );
}
else {
printf("SDL could not load the image: %s\n", SDL_GetError());
SDL_Quit();
exit(1);
}
if ( surface ) {
SDL_FreeSurface( surface );
}
return texture;
}
Thanks before hand for the help.

After all, avoid state changes. Combine all your tiles into one texture and render using only one glBegin/glEnd block.
If you don't want to make many changes try display lists. OpenGL will be able to optimize your calls but there is no guarantee it will run much faster.
If your map doesn't change a lot use VBOs. It's the fastest way.

glReadPixels GL_DEPTH_COMPONENT and color

How to get the depth and the color information out of any OpenGL drawing? I would like to save a depth image and a color image to the disk. What i tried is the following:
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glEnable(GL_DEPTH_TEST);
glBegin(GL_POINTS);
glColor3f(1.0f,1.0f,1.0f);
for(int i=0; i<mesh->vertices.size();i++) {
if(! mesh->colors.empty()) {
glColor3f(mesh->colors[i][0],mesh->colors[i][1],mesh->colors[i][2]);
}
float x= mesh->vertices[i][0];
float y= mesh->vertices[i][1];
float z = mesh->vertices[i][2];
glVertex3f(x, y, z);
}
glEnd();
glFlush();
glFinish();
int width = 1280;
int height = 960;
GLfloat* depths;
depths = new GLfloat[ width * height ];
GLfloat * color;
color = new GLfloat[width * height];
glReadPixels (0, 0, width, height, GL_DEPTH_COMPONENT, GL_FLOAT, depths);
glReadPixels (0, 0, width, height, GL_BLUE, GL_FLOAT, color);
But it looks like only the depths array is filled?

For saving the render result in image, you must save colorbuffer information (not directly from depth buffer).
You can provide separate passes for color (to colorbuffer) and depth to same colorbuffer. And simple use glReadPixels two times, first after rendering color to colorbuffer and second after rendering depth in colorbuffer.
For write color and depth simultaneously in two separate color buffers by one pass you can use MRT ( Multiple Render Targets ), tutorial - http://www.opengl-tutorial.org/intermediate-tutorials/tutorial-14-render-to-texture/ .
I would choose MRT. :) After that you can save your results by using glReadPixels like in two passes technic.
But first you must setup from which colorbuffer you want read pixels by using glReadBuffer, default colorbuffer is GL_BACK, which mean default OpenGL context backbuffer. By using MRT you must use one of GL_COLOR_ATTACHMENTi for write in to colorbuffers and it also can be one of glReadBuffer value.
So, just simple setup glReadBuffer with one of GL_COLOR_ATTACHMENTi and use glReadPixels.

Try this:
#include <GL/freeglut.h>
#include <vector>
#include <sstream>
int mx = 0, my = 0;
void passiveMotion( int x, int y )
{
mx = x;
my = glutGet( GLUT_WINDOW_HEIGHT ) - y;
glutPostRedisplay();
}
void display()
{
glEnable(GL_DEPTH_TEST);
glClearColor( 0, 0, 0, 1 );
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
const int w = glutGet( GLUT_WINDOW_WIDTH );
const int h = glutGet( GLUT_WINDOW_HEIGHT );
const double ar = (double)w / (double)h;
glOrtho( -10 * ar, 10 * ar, -10, 10, -10, 10 );
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glColor3ub(0,255,0);
glPushMatrix();
glTranslated(2,2,-5);
glScalef(5,5,5);
glBegin(GL_QUADS);
glVertex2f(-1,-1);
glVertex2f(1,-1);
glVertex2f(1,1);
glVertex2f(-1,1);
glEnd();
glPopMatrix();
glColor3ub(255,0,0);
glPushMatrix();
glTranslated(0,0,0);
glScalef(5,5,5);
glBegin(GL_QUADS);
glVertex2f(-1,-1);
glVertex2f(1,-1);
glVertex2f(1,1);
glVertex2f(-1,1);
glEnd();
glPopMatrix();
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho( 0, w, 0, h, -1, 1 );
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
// print depth
{
GLfloat depth = 0.0f;
glReadPixels( mx, my, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth );
std::ostringstream oss;
oss << "Depth: " << depth;
glColor3ub( 255, 255, 255 );
glRasterPos2i( 10, 10 );
glutBitmapString( GLUT_BITMAP_9_BY_15, (const unsigned char*)oss.str().c_str() );
}
// print color
{
GLubyte color[4];
glReadPixels( mx, my, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, color );
std::ostringstream oss;
oss << "Color:"
<< " " << (unsigned int)color[0]
<< " " << (unsigned int)color[1]
<< " " << (unsigned int)color[2]
<< " " << (unsigned int)color[3];
glColor3ub( 255, 255, 255 );
glRasterPos2i( 10, 25 );
glutBitmapString( GLUT_BITMAP_9_BY_15, (const unsigned char*)oss.str().c_str() );
}
glutSwapBuffers();
}
int main( int argc, char** argv )
{
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE);
glutInitWindowSize(400,400);
glutCreateWindow("GLUT");
glutDisplayFunc( display );
glutPassiveMotionFunc( passiveMotion );
glutMainLoop();
return 0;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Follow up: Asynchronous off-screen query performance - c++

Related

How to draw points efficiently

Load image with GDAL Libraries (VC++)

Is It More Efficient to Use GL_TRIANGLE_STRIP or Indexed GL_TRIANGLES to a Draw a Dynamic Number of Quads

Tile Map Usage Much CPU With OpenGL and SDL

glReadPixels GL_DEPTH_COMPONENT and color

Categories

Resources