Slow stencil texture on AMD - opengl

I'm trying to add soft shadows to a modified Doom3 engine using an FBO with a stencil texture attachment that I bind and use in the light interaction fragment shader.
It works well enough, but there's a serious performance problem on a Radeon 460 (I don't have other AMD GPUs, but I suspect it's the same or worse on them, since this card is relatively new).
I'm on the latest drivers.
The fps drop is so bad that it's actually faster to do qglCopyTexImage2D to another texture (for each light!) than to bind the stencil texture used in the FBO.
Another problem is that when I try to optimize qglCopyTexImage2D by using qglCopyTexSubImage2D instead, the image starts to flicker.
Any real-world advice on stencil textures from fellow programmers?
Both nVidia and Intel appear to perform well with regard to speed here.
globalImages->currentRenderImage->Bind();
globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentRenderImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
globalImages->currentRenderFbo->Bind();
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
if ( glConfig.vendor != glvAny ) {
globalImages->currentStencilFbo->Bind();
globalImages->currentStencilFbo->uploadWidth = curWidth;
globalImages->currentStencilFbo->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
}
globalImages->currentDepthImage->Bind();
globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentDepthImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
} else {
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
}
}
// (re-)attach textures to FBO
if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
// create a framebuffer object, you need to delete them when program exits.
if ( !fboId )
qglGenFramebuffers( 1, &fboId );
qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
// attach a texture to FBO color attachement point
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
// attach a renderbuffer to depth attachment point
GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
else
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
common->Printf( "glCheckFramebufferStatus %d\n", status );
qglDeleteFramebuffers( 1, &fboId );
fboId = 0; // try from scratch next time
r_useFbo.SetBool( false );
}
qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be
}
qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
fboUsed = true;
GL_CheckErrors();
}
/*
Soft shadows vendor specific implementation
Intel: separate stencil, direct access, fastest
nVidia: combined stencil & depth, direct access, fast
AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
*/
// AMD-only workaround for soft shadows: instead of sampling the FBO's own
// depth/stencil attachment, take a full-screen copy of it into the
// currentStencilFbo image. Does nothing on other vendors or when
// r_softShadows is off.
void FB_CopyStencil() { // duzenko: why, AMD? WHY??
	// Same short-circuit order as a guard clause: the cvar is only read on AMD.
	const bool copyRequired = ( glConfig.vendor == glvAMD ) && r_softShadows.GetBool();
	if ( copyRequired ) {
		// Re-specifies level 0 of the bound image from the read framebuffer,
		// covering the whole video mode (vidWidth x vidHeight).
		globalImages->currentStencilFbo->Bind();
		qglCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, 0, 0, glConfig.vidWidth, glConfig.vidHeight, 0 );
		// Disabled experiment: copy only the current scissor rectangle.
		/*globalImages->currentDepthFbo->Bind();
		idScreenRect& r = backEnd.currentScissor;
		//qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );*/
		GL_CheckErrors();
	}
}
void FB_BindStencilTexture() {
const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
stencil->Bind();
if ( glConfig.vendor != glvIntel )
glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}

I ended up with two framebuffers: one for shadows only and the other for everything else.
The shadow texture is an FBO attachment in the former and bound as texture2D in the latter.

Related

Why is this framebuffer incomplete? (EXT_framebuffer_object)

This code works on my fancy, semi-new Nvidia computer, but the framebuffer is incomplete when run with an Intel GPU with drivers that are probably a few years old:
// Minimal EXT_framebuffer_object setup: one FBO with a single RGB colour texture.
GLuint fbo = 0;
glGenFramebuffersEXT( 1, &fbo );
GLuint tex = 0;
glGenTextures( 1, &tex );
glBindTexture( GL_TEXTURE_2D, tex );
// Allocate 320x240 RGB storage; the null data pointer means no pixels are uploaded.
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, 320, 240, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr );
// Only level 0 is specified above, so use non-mipmapped filtering.
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
// Bind the FBO and attach level 0 of the texture as colour attachment 0.
glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, fbo );
glFramebufferTexture2DEXT( GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, tex, 0 );
// Route fragment output 0 to colour attachment 0.
// NOTE(review): this is the core-named glDrawBuffers, while every other FBO call here uses the EXT suffix.
GLenum attachment = GL_COLOR_ATTACHMENT0_EXT;
glDrawBuffers( 1, &attachment );
// Query completeness of the currently bound framebuffer.
GLenum status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT );
// status is GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT
The Intel one apparently supports this extension (GLEW_EXT_framebuffer_object is true). Its version string is "2.1.0 - Build 8.15.10.2086".
Is it a driver bug, or am I doing something wrong? What can I do about it other than tell people to update their drivers or buy something better?

Multisampling using FBO

I have implemented multisampling using an FBO, but it doesn't work.
glGenTextures( text_num1, tex_image );
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glBindTexture( GL_TEXTURE_2D, tex_image[0] );
glTexImage2D(GL_TEXTURE_2D,0,GL_RGBA,texture_width,texture_height,0,GL_RGBA,GL_UNSIGNED_BYTE, img1[0].imagedata);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
// glBindTexture( GL_TEXTURE_2D, 0 );
GLint maxSamples;
glGetIntegerv(GL_MAX_SAMPLES_EXT, &maxSamples);
glGenFramebuffers(1, &fboID);
glGenRenderbuffers(1, &colorBufID);
glBindRenderbuffer(GL_RENDERBUFFER, colorBufID);
glRenderbufferStorageMultisample(GL_RENDERBUFFER, maxSamples, GL_RGBA,TEST_2D_3D_SRC_WIDTH ,TEST_2D_3D_SRC_HEIGHT );
//glBindRenderbuffer( GL_RENDERBUFFER, 0 );
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorBufID);
// glBindFramebuffer(GL_FRAMEBUFFER, 0);
glGenRenderbuffers(1,&defaultDepthBuffer);
glBindRenderbuffer(GL_RENDERBUFFER, defaultDepthBuffer);
glRenderbufferStorageMultisample(GL_RENDERBUFFER,maxSamples, GL_DEPTH_COMPONENT, TEST_2D_3D_SRC_WIDTH, TEST_2D_3D_SRC_HEIGHT);
glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, defaultDepthBuffer );
GLenum status = glCheckFramebufferStatus( GL_FRAMEBUFFER );
glBindFramebuffer(GL_FRAMEBUFFER, fboID);
glGenFramebuffers(1, &frameBufID);
glBindFramebuffer(GL_FRAMEBUFFER, frameBufID);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex_image[0], 0);
GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
glDrawBuffers(1, DrawBuffers);
glActiveTexture( GL_TEXTURE0 );
glBindTexture( GL_TEXTURE_2D, tex_image[0] );
glTexImage2D(GL_TEXTURE_2D,0,GL_RGBA,texture_width,texture_height,0,GL_RGBA,GL_UNSIGNED_BYTE,img1[0].imagedata);
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, indices);
glBindFramebuffer( GL_FRAMEBUFFER, 0 );
glBindFramebuffer( GL_READ_FRAMEBUFFER, fboID );
glBindFramebuffer( GL_DRAW_FRAMEBUFFER, frameBufID );
glBlitFramebuffer( 0, 0, TEST_2D_3D_SRC_WIDTH, TEST_2D_3D_SRC_HEIGHT, 0, 0, TEST_2D_3D_SRC_WIDTH, TEST_2D_3D_SRC_HEIGHT, GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT, GL_NEAREST );
glBindFramebuffer( GL_READ_FRAMEBUFFER, 0 );
glBindFramebuffer( GL_DRAW_FRAMEBUFFER, 0 );
eglSwapBuffers(sEGLDisplay, sEGLSurface);
You are currently drawing into frameBufID instead of fboID.
To fix this problem, move this line:
glBindFramebuffer(GL_FRAMEBUFFER, fboID);
Here:
glGenFramebuffers(1, &frameBufID);
glBindFramebuffer(GL_FRAMEBUFFER, frameBufID);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex_image[0], 0);
glBindFramebuffer(GL_FRAMEBUFFER, fboID);
Then you will draw into fboID instead.

What might be the issue with my pipeline to rendering the depth buffer to a texture?

The main steps for depth testing from my understanding:
1) enable depth testing and how we want to depth test
2) create the frame buffer object and make sure it has a depth attached to it
3) bind our frame buffer object ( make sure to clear it before rendering )
4) draw stuff
And that should be it, no? Our framebuffer's depth attachment should then contain depth data. But I always get straight 1s, the default depth clear value.
step 1:
glEnable(GL_DEPTH_TEST);
glDepthFunc( GL_LEQUAL );
step 2:
//create the frame buffer object
glGenFramebuffers(1, &m_uifboHandle);
// Initialize FBO
glBindFramebuffer(GL_FRAMEBUFFER, m_uifboHandle);
//create 2 texture handles 1 for diffuse, 1 for depth
unsigned int m_uiTextureHandle[2];
glGenTextures( 2, m_uiTextureHandle );
//create the diffuse texture
glBindTexture( GL_TEXTURE_2D, m_uiTextureHandle[0]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, uiWidth, uiHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_uiTextureHandle[0], 0);
.
//create the depth buffer
glBindTexture(GL_TEXTURE_2D, m_uiTextureHandle[1]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP );
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP );
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, uiWidth, uiHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_uiTextureHandle[1], 0);
//go back to default binding
glBindFramebuffer(GL_FRAMEBUFFER, 0);
step 3:
//bind the frame buffer object
glBindFramebuffer( GL_FRAMEBUFFER, m_uifboHandle );
//clear it
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
step 4:
//draw things
Are these not the steps?
Am I missing something?
I've tried a few different tutorials.
I can't get any depth to render to a texture.
I keep getting straight 1s over and over.
The framebuffer is probably not complete. Try checking for completeness with glCheckFramebufferStatus. Moreover, your code was:
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, uiWidth, uiHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
However, it should be (watch the RGB-RGBA):
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, uiWidth, uiHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

glTexSubImage2D not working after adding in a particle system (with textures on each particle)

We were happily using glTexSubImage2D to update a texture every few frames; the texture had been initialised with glTexImage2D in our GL initialisation. After adding a particle system in which each particle is itself textured, our quad showing the glTexSubImage2D texture no longer displays.
The particles' textures are PNGs, so we use SDL to load each PNG into an SDL surface, and then glTexImage2D is used to upload the PNG to a texture.
If we change the quad's glTexSubImage2D call to a glTexImage2D call, the texture shows, but this is extremely inefficient and cuts the framerate at least in half, so we would rather be using glTexSubImage2D (as it worked before).
Does anyone have any idea why we now can't use glTexSubImage2D?
Below are the relevant pieces of code for the initialisation and binding of textures:
Loading in the particle texture
//Load smoke texture
// Loads smoke_particle.png via SDL_image and uploads it as a GL_RGBA 2D texture.
// Nothing happens if IMG_Load fails (returns null).
SDL_Surface *surface;
SDL_Surface *alpha_image;
if( (surface = IMG_Load("smoke_particle.png")))
{
// Build a 32-bit scratch surface of the same size; note the colour masks are
// passed in B, G, R order taken from the video surface's pixel format.
SDL_PixelFormat *pixf = SDL_GetVideoSurface()->format;
alpha_image = SDL_CreateRGBSurface( SDL_SWSURFACE, surface->w, surface->h, 32, pixf->Bmask, pixf->Gmask, pixf->Rmask, pixf->Amask );
// Clear the source surface's alpha flags before blitting it into the scratch surface.
SDL_SetAlpha(surface,0,0);
SDL_BlitSurface( surface, NULL, alpha_image, NULL );
// Create the GL texture; linear filtering, level 0 only.
glGenTextures( 1, &texture );
glBindTexture( GL_TEXTURE_2D, texture );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
// NOTE(review): the upload reads surface->pixels, not alpha_image->pixels, so the
// converted copy made above is never used - confirm which one is intended.
// NOTE(review): neither surface is freed in this snippet - confirm it happens elsewhere.
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, surface->w, surface->h, 0,
GL_RGBA, GL_UNSIGNED_BYTE, surface->pixels );
}
Setting up the quad's texture:
// Creates the video quad's texture with storage only; pixel data is supplied
// later through sub-image updates.
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &texVid);
glBindTexture(GL_TEXTURE_2D, texVid);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
// Internal format 3 is the legacy "three components" form; NULL data reserves
// VIDEO_WIDTH x VIDEO_HEIGHT texels without uploading anything.
// These dimensions must match the real frame size used by later sub-image
// updates (swapped width/height was the cause reported for this thread).
glTexImage2D(GL_TEXTURE_2D, 0, 3, VIDEO_WIDTH, VIDEO_HEIGHT, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
// Set after the allocation above, so it only affects subsequent uploads:
// source rows are then read as 2-byte aligned.
glPixelStorei(GL_UNPACK_ALIGNMENT, 2);
The only calls in initGL() are enabling GL_TEXTURE_2D and GL_BLEND, setting up glBlendFunc(), and setting up the quad's texture as above.
Any ideas?
Stupidly we had VIDEO_WIDTH set to the height of the texture and VIDEO_HEIGHT to the width of the texture.
Sorry if we wasted anyone's time.
Can anyone lock this or delete this or anything?
Thanks,
Infinitifizz

Depth render artifacts

I am rendering a scene depth-only to a separate framebuffer. The problem is a bit hard to explain, but as you can see in the image, the depth map is suffering from grid-like artifacts. Do you have any idea what the source of this could be?
Here is the code for the framebuffer creation:
# Create the depth texture that will receive the depth-only render.
self.shadowTexture = glGenTextures(1);
glBindTexture( GL_TEXTURE_2D, self.shadowTexture);
# Linear filtering, clamped in both directions.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP );
glTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP );
# Allocate w x h depth storage with an unsized GL_DEPTH_COMPONENT internal format;
# None means no initial data is uploaded.
# NOTE(review): the unsized format leaves the depth precision up to the driver - confirm what is actually allocated.
glTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, w, h, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, None);
glBindTexture( GL_TEXTURE_2D, 0);
# Create and bind the FBO, then disable colour draw/read buffers since only depth is attached.
self.fbo = glGenFramebuffers(1)
glBindFramebuffer(GL_FRAMEBUFFER_EXT, self.fbo)
glDrawBuffer(GL_NONE);
glReadBuffer(GL_NONE);
# Attach level 0 of the depth texture as the FBO's depth attachment.
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT,GL_TEXTURE_2D, self.shadowTexture, 0);
Your problem most likely comes from the fact that your projection matrix has too much separation between the near and far planes (meaning pixels that are close together suffer from floating-point accuracy issues). Shrink the gap between the two planes and that should solve your Z-fighting issues.