NVIDIA driver 320.86 texture buffer bug leads to crash - C++

The following code crashes very quickly (way before reaching the maximum buffer size of 2^27 texels).
I stripped every non-essential line of code to make it easier to read.
const int MAX_LAYER_DEPTH = 5;

#include "vapp.h"
#include "vmath.h"
#include <stdio.h>

BEGIN_APP_DECLARATION(OITDemo)
    // Override functions from base class
    virtual void Initialize(const char * title);
    virtual void Display(bool auto_redraw);
    virtual void Finalize(void);
    virtual void Reshape(int width, int height);

    GLuint linked_list_buffer;
    GLuint linked_list_texture;
    GLint current_width;
    GLint current_height;
END_APP_DECLARATION()

DEFINE_APP(OITDemo, "Order Independent Transparency")

void OITDemo::Initialize(const char * title)
{
    base::Initialize(title);

    glGenBuffers(1, &linked_list_buffer);
    glGenTextures(1, &linked_list_texture);

    Reshape(100, 100);
    return;
}

void OITDemo::Display(bool auto_redraw)
{
    glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
    glBindImageTexture(1, linked_list_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32UI);
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
    base::Display();
    return;
}

void OITDemo::Reshape(int width, int height)
{
    current_width = width;
    current_height = height;

    glBindImageTexture(1, 0, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32UI);

    static GLuint texBufferSize = 2047;
    ++texBufferSize;
    printf("%u : texBufferSize\n", texBufferSize);

    glBindBuffer(GL_TEXTURE_BUFFER, linked_list_buffer);
    glBufferData(GL_TEXTURE_BUFFER, texBufferSize * texBufferSize * MAX_LAYER_DEPTH * sizeof(vmath::vec4), NULL, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_TEXTURE_BUFFER, 0);

    // Bind it to a texture (for use as a TBO)
    glBindTexture(GL_TEXTURE_BUFFER, linked_list_texture);
    glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32UI, linked_list_buffer);
    glBindTexture(GL_TEXTURE_BUFFER, 0);

    glViewport(0, 0, current_width, current_height);
    return;
}

void OITDemo::Finalize(void)
{
    glDeleteTextures(1, &linked_list_texture);
    glDeleteBuffers(1, &linked_list_buffer);
}
The driver most probably can't handle fragmentation. It crashes between the reallocation of 21694445 elements (2083 x 2083 x 5) and 23587920 elements (2172 x 2172 x 5). The maximum buffer size (number of texels) reported by the graphics card is 2^27 (about 134 million texels).
It seems to work better if we allocate one big buffer at the start of the application and never change it, but it fails miserably if we try to reallocate the buffer during the life of the application.
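A minimal sketch of that allocate-once strategy (MAX_EXPECTED_DIMENSION is a hypothetical constant for the largest window edge we expect; clamping against the driver's reported limit is an assumption about what a defensive implementation might do):

// Hypothetical allocate-once setup, done once at Initialize() time.
GLint maxTexels = 0;
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &maxTexels);  // 2^27 on this card

const GLsizeiptr MAX_EXPECTED_DIMENSION = 2048;         // assumption: largest supported window edge
GLsizeiptr texels = MAX_EXPECTED_DIMENSION * MAX_EXPECTED_DIMENSION * MAX_LAYER_DEPTH;
if (texels > maxTexels)
    texels = maxTexels;                                 // never exceed the reported limit

glBindBuffer(GL_TEXTURE_BUFFER, linked_list_buffer);
glBufferData(GL_TEXTURE_BUFFER, texels * sizeof(vmath::vec4), NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_TEXTURE_BUFFER, 0);
// Reshape() then only updates the viewport and never touches the buffer again.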
Originally the code bound the image texture and then traced using a shader that writes to that image texture with imageStore, but I discovered that I don't need any shader at all to make the driver crash.
Any clue to predict/prevent the driver crash?


C++ CUDA OpenGL not rendering VBO

I am trying to draw a bunch of points on the screen. I'm using CUDA to generate the data (position and color) and OpenGL to draw it. I am trying to get CUDA to update a VBO and then have OpenGL draw it, but I get a blank screen. I am not sure whether CUDA is failing to update the buffer or whether the buffer is not being drawn properly. My GPU is a GTX 1080, and I'm trying to use OpenGL 4.0. Colors are specified by CUDA as well. If my problem is that I need a shader, how do I add one while still specifying the color through CUDA?
UPDATE: The problem seems to be on the OpenGL side. I updated the code to use a triangle, so a new question: why is my VBO not being rendered?
Here is the code:
GPUmain.cuh:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/remove.h>
#include <curand.h>
#include <GL/glew.h>
#include <SDL_opengl.h>
#include <cuda_gl_interop.h>
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
//ver: x, y, z, r, g, b, a
struct ver {
// x, y, z pos
GLuint x, y, z;
// r, g, b, a color
GLubyte r, g, b, a;
};
class GPU {
public:
static int nParticles;
static GLuint vboid;
static cudaGraphicsResource *CGR;
//collection of vertices to be simulated and rendered
static thrust::device_vector<ver> rverts;
static void init(int w, int h);
static void compute();
static void render();
static void GPUmain();
static void free();
};
GPUmain.cu:
#include "GPUmain.cuh"
__global__ void uploadVerts(ver *vv, ver *vb) {
int id = threadIdx.x + (blockDim.x * blockIdx.x);
vb[id] = vv[id];
vb[id].x = vv[id].x;
vb[id].y = vv[id].y;
vb[id].z = vv[id].z;
vb[id].r = vv[id].r;
vb[id].g = vv[id].g;
vb[id].b = vv[id].b;
vb[id].a = vv[id].a;
}
__global__ void genGrid(ver *v) {
int i = threadIdx.x + (blockDim.x * blockIdx.x);
float x = (float)(i % ((int)1080));
float y = (float)(i / ((int)1920));
v[i].x = x;
v[i].y = y;
v[i].z = 1;
v[i].r = 255;
v[i].g = 0;
v[i].b = 0;
v[i].a = 0;
}
int GPU::nParticles;
GLuint GPU::vboid;
cudaGraphicsResource *GPU::CGR;
//collection of vertices to be simulated and rendered
thrust::device_vector<ver> GPU::rverts;
void GPU::init(int w, int h)
{
nParticles = w * h;
/*rverts.resize(nParticles, ver{0,0,0,0,0,0,0});
genGrid<<<nParticles/1024,1024>>>(thrust::raw_pointer_cast(&rverts[0]));*/
ver e[3] = {
ver{1024,200,2,255,0,0,255},
ver{499,288,173,0,255,0,255},
ver{462,1674,8,0,0,255,255}
};
glGenBuffers(1,&vboid);
glBindBuffer(GL_ARRAY_BUFFER,vboid);
glBufferData(GL_ARRAY_BUFFER,3*sizeof(ver),e,GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
/*cudaGraphicsGLRegisterBuffer(&CGR,vboid,cudaGraphicsMapFlagsWriteDiscard);*/
}
void GPU::compute()
{
}
void GPU::render()
{
/*ver *verts;
size_t size;
cudaGraphicsMapResources(1, &CGR, 0);
cudaGraphicsResourceGetMappedPointer((void**)&verts, &size, CGR);
uploadVerts<<<nParticles/1024, 1024>>>(thrust::raw_pointer_cast(&rverts[0]), verts);
cudaGraphicsUnmapResources(1, &CGR, 0);
cudaDeviceSynchronize();*/
glClearColor(0, 0, 0, 0); // we clear the screen with black (else, frames would overlay...)
glClear(GL_COLOR_BUFFER_BIT); // clear the buffer
glBindBuffer(GL_ARRAY_BUFFER, vboid);
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glVertexPointer(3, GL_INT, 4 * sizeof(GLubyte), 0);
glColorPointer(4, GL_BYTE, 3 * sizeof(GLuint), BUFFER_OFFSET(3 * sizeof(GLuint)));
glDrawArrays(GL_TRIANGLES, 0, 3);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void GPU::GPUmain()
{
compute();
render();
}
void GPU::free()
{
cudaGraphicsUnregisterResource(CGR);
glBindBuffer(GL_ARRAY_BUFFER, vboid);
glDeleteBuffers(1, &vboid);
glBindBuffer(GL_ARRAY_BUFFER, 0);
rverts.clear();
thrust::device_vector<ver>().swap(rverts);
}
The relevant (that contain OpenGL code) parts of window.cpp:
bool Window::init()
{
//initialize SDL
if (SDL_Init(SDL_INIT_EVERYTHING) != 0) {
log << "Failed to initialize SDL!\n";
return false;
}
//set window attributes
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0);
SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
//create window
window = SDL_CreateWindow(
name.c_str(),
SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED,
width,
height,
SDL_WINDOW_OPENGL
);
//create opengl context in the window
glcontext = SDL_GL_CreateContext(window);
SDL_GL_SetSwapInterval(1);
//check if the window was created
if (window == nullptr) {
log << "Failed to create window!\n";
return false;
}
//turn on experimental features
glewExperimental = GL_TRUE;
//initialize glew
if (glewInit() != GLEW_OK) {
log << "Failed to Init GLEW";
return false;
}
//set drawing parameters
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, width, 0, height, 0, 255);
glPointSize(1);
glEnable(GL_BLEND); // Allow Transparency
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // how transparency acts
std::cout << sizeof(ver);
GPU::init(width, height);
return true;
}
void Window::renderFrame()
{
GPU::render();
SDL_GL_SwapWindow(window); //swap buffers
}
If you use the fixed-function attributes and client-side capabilities, then you have to use a compatibility profile context.
See Fixed Function Pipeline and Legacy OpenGL.
If you want to use a core profile, then you have to use a Vertex Array Object and a shader. To keep the fixed-function code, change
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
to
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY);
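For reference, a minimal core-profile sketch of the attribute setup for the interleaved ver struct (attribute locations 0 and 1 are assumptions; the matching shader, not shown, would declare them):

// Hypothetical core-profile setup; assumes a shader with position at location 0 and color at location 1.
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vboid);
// position: 3 unsigned ints at offset 0, converted to float
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_UNSIGNED_INT, GL_FALSE, sizeof(ver), (void*)0);
// color: 4 unsigned bytes after the position, normalized to [0, 1]
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(ver), (void*)(3 * sizeof(GLuint)));
glBindVertexArray(0);
// drawing then reduces to: glBindVertexArray(vao); glDrawArrays(GL_TRIANGLES, 0, 3);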
The following geometry
ver e[3] = {
// x y z r g b a
ver{1024, 200, 2, 255, 0, 0, 255},
ver{ 499, 288, 173, 0, 255, 0, 255},
ver{462, 1674, 8, 0, 0, 255, 255}
};
is clipped by the near plane of the orthographic projection. Note, in view space the z-axis points out of the viewport.
Change the orthographic projection (or invert the z coordinates of the geometry):
glOrtho(0, width, 0, height, 0, 255);  // before
glOrtho(0, width, 0, height, -255, 0); // after
The stride parameter of glVertexPointer and glColorPointer is the byte offset between consecutive attributes, so it has to be sizeof(ver).
The type of the color attributes is GL_UNSIGNED_BYTE rather than GL_BYTE:
// before:
glVertexPointer(3, GL_INT, 4 * sizeof(GLubyte), 0);
glColorPointer(4, GL_BYTE, 3 * sizeof(GLuint), BUFFER_OFFSET(3 * sizeof(GLuint)));
// after:
glVertexPointer(3, GL_INT, sizeof(ver), 0);
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(ver), BUFFER_OFFSET(3 * sizeof(GLuint)));

Fast way to rasterize a grid of points/pixels

I want to fill the screen with a grid of points. My desired performance would be about the same as drawing that many pixels as one contiguous quad (or an equivalent triangle clipped with glViewport). Using GL_POINT primitives (positioned via gl_VertexID, not attributes) or glPolygonStipple are possibilities, but both are still a little slower. Here's an example of what I want (though the black points drawn may be yet more sparse).
Are there any other methods to draw this grid? (in a similar time to a smaller quad of the same number of pixels)
Wouldn't it be great if the rasterizer were programmable!
The main point of this is to be able to write to both stencil and colour buffers in this grid pattern from a fragment shader.
EDIT
Some rendering times:
Full screen for me is 1680x1050 on a GTX 670. Times are calculated by drawing 10,000 times each frame with no depth test. I draw the quad as one big triangle and clip it using glViewport.
Rendering a full screen quad and calling discard for coord%4>0: 0.112ms
Rendering a full screen quad, assigning const colour: 0.059ms
Rendering with glPolygonStipple creating %4 pattern: 0.009ms
Rendering quarter full screen quad: 0.003ms
Rendering a 1x1 quad: 0.002ms (binding VBO and shader, could prob be optimized)
The differences get larger with a more sparse grid, for example %16.
EDIT
OK, I've thrown together a small example. Requires glut and glew libraries:
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <memory.h>
#include <assert.h>
#include <stdio.h>
#define RESOLUTION_X 1680
#define RESOLUTION_Y 1050
#define USE_32_BIT 0
#define TEST_LOOP 1000 //number of quads to draw per frame
#define WARMUP_MS 1000 //time between switching methods
#define TEST_MS 4000 //time to benchmark for
#define TESTS 6
#define DRAW_GRAPH 1
#define SCALE_MS 0.2f //for drawing the graph
GLuint fbo, colourTex, vbo, shader, shaderPoints, shaderDiscard;
int viewport[2];
int test = 0;
int results_time[TESTS];
int results_frames[TESTS];
float colours[TESTS][3] = {
{1,0,0},
{1,1,0},
{1,0,1},
{0,1,0},
{0,1,1},
{0,0,1},
};
const char* names[TESTS] = {
"full",
"full discard",
"full stipple",
"draw points",
"quarter",
"one"
};
float triangleVerts[9] = {-1,-1,0,-1,4,0,4,-1,0};
const char* vertexShaderSrc = "#version 150\nin vec4 v;\nvoid main() {gl_Position = v;}\n";
const char* vertexShaderPointsSrc = "#version 150\nuniform ivec2 s;\nvoid main() {ivec2 p = ivec2(gl_VertexID%(s.x/4),gl_VertexID/(s.x/4)); gl_Position = vec4(2.0*(p*4+0.5)/s-1.0, 0, 1);}\n";
const char* fragmentShaderSrc = "#version 150\nout vec4 c;\nvoid main() {c = vec4(1,0,0,1);}\n";
const char* fragmentShaderDiscardSrc = "#version 150\nout vec4 c;\nvoid main() {if (int(gl_FragCoord.x)%4>0||int(gl_FragCoord.y)%4>0) discard; c = vec4(1,0,0,1);}\n";
void setupDraw(GLuint program, int x, int y)
{
glUseProgram(program);
glViewport(0, 0, x, y);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
GLuint loc = glGetAttribLocation(program, "v");
glEnableVertexAttribArray(loc);
glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, 0, 0);
}
void polygonStippleGrid(int x, int y)
{
unsigned char tilePattern[32*32];
memset(tilePattern, 0, sizeof(tilePattern));
for (int j = 0; j < 32; j += y)
{
for (int i = 0; i < 32; i += x)
{
int index = (j * 32 + i);
tilePattern[index / 8] |= 1 << (index % 8);
}
}
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glPolygonStipple(tilePattern);
}
void display()
{
static int lastTime = -1;
int elapsed = glutGet(GLUT_ELAPSED_TIME);
if (lastTime == -1) lastTime = elapsed;
int dt = elapsed - lastTime;
lastTime = elapsed;
static int warmup = WARMUP_MS + 2000;
static int running = TEST_MS;
warmup -= dt;
if (warmup <= 0 && test < TESTS)
{
running -= dt;
results_time[test] += dt;
results_frames[test] += 1;
if (running <= 0)
{
printf("%s %s %.6fms\n", names[test], USE_32_BIT?"rgba32":"rgba8", results_time[test]/(float)(results_frames[test] * TEST_LOOP));
test += 1;
warmup = WARMUP_MS;
running = TEST_MS;
}
}
#if DRAW_GRAPH
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glViewport(0, 0, viewport[0], viewport[1]);
glClear(GL_COLOR_BUFFER_BIT);
float s = 2.0f / TESTS;
glBegin(GL_QUADS);
for (int i = 0; i < TESTS; ++i)
{
if (!results_frames[i]) continue;
glColor3fv(colours[i]);
float x = -1.0f + 2.0f * i / (float)TESTS;
float y = -1.0f + 2.0f * (results_time[i]/(float)(results_frames[i] * TEST_LOOP)) / SCALE_MS;
glVertex2f(x, -1.0f); glVertex2f(x, y); glVertex2f(x + s, y); glVertex2f(x + s, -1.0f);
}
glEnd();
#endif
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
switch (test)
{
case 0: //straight full screen quad
setupDraw(shader, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 1: //full screen quad, discarding pixels in the frag shader
setupDraw(shaderDiscard, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 2: //using polygon stipple to mask out fragments
polygonStippleGrid(4, 4);
glEnable(GL_POLYGON_STIPPLE);
setupDraw(shader, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
glDisable(GL_POLYGON_STIPPLE);
break;
case 3: //drawing points, but computing the position in the vertex shader
glUseProgram(shaderPoints);
glUniform2i(glGetUniformLocation(shaderPoints, "s"), RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_POINTS, 0, (RESOLUTION_X/4)*(RESOLUTION_Y/4));
break;
case 4: //a quad one quarter of the screen (as a speed comparison)
setupDraw(shader, RESOLUTION_X / 4, RESOLUTION_Y / 4);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 5: //a 1x1 quad (as a speed comparison)
setupDraw(shader,1, 1);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
default: break;
}
glUseProgram(0);
glDisableVertexAttribArray(0); //HACK: assumes location is always zero
//printf("%i %i %i\n", test, warmup, running);
glFinish();
glutSwapBuffers();
glutPostRedisplay();
assert(glGetError() == GL_NO_ERROR);
}
void reshape(int x, int y)
{
viewport[0] = x;
viewport[1] = y;
}
int main(int argc, char **argv)
{
memset(results_time, 0, sizeof(results_time));
memset(results_frames, 0, sizeof(results_frames));
//init glut
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
glutCreateWindow("quadtest");
glutReshapeFunc(reshape);
glutDisplayFunc(display);
glewInit();
//init gl stuff
glGenTextures(1, &colourTex);
glBindTexture(GL_TEXTURE_2D, colourTex);
#if USE_32_BIT
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, RESOLUTION_X, RESOLUTION_Y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, RESOLUTION_X, RESOLUTION_Y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
#endif
/*
GLuint stencilRB;
glGenRenderbuffers(1, &stencilRB);
glBindRenderbuffer(GL_RENDERBUFFER, stencilRB);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_STENCIL, RESOLUTION_X, RESOLUTION_Y);
*/
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colourTex, 0);
//glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, stencilRB);
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(triangleVerts), triangleVerts, GL_STATIC_DRAW);
GLuint v = glCreateShader(GL_VERTEX_SHADER);
GLuint vp = glCreateShader(GL_VERTEX_SHADER);
GLuint f = glCreateShader(GL_FRAGMENT_SHADER);
GLuint fd = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(v, 1, &vertexShaderSrc, NULL);
glShaderSource(vp, 1, &vertexShaderPointsSrc, NULL);
glShaderSource(f, 1, &fragmentShaderSrc, NULL);
glShaderSource(fd, 1, &fragmentShaderDiscardSrc, NULL);
GLint ok = GL_TRUE;
shader = glCreateProgram();
glAttachShader(shader, v);
glAttachShader(shader, f);
glLinkProgram(shader);
glGetProgramiv(shader, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
/*
char log[512];
int n;
glGetShaderInfoLog(v, 512, &n, log);
printf("%s\n", log);
glGetProgramInfoLog(shader, 512, &n, log);
printf("%s\n", log);
*/
shaderPoints = glCreateProgram();
glAttachShader(shaderPoints, vp);
glAttachShader(shaderPoints, f);
glLinkProgram(shaderPoints);
glGetProgramiv(shaderPoints, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
shaderDiscard = glCreateProgram();
glAttachShader(shaderDiscard, v);
glAttachShader(shaderDiscard, fd);
glLinkProgram(shaderDiscard);
glGetProgramiv(shaderDiscard, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
glDisable(GL_DEPTH_TEST);
assert(glGetError() == GL_NO_ERROR);
glutMainLoop();
return 0;
}
Interestingly, using GL_RGBA32F 32-bit colour impacts performance a fair bit, and it also brings the overhead of the discard method back to approximately that of a full screen quad. The glPolygonStipple method gives dramatic improvements in this case, more so than with 8-bit colour. There is a discrepancy with the previous glPolygonStipple result too; I can reproduce both and haven't narrowed down the difference yet.
output for GL_RGBA:
full rgba8 0.059ms
full discard rgba8 0.112ms
full stipple rgba8 0.050ms
draw points rgba8 0.079ms
quarter rgba8 0.004ms
one rgba8 <0.001ms
output for GL_RGBA32F:
full rgba32 0.240ms
full discard rgba32 0.241ms
full stipple rgba32 0.101ms
draw points rgba32 0.091ms
quarter rgba32 0.015ms
one rgba32 <0.001ms
Drawing points and positioning them from gl_VertexID will beat glPolygonStipple for GL_RGBA32F. I'd assume this trend carries on for more expensive shaders (or at least memory-intensive ones).
Are there any other methods to draw this grid?
Exactly this grid? Well, in that case your grid has a periodicity of 4 and an offset of -1 in the x and -2 in the y direction. So the fragment shader to produce it (discarding the "black" pixels) would be:
void main()
{
    if( (int(gl_FragCoord.x - 1.0) % 4) == 0 && (int(gl_FragCoord.y - 2.0) % 4) == 0 )
        discard;
    gl_FragColor = vec4(1, 1, 1, 1);
}
Setting the stencil op to always replace the stencil value will then set the stencil buffer to your ref value everywhere no pixel was discarded.
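A sketch of that stencil configuration (plain OpenGL stencil state; the ref value 1 is an arbitrary choice for illustration):

glEnable(GL_STENCIL_TEST);
glStencilFunc(GL_ALWAYS, 1, 0xFF);               // always pass, write ref value 1
glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); // replace on every outcome
// draw the full screen quad with the discarding fragment shader;
// surviving fragments stamp the grid pattern into the stencil buffer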
If you can't express your grid by some kind of formula, well, use a texture instead.
The scattered memory writes of a sparse grid may simply mean more overhead that can't be avoided.
Draw GL_POINTs
Use glPolygonStipple
Initialize the stencil buffer with the pattern for a masking a full screen quad
Whatever you do, do not use the discard method if the fragment shader is expensive[1]. It is really wasteful, because you clog the pipeline with many threads that don't do anything.
[1] Either takes a long time to execute, or uses lots of registers or local memory.

Using a VBO to draw lines from a vector of points in OpenGL

I have a simple OpenGL program which I am trying to utilize Vertex Buffer Objects for rendering instead of the old glBegin() - glEnd(). Basically the user clicks on the window indicating a starting point, and then presses a key to generate subsequent points which OpenGL draws as a line.
I've implemented this using glBegin() and glEnd(), but have not been successful using a VBO. I am wondering if the problem is that after I initialize the VBO, I'm adding more vertices for which it has no memory allocated, and thus it doesn't display them.
Edit: Also, I'm a bit confused as to how it knows exactly which values in the vertex struct to use for x and y, as well as for r, g, b. I haven't been able to find a clear example of this.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <Math.h>
#include <iostream>
#include <vector>
#include <GL/glew.h>
#include <GL/glut.h>
struct vertex {
float x, y, u, v, r, g, b;
};
const int D = 10; // distance
const int A = 10; // angle
const int WINDOW_WIDTH = 500, WINDOW_HEIGHT = 500;
std::vector<vertex> vertices;
bool start = false;
GLuint vboId;
void update_line_point() {
vertex temp;
temp.x = vertices.back().x + D * vertices.back().u;
temp.y = vertices.back().y + D * vertices.back().v;
temp.u = vertices.back().u;
temp.v = vertices.back().v;
vertices.push_back(temp);
}
void update_line_angle() {
float u_prime, v_prime;
u_prime = vertices.back().u * cos(A) - vertices.back().v * sin(A);
v_prime = vertices.back().u * sin(A) + vertices.back().v * cos(A);
vertices.back().u = u_prime;
vertices.back().v = v_prime;
}
void initVertexBuffer() {
glGenBuffers(1, &vboId);
glBindBuffer(GL_ARRAY_BUFFER, vboId);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * vertices.size(), &vertices[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void displayCB() {
glClear(GL_COLOR_BUFFER_BIT);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0, WINDOW_WIDTH, 0, WINDOW_HEIGHT);
if (start) {
glBindBuffer(GL_ARRAY_BUFFER, vboId);
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glVertexPointer(2, GL_FLOAT, sizeof(vertex), &vertices[0]);
glColorPointer(3, GL_FLOAT, sizeof(vertex), &vertices[0]);
glDrawArrays(GL_LINE_STRIP, 0, vertices.size());
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
/***** this is what I'm trying to achieve
glColor3f(1, 0, 0);
glBegin(GL_LINE_STRIP);
for (std::vector<vertex>::size_type i = 0; i < vertices.size(); i++) {
glVertex2f(vertices[i].x, vertices[i].y);
}
glEnd();
*****/
glFlush();
glutSwapBuffers();
}
void mouseCB(int button, int state, int x, int y) {
if (state == GLUT_DOWN) {
vertices.clear();
vertex temp = {x, WINDOW_HEIGHT - y, 1, 0, 1, 0, 0}; // default red color
vertices.push_back(temp);
start = true;
initVertexBuffer();
}
glutPostRedisplay();
}
void keyboardCB(unsigned char key, int x, int y) {
switch(key) {
case 'f':
if (start) {
update_line_point();
}
break;
case 't':
if (start) {
update_line_angle();
}
break;
}
glutPostRedisplay();
}
void initCallbackFunc() {
glutDisplayFunc(displayCB);
glutMouseFunc(mouseCB);
glutKeyboardFunc(keyboardCB);
}
int main(int argc, char** argv) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
glutInitWindowSize(WINDOW_WIDTH, WINDOW_HEIGHT);
glutInitWindowPosition(100, 100);
glutCreateWindow("Test");
initCallbackFunc();
// initialize glew
GLenum glewInitResult;
glewExperimental = GL_TRUE;
glewInitResult = glewInit();
if (GLEW_OK != glewInitResult) {
std::cerr << "Error initializing glew." << std::endl;
return 1;
}
glClearColor(1, 1, 1, 0);
glutMainLoop();
return 0;
}
If you have a VBO bound, then the pointer argument to the gl*Pointer() calls is interpreted as a byte offset from the beginning of the VBO, not an actual pointer. Your usage is consistent with client-side vertex arrays, though.
So for your vertex struct, x starts at byte zero and r starts at byte sizeof(float) * 4.
Also, your mouse callback resets your vertex vector on every call, so you can never have more than one vertex in it at any given time. It also leaks VBO names via the glGenBuffers() call in initVertexBuffer().
Give this a shot:
#include <GL/glew.h>
#include <GL/glut.h>
#include <iostream>
#include <vector>
struct vertex
{
float x, y;
float u, v;
float r, g, b;
};
GLuint vboId;
std::vector<vertex> vertices;
void mouseCB(int button, int state, int x, int y)
{
y = glutGet( GLUT_WINDOW_HEIGHT ) - y;
if (state == GLUT_DOWN)
{
vertex temp = {x, y, 1, 0, 1, 0, 0}; // default red color
vertices.push_back(temp);
glBindBuffer(GL_ARRAY_BUFFER, vboId);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * vertices.size(), &vertices[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
glutPostRedisplay();
}
void displayCB()
{
glClearColor(1, 1, 1, 0);
glClear(GL_COLOR_BUFFER_BIT);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
double w = glutGet( GLUT_WINDOW_WIDTH );
double h = glutGet( GLUT_WINDOW_HEIGHT );
glOrtho( 0, w, 0, h, -1, 1 );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
if ( vertices.size() > 1 )
{
glBindBuffer(GL_ARRAY_BUFFER, vboId);
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glVertexPointer(2, GL_FLOAT, sizeof(vertex), (void*)(sizeof( float ) * 0));
glColorPointer(3, GL_FLOAT, sizeof(vertex), (void*)(sizeof( float ) * 4));
glDrawArrays(GL_LINE_STRIP, 0, vertices.size());
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
glutSwapBuffers();
}
int main(int argc, char** argv)
{
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
glutInitWindowSize(500, 500);
glutInitWindowPosition(100, 100);
glutCreateWindow("Test");
// initialize glew
glewExperimental = GL_TRUE;
GLenum glewInitResult = glewInit();
if (GLEW_OK != glewInitResult) {
std::cerr << "Error initializing glew." << std::endl;
return 1;
}
glGenBuffers(1, &vboId);
glutDisplayFunc(displayCB);
glutMouseFunc(mouseCB);
glutMainLoop();
return 0;
}
A VBO is a buffer located somewhere in memory (almost always in dedicated GPU memory - VRAM) of a fixed size. You specify this size in glBufferData, and you also simultaneously give the GL a pointer to copy from. The key word here is copy. Everything you do to the vector after glBufferData isn't reflected in the VBO.
You should be binding and doing another glBufferData call after changing the vector. You will also probably get better performance from glBufferSubData or glMapBuffer if the VBO is already large enough to handle the new data, but in a small application like this the performance hit of calling glBufferData every time is basically non-existent.
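A sketch of that incremental-update idea, using the thread's vboId and vertices but a hypothetical vboCapacity variable to remember the allocated size:

size_t vboCapacity = 0; // bytes currently allocated in the VBO (hypothetical bookkeeping)

void uploadVertices()
{
    glBindBuffer(GL_ARRAY_BUFFER, vboId);
    const size_t bytes = sizeof(vertex) * vertices.size();
    if (bytes > vboCapacity) {
        // too small: reallocate the store and copy everything
        glBufferData(GL_ARRAY_BUFFER, bytes, &vertices[0], GL_DYNAMIC_DRAW);
        vboCapacity = bytes;
    } else {
        // big enough: overwrite the existing storage in place
        glBufferSubData(GL_ARRAY_BUFFER, 0, bytes, &vertices[0]);
    }
    glBindBuffer(GL_ARRAY_BUFFER, 0);
}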
Also, to address your other question about how the values for x, y, etc. are picked out: the way your VBO is set up, the values are interleaved, so in memory your vertices will look like this:
+-------------------------------------------------
| x | y | u | v | r | g | b | x | y | u | v | ...
+-------------------------------------------------
You tell OpenGL where your vertices and colors are with the glVertexPointer and glColorPointer functions respectively.
The size parameter specifies how many elements there are for each vertex. In this case, it's 2 for vertices, and 3 for colors.
The type parameter specifies what type each element is. In your case it's GL_FLOAT for both.
The stride parameter is how many bytes you need to skip from the start of one vertex to the start of the next. With an interleaved setup like yours, this is simply sizeof(vertex) for both.
The last parameter, pointer, isn't actually a pointer to your vector in this case. When a VBO is bound, pointer becomes a byte offset into the VBO. For vertices, this should be 0, since the first vertex starts at the very first byte of the VBO. For colors, this should be 4 * sizeof(float), since the first color is preceded by 4 floats.
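As an aside (a common idiom, not something from the original answer): offsetof from <cstddef> computes those byte offsets from the struct layout itself, which avoids hand-counting the 4 floats:

#include <cstddef> // offsetof

glVertexPointer(2, GL_FLOAT, sizeof(vertex), (void*)offsetof(vertex, x)); // offset 0
glColorPointer(3, GL_FLOAT, sizeof(vertex), (void*)offsetof(vertex, r));  // offset 4 * sizeof(float)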

OpenGL repeated calls to glTexImage2D and alpha blending

This is more out of curiosity than for any practical purpose: is there anything in the OpenGL specification that suggests that calling glTexImage2D many times (e.g., once per frame) is illegal? I mean illegal as in 'it could produce wrong results', not just inefficient (suppose I don't care about the performance impact of not using glTexSubImage2D instead).
The reason I'm asking is that I noticed some very odd artifacts when drawing overlapping, texture-mapped primitives that use a partly transparent texture which is reloaded every frame using glTexImage2D (see the attached picture): after a few seconds (i.e., a few hundred frames), small rectangular black patches appear on the screen (they're actually flipping between black and normal in consecutive frames).
I'm attaching below the simplest example code I could write that exhibits the problem.
#include <stdio.h>
#ifndef __APPLE__
# include <SDL/SDL.h>
# include <SDL/SDL_opengl.h>
#else
# include <SDL.h>
# include <SDL_opengl.h>
#endif
/* some constants and variables that several functions use */
const int width = 640;
const int height = 480;
#define texSize 64
GLuint vbo;
GLuint tex;
/* forward declaration, creates a random texture; uses glTexSubImage2D if
update is non-zero (otherwise glTexImage2D) */
void createTexture(GLuint label, int update);
int init()
{
/* SDL initialization */
if (SDL_Init(SDL_INIT_VIDEO) < 0)
return 0;
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
if (!SDL_SetVideoMode(width, height, 0, SDL_OPENGL)) {
fprintf(stderr, "Couldn't initialize OpenGL");
return 0;
}
/* OpenGL initialization */
glClearColor(0, 0, 0, 0);
glEnable(GL_TEXTURE_2D);
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, width, height, 0, -1, 1);
glMatrixMode(GL_MODELVIEW);
/* creating the VBO and the textures */
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, 1024, 0, GL_DYNAMIC_DRAW);
glGenTextures(1, &tex);
createTexture(tex, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
return 1;
}
/* draw a triangle at the specified point */
void drawTriangle(GLfloat x, GLfloat y)
{
GLfloat coords1[12] = {0, 0, 0, 0, /**/200, 0, 1, 0, /**/200, 150, 1, 1};
glLoadIdentity();
glTranslatef(x, y, 0);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(coords1), coords1);
glVertexPointer(2, GL_FLOAT, 4*sizeof(GLfloat), (void*)0);
glTexCoordPointer(2, GL_FLOAT, 4*sizeof(GLfloat),
(char*)0 + 2*sizeof(GLfloat));
glDrawArrays(GL_TRIANGLES, 0, 3);
}
void render()
{
glClear(GL_COLOR_BUFFER_BIT);
drawTriangle(250, 50);
createTexture(tex, 0);
drawTriangle(260, 120);
SDL_GL_SwapBuffers();
}
void cleanup()
{
glDeleteTextures(1, &tex);
glDeleteBuffers(1, &vbo);
SDL_Quit();
}
int main(int argc, char* argv[])
{
SDL_Event event;
if (!init()) return 1;
while (1) {
while (SDL_PollEvent(&event))
if (event.type == SDL_QUIT)
return 0;
render();
}
cleanup();
return 0;
}
void createTexture(GLuint label, int update)
{
GLubyte data[texSize*texSize*4];
GLubyte* p;
int i, j;
glBindTexture(GL_TEXTURE_2D, label);
for (i = 0; i < texSize; ++i) {
for (j = 0; j < texSize; ++j) {
p = data + (i + j*texSize)*4;
p[0] = ((i % 8) > 4?255:0);
p[1] = ((j % 8) > 4?255:0);
p[2] = ((i % 8) > 4?255:0);
p[3] = 255 - i*3;
}
}
if (!update)
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, texSize, texSize, 0, GL_RGBA,
GL_UNSIGNED_BYTE, data);
else
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texSize, texSize, GL_RGBA,
GL_UNSIGNED_BYTE, data);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}
Notes:
I'm using SDL, but I've seen the same happening in wxWidgets, so it's not an SDL-related problem.
If I use glTexSubImage2D instead for every frame (use update = 1 in createTexture), the artifacts disappear.
If I disable blending, there are no more artifacts.
I've been testing this on a late 2010 MacBook Air, though I doubt that's particularly relevant.
This is clearly an OpenGL implementation bug; just calling glTexImage2D in a loop should not cause this to happen.
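Accordingly, a workaround sketch that leans on note 2 above: specify the texture storage once at startup, then reuse the question's own update path every frame:

// at init time: allocate the texture storage exactly once
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, texSize, texSize, 0, GL_RGBA,
             GL_UNSIGNED_BYTE, NULL); // NULL: storage only, no data yet

// every frame: respecify only the contents, never the storage
createTexture(tex, 1); // update != 0 takes the glTexSubImage2D path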

Code Assist, OpenGL VAO/VBO Classes not drawing

Edit II:
Current code works great! Thanks, everyone. I went ahead and included my shader code for reference at the bottom, though the shaders do absolutely nothing at this point.
I am trying to get up and going with OpenGL 4.1 and am still very early in development. Currently I'm not even really using 4.0 features in this project, so this is just as much an OpenGL 3 question.
The goal I was working on first was simply working out two classes to handle VAOs and VBOs. I had some misconceptions but finally got past the blank screen.
/* THIS CODE IS NOW FULLY FUNCTIONAL */
/* well, fully is questionable lol, should work out of the box with glew and glfw */
/* A simple function that will read a file into an allocated char pointer buffer */
/* Borrowed from OpenGL.org tutorial */
char* filePull(char *file)
{
FILE *fptr;
long length;
char *buf;
fptr = fopen(file, "r"); /* Open file for reading */
if (!fptr) /* Return NULL on failure */
return NULL;
fseek(fptr, 0, SEEK_END); /* Seek to the end of the file */
length = ftell(fptr); /* Find out how many bytes into the file we are */
buf = (char*)malloc(length+1); /* Allocate a buffer for the entire length of the file and a null terminator */
fseek(fptr, 0, SEEK_SET); /* Go back to the beginning of the file */
fread(buf, length, 1, fptr); /* Read the contents of the file in to the buffer */
fclose(fptr); /* Close the file */
buf[length] = 0; /* Null terminator */
return buf; /* Return the buffer */
}
class VBO
{
public:
GLuint buffer;
bool isBound;
vector<void*> belongTo;
vector<GLfloat> vertex;
GLenum usage;
void Load()
{ glBufferData(GL_ARRAY_BUFFER, vertex.size()*sizeof(GLfloat), &vertex[0], usage); }
void Create(void* parent)
{
glGenBuffers(1, &buffer);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
glBufferData(GL_ARRAY_BUFFER, vertex.size()*sizeof(GLfloat), &vertex[0], usage);
isBound=true;
belongTo.push_back(parent);
}
void Activate()
{
if(!isBound) glBindBuffer(GL_ARRAY_BUFFER, buffer);
isBound=true;
}
void Deactivate(){ glBindBuffer(GL_ARRAY_BUFFER, 0); }
VBO() : isBound(false), usage(GL_STATIC_DRAW)
{ }
~VBO() { }
private:
};
class VAO
{
public:
GLuint buffer;
string key;
unsigned long long cursor;
vector<VBO> child;
void Create()
{
glGenVertexArrays(1, &buffer);
for(unsigned int i=0; i<child.size(); i++)
child[i].Create(this);
}
void Activate()
{
glBindVertexArray(buffer);
for(unsigned int i=0; i<child.size(); i++)
child[i].Activate();
}
void Release(){ glBindVertexArray(0); }
void Remove(){ glDeleteVertexArrays(1, &buffer); }
VAO() : buffer(1) { }
~VAO() { }
private:
};
int main()
{
int width=640, height=480, frame=1; bool running = true;
glfwInit();
if( !glfwOpenWindow( width, height, 0, 0, 0, 0, 0, 0, GLFW_WINDOW ) )
{ glfwTerminate(); return 13; }
glfwSetWindowTitle("Genesis");
glewInit();
cout<<(GLEW_VERSION_4_1?"yes":"no"); //yes
GLchar *vsource, *fsource;
GLuint _vs, _fs;
GLuint Shader;
vsource = filePull("base.vert");
fsource = filePull("base.frag");
/* Compile Shaders */
_vs = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(_vs, 1, (const GLchar**)&vsource, 0);
glCompileShader(_vs);
// glGetShaderiv(_vs, GL_COMPILE_STATUS, &IsCompiled_VS);
_fs = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(_fs, 1, (const GLchar**)&fsource, 0);
glCompileShader(_fs);
/***************** ^ Vertex | Fragment v *********************/
Shader = glCreateProgram(); /* the program object must exist before shaders are attached */
glAttachShader(Shader, _vs);
glAttachShader(Shader, _fs);
// glGetShaderiv(_fs, GL_COMPILE_STATUS, &IsCompiled_FS);
glBindAttribLocation(Shader, 0, "posIn");
glLinkProgram(Shader);
// glGetProgramiv(shaderprogram, GL_LINK_STATUS, (int *)&IsLinked);
VAO Object3D;
VBO myVBO[3];
glUseProgram(Shader);
for(int i=0; i<9; i++)
myVBO[0].vertex.push_back((i%9)*.11); //Arbitrary vertex values
Object3D.child.push_back(myVBO[0]);
Object3D.Create();
glClearColor( 0.7f, 0.74f, 0.77f, 0.0f ); //Black got lonely
int i=0; while(running)
{
frame++;
glfwGetWindowSize( &width, &height );
height = height > 0 ? height : 1;
glViewport( 0, 0, width, height );
glClear( GL_COLOR_BUFFER_BIT );
/* Bind, Draw, Unbind */
Object3D.Activate();
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, false, 0, 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 3); /* 9 floats = 3 vertices of 3 components */
Object3D.Release();
glfwSwapBuffers();
// exit if ESC was pressed or window was closed
running = !glfwGetKey(GLFW_KEY_ESC) && glfwGetWindowParam( GLFW_OPENED);
i++;
}
glUseProgram(0); glDisableVertexAttribArray(0);
glDetachShader(Shader, _vs); glDetachShader(Shader, _fs);
glDeleteProgram(Shader); glDeleteShader(_vs); glDeleteShader(_fs);
glDeleteVertexArrays(1, &Object3D.buffer);
glfwTerminate();
return 0;
}
Basically I'm just hoping to get anything on the screen at this point. I am using glfw and glew. Am I completely leaving some things out or do I only need to correct something? Code is somewhat mangled at the moment, sorry.
base.vert
// Vertex Shader - file "base.vert"
#version 330
in vec3 posIn;
out vec4 colorOut;
void main(void)
{
gl_Position = vec4(posIn, 1.0);
colorOut = vec4(3.0,6.0,4.0,1.0);
}
base.frag
// Fragment Shader - file "base.frag"
#version 330
out vec3 colorOut;
void main(void)
{
colorOut = vec3(1.0,10,1.0);
}
&vertex
vertex is a std::vector. Taking its address will not give you a pointer to the element data.
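A minimal illustration (bytes stands in for the computed size and is not a variable from the original code):

// wrong: &vertex is the address of the std::vector object itself
glBufferData(GL_ARRAY_BUFFER, bytes, &vertex, usage);
// right: address of the first element (or vertex.data() in C++11)
glBufferData(GL_ARRAY_BUFFER, bytes, &vertex[0], usage);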
Edit to add:
Right. It still does not work, because you have at least 2 more issues:
You don't issue any gl*Pointer call, so the GL won't know what it needs to pull from your vertex buffer objects.
The vertex data you put in your vertex array is the same vertex 3 times, i.e. a triangle with all 3 points at the same location:
for(int i=0; i<9; i++)
myVBO[0].vertex.push_back((i%3)*.2); //Arbitrary vertex values
It creates 3 (.0 .2 .4) vectors, all at the same location.
That isBound member of VBO looks suspicious. The OpenGL binding state may change behind its back, for example after switching the bound VAO, while the VBO class instance still thinks it's active. Just drop isBound altogether and re-bind every time you need the object. With modern drivers, rebinding an already bound object is almost free.
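A sketch of the two VBO member functions with the state tracking dropped, as suggested (the rest of the class stays the same):

void VBO::Activate()
{
    // always bind; rebinding an already bound buffer is cheap on modern drivers
    glBindBuffer(GL_ARRAY_BUFFER, buffer);
}
void VBO::Deactivate()
{
    glBindBuffer(GL_ARRAY_BUFFER, 0);
}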