The code below compiles without errors, but when I run it, Windows reports: "The application was unable to start correctly (0xc000007b). Click OK to close the application."
#include <math.h>
#include <GL\glew.h>
#include <GL\glut.h>
#include <cuda_gl_interop.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
GLuint vbo;
struct cudaGraphicsResource* vbo_cuda;
unsigned int width, height;
float tim;
__global__ void createVertices(float4* positions, float tim,
                               unsigned int width, unsigned int height) {
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    float u = x / (float)width;
    float v = y / (float)height;
    u = u * 2.0f - 1.0f;
    v = v * 2.0f - 1.0f;
    // calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u * freq + tim) * cosf(v * freq + tim) * 0.5f;
    positions[y * width + x] = make_float4(u, w, v, 1.0f);
}
void init(void) {
    glClearColor(0, 0, 0, 0);
    glShadeModel(GL_FLAT);
}
void reshape(int w, int h) {
    glViewport(0, 0, (GLsizei)w, (GLsizei)h);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60, (GLfloat)w/(GLfloat)h, 1, 200);
}
void display() {
    float4* positions;
    cudaGraphicsMapResources(1, &vbo_cuda, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         vbo_cuda);
    // execute kernel
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, tim, width, height);
    cudaGraphicsUnmapResources(1, &vbo_cuda, 0);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    // render from the vbo
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexPointer(4, GL_FLOAT, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glDrawArrays(GL_POINTS, 0, width * height);
    glDisableClientState(GL_VERTEX_ARRAY);
    glutSwapBuffers();
    glutPostRedisplay();
}
void deleteVBO() {
    cudaGraphicsUnregisterResource(vbo_cuda);
    glDeleteBuffers(1, &vbo);
}
int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(500, 500);
    glutInitWindowPosition(100, 100);
    glutCreateWindow("Cuda OpenGL Interop");
    init();
    glutDisplayFunc(display);
    glutReshapeFunc(reshape);
    cudaGLSetGLDevice(0);
    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    unsigned int size = width * height * 4 * sizeof(float);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    cudaGLRegisterBufferObject(vbo);
    glutMainLoop();
    return 0;
}
The error comes from Windows: the executable you produced is not one Windows can load. It's possible you are using DEBUG DLLs with a RELEASE build, or mixing a 32-bit build with 64-bit DLLs, or some other odd combination (a 64-bit exe on a 32-bit system?).
You can usually get more information about DLL problems from the Windows Event Viewer, but running the application under a debugger (certainly with Visual Studio) will tell you more about the error.
If you can't work out what's wrong, you can try to find the failing dependency with http://www.dependencywalker.com/.
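As a quick check (assuming you have a Visual Studio developer command prompt; the file names here are placeholders), dumpbin can report the architecture of the exe and of each DLL next to it:
dumpbin /headers YourApp.exe | findstr machine
dumpbin /headers glew32.dll | findstr machine
Both should report the same machine type (x86 or x64); a mismatch is exactly the kind of combination that produces 0xc000007b.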
The 1st error, which was the reason I started this thread, disappeared after installing the right glew32.dll into the right folders.
The 2nd error, where the debugger stopped at glGenBuffers(1, &vbo), was because I had forgotten to call glewInit().
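As a minimal sketch of that second fix (the error handling is my own addition): glewInit() must be called after the GL context exists, i.e. after glutCreateWindow(), and its return value is worth checking:
GLenum err = glewInit(); // after glutCreateWindow()
if (err != GLEW_OK) {
    fprintf(stderr, "GLEW init failed: %s\n", (const char*)glewGetErrorString(err));
    return 1;
}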
Below you can find the working application:
#include <math.h>
#include <GL\glew.h>
#include <GL\glut.h>
#include <cuda_gl_interop.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
GLuint vbo;
struct cudaGraphicsResource* vbo_cuda;
const unsigned int window_width = 512;
const unsigned int window_height = 512;
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
float tim = 0.0;
__global__ void createVertices(float4* positions, float tim,
                               unsigned int mesh_width, unsigned int mesh_height) {
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    float u = x / (float)mesh_width;
    float v = y / (float)mesh_height;
    u = u * 2.0f - 1.0f;
    v = v * 2.0f - 1.0f;
    // calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u * freq + tim) * cosf(v * freq + tim) * 0.5f;
    positions[y * mesh_width + x] = make_float4(u, w, v, 1.0f);
}
void runCuda(GLuint vbo)
{
    // map OpenGL buffer object for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &vbo_cuda, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         vbo_cuda);
    // execute kernel
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid(mesh_width / dimBlock.x, mesh_height / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, tim, mesh_width, mesh_height);
    cudaGraphicsUnmapResources(1, &vbo_cuda, 0);
}
void init(void) {
    glewInit();
    glClearColor(0, 0, 0, 1);
    glDisable(GL_DEPTH_TEST);
}
void reshape(int w, int h) {
    // viewport
    glViewport(0, 0, w, h);
    // projection
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60, (GLfloat)w/(GLfloat)h, 0.1, 10);
}
void createVBO(GLuint* vbo) {
    // create buffer object
    glGenBuffers(1, vbo);
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
    // initialize buffer object
    unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    // register with the graphics-resource API so that vbo_cuda is valid for
    // cudaGraphicsMapResources() in runCuda(); the deprecated
    // cudaGLRegisterBufferObject() would leave vbo_cuda uninitialized
    cudaGraphicsGLRegisterBuffer(&vbo_cuda, *vbo, cudaGraphicsMapFlagsWriteDiscard);
}
void deleteVBO(GLuint* vbo) {
    cudaGraphicsUnregisterResource(vbo_cuda);
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
    glDeleteBuffers(1, vbo);
}
void display() {
    // run CUDA kernel to generate vertex positions
    runCuda(vbo);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // set view matrix
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    // render from the vbo
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexPointer(4, GL_FLOAT, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glColor3f(1, 0, 0);
    glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
    glDisableClientState(GL_VERTEX_ARRAY);
    glutSwapBuffers();
    glutPostRedisplay();
    tim += 1;
}
void keyboard(unsigned char key, int x, int y)
{
    switch (key) {
    case 27:
        deleteVBO(&vbo);
        exit(0);
    }
}
int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(window_width, window_height);
    glutInitWindowPosition(100, 100);
    glutCreateWindow("Cuda GL interop");
    init();
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutReshapeFunc(reshape);
    // pick the CUDA device before any interop registration
    cudaGLSetGLDevice(0);
    // create VBO
    createVBO(&vbo);
    // run the cuda part
    runCuda(vbo);
    glutMainLoop();
    return 0;
}
Even though the compiler shows no errors, the program starts a console and shows the following:
Process returned -1073741819 (0xC0000005) execution time : 2.266 s
Press any key to continue.
I don't know why. I have tried Visual Studio, Code::Blocks, and Dev-C++.
#include <GL/glut.h>
#define RATIO 1.2
#define WW 100
#define WH (WW/RATIO)
#define HALFX ((int)(WW/2))
#define HALFY ((int)(WH/2))
#define deltat 0.001
int WindowWidth;
int WindowHeight;
void Display() {
    glLineWidth(4.0);
    float StartShape[12][2] = { {-15,-15},{-5,-15},{0,-5},{5,-15},{15,-15},{15,25},{5,25},
                                {5,-5},{0,0},{-5,-5},{-5,25},{-15,25} };
    float EndShape[12][2] = { {-15,-15},{-5,-15},{-5,10},{0,0},{5,10},{5,-15},{15,-15},
                              {15,25},{5,25},{0,15},{-5,25},{-15,25} };
    float IntermediateShape[12][2];
    float VertexColors[12][3] = { {1,0,0},{1,1,0},{1,0,1},{0,1,0},{0,1,1},{0,0,1},{1,0.5,0},
                                  {1,0,0.5},{0.5,1,0},{0.5,0,1},{1,0,0.5},{0,1,0.5} };
    static float Tween = 0.0 - deltat;
    if (Tween < 1) {
        Tween += deltat;
    }
    for (int i = 0; i < 12; i++) {
        IntermediateShape[i][0] = (1 - Tween) * StartShape[i][0] + Tween * EndShape[i][0];
        IntermediateShape[i][1] = (1 - Tween) * StartShape[i][1] + Tween * EndShape[i][1];
    }
    glVertexPointer(2, GL_FLOAT, 0, IntermediateShape);
    glColorPointer(3, GL_FLOAT, 0, VertexColors);
    for (int i = 0; i < 1000000; i++) {
        glClear(GL_COLOR_BUFFER_BIT);
        glDrawArrays(GL_LINE_LOOP, 0, 12);
        glutSwapBuffers();
        glutPostRedisplay();
    }
}
void InitGL() {
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluOrtho2D(-HALFX, HALFX, -HALFY, HALFY);
    glMatrixMode(GL_MODELVIEW);
    glClearColor(0, 0, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_COLOR_ARRAY);
    glShadeModel(GL_SMOOTH);
    glViewport(0, 0, WindowWidth, WindowHeight);
}
void Reshape(int w, int h) {
    glutReshapeWindow(w, (int)(w / RATIO));
    WindowWidth = w;
    WindowHeight = (int)(w / RATIO);
    InitGL();
}
int main(int& argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
    WindowWidth = (int)(glutGet((GLenum)GLUT_SCREEN_WIDTH) * 0.4);
    WindowHeight = (int)(WindowWidth / RATIO);
    glutInitWindowSize(WindowWidth, WindowHeight);
    glutInitWindowPosition(((int)glutGet((GLenum)GLUT_SCREEN_WIDTH) * 0.1),
                           (glutGet((GLenum)GLUT_SCREEN_WIDTH) / 2) - (WindowHeight / 2));
    glutCreateWindow("Bilguun Erdenebaatar Tweening Midterm Exam");
    glutDisplayFunc(Display);
    glutReshapeFunc(Reshape);
    InitGL();
    glutMainLoop();
    return 0;
}
I am doing an assignment in OpenGL and C++, writing code to draw the Mandelbrot set. The problem is that the code compiles without errors, but as soon as I run the program, it crashes. Here is the code:
#define GLEW_STATIC
#include <GL\glew.h>
#include <GLFW\glfw3.h>
#include <iostream>
#include <cmath>
#include "Shader.h"
using namespace std;
// Window size
GLuint screenWidth = 800, screenHeight = 600;
void mandelbrotSet(GLfloat vertices[]);
int main() {
    const GLint numOfPositions = 5 * 800 * 600;
    GLint numOfPixels = screenWidth * screenHeight;
    glfwInit();
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_SAMPLES, 4); // 4 samples of anti-aliasing
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
    glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
    GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "Mandelbrot - Pratica 1", nullptr, nullptr);
    glfwMakeContextCurrent(window);
    glewExperimental = GL_TRUE;
    glewInit();
    glViewport(0, 0, screenWidth, screenHeight);
    Shader myShaders("vShader.vs", "fShader.fs");
    GLfloat vertices[numOfPositions];
    //mandelbrotSet(vertices);
    myShaders.Use();
    GLuint VAO;
    glGenVertexArrays(1, &VAO);
    glBindVertexArray(VAO);
    GLuint VBO;
    glGenBuffers(1, &VBO);
    glBindBuffer(GL_ARRAY_BUFFER, VBO);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(0));
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
    glEnableVertexAttribArray(1);
    glBindVertexArray(0);
    // Main Loop
    cout << "AUSDHIAUSDH";
    while (!glfwWindowShouldClose(window)) {
        glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
        glClear(GL_COLOR_BUFFER_BIT);
        glfwPollEvents();
        myShaders.Use();
        glBindVertexArray(VAO);
        glDrawArrays(GL_POINTS, 0, numOfPixels);
        glBindVertexArray(0);
        glfwSwapBuffers(window);
    }
    return 0;
}
void mandelbrotSet(GLfloat vertices[]) {
    double MinRe = -2.0;
    double MaxRe = 1.0;
    double MinIm = -1.2;
    double MaxIm = MinIm + (MaxRe - MinRe) * screenHeight / screenWidth;
    double Re_factor = (MaxRe - MinRe) / (screenWidth - 1);
    double Im_factor = (MaxIm - MinIm) / (screenHeight - 1);
    int MaxIterations = 30;
    int posCount = 0;
    for (int y = 0; y < screenHeight; ++y)
    {
        double c_im = MaxIm - y * Im_factor;
        for (int x = 0; x < screenWidth; ++x)
        {
            double c_re = MinRe + x * Re_factor;
            // check whether the complex number c for this pixel belongs to the set
            double Z_re = c_re, Z_im = c_im; // Set Z = c
            bool isInside = true;
            for (int n = 0; n < MaxIterations; ++n)
            {
                double Z_re2 = Z_re*Z_re, Z_im2 = Z_im*Z_im;
                if (Z_re2 + Z_im2 > 4)
                {
                    isInside = false;
                    break;
                }
                Z_im = 2 * Z_re*Z_im + c_im;
                Z_re = Z_re2 - Z_im2 + c_re;
            }
            if (isInside)
            {
                vertices[posCount] = float(x);
                vertices[posCount + 1] = float(y);
                vertices[posCount + 2] = 0.0f;
                vertices[posCount + 3] = 0.0f;
                vertices[posCount + 4] = 0.0f;
                posCount += 5;
            }
            else
            {
                vertices[posCount] = float(x);
                vertices[posCount + 1] = float(y);
                vertices[posCount + 2] = 1.0f;
                vertices[posCount + 3] = 1.0f;
                vertices[posCount + 4] = 1.0f;
                posCount += 5;
            }
        }
    }
}
Since I am not good at debugging, I tried putting some couts in the code, but nothing shows. In debug mode, the program crashes with the message:
"Unhandled exception at 0x00C21A47 in Mandelbrot - Pratica 1.exe: 0xC00000FD: Stack overflow (parameters: 0x00000000, 0x00DA2000)."
Notice that I commented out the function call (mandelbrotSet(vertices)), so I don't think the problem is in that function.
EDIT
Also, when I comment out Shader myShaders("vShader.vs", "fShader.fs");, the problem still persists.
You are experiencing a typical stack overflow, because the array you are trying to allocate on the stack is simply too big:
const GLint numOfPositions = 5 * 800 * 600;
GLfloat vertices[numOfPositions];
The stack is limited in size (the heap is too, but it is usually much larger).
And using a debugger would have helped...
...is there a way to work around that?
Use dynamic allocation on the heap (with new or malloc), or reduce the size of the array.
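For example, std::vector puts its storage on the heap; a sketch of the minimal change (note that sizeof(vertices) no longer gives the buffer size once the data lives on the heap, so the byte count must be computed explicitly):
#include <vector>
std::vector<GLfloat> vertices(numOfPositions); // heap-backed, zero-initialized
mandelbrotSet(vertices.data());
glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(GLfloat), vertices.data(), GL_STATIC_DRAW);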
I have a simple OpenGL program in which I am trying to use Vertex Buffer Objects for rendering instead of the old glBegin()/glEnd(). Basically, the user clicks on the window to set a starting point, then presses a key to generate subsequent points, which OpenGL draws as a line.
I've implemented this using glBegin() and glEnd(), but have not been successful with a VBO. I am wondering if the problem is that, after I initialize the VBO, I add more vertices that it has no memory allocated for, and so it doesn't display them.
Edit: I'm also a bit confused about how it knows which values in the vertex struct to use for x and y, as well as for r, g, b. I haven't been able to find a clear example of this.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <Math.h>
#include <iostream>
#include <vector>
#include <GL/glew.h>
#include <GL/glut.h>
struct vertex {
    float x, y, u, v, r, g, b;
};
const int D = 10; // distance
const int A = 10; // angle
const int WINDOW_WIDTH = 500, WINDOW_HEIGHT = 500;
std::vector<vertex> vertices;
boolean start = false;
GLuint vboId;
void update_line_point() {
    vertex temp;
    temp.x = vertices.back().x + D * vertices.back().u;
    temp.y = vertices.back().y + D * vertices.back().v;
    temp.u = vertices.back().u;
    temp.v = vertices.back().v;
    vertices.push_back(temp);
}
void update_line_angle() {
    float u_prime, v_prime;
    u_prime = vertices.back().u * cos(A) - vertices.back().v * sin(A);
    v_prime = vertices.back().u * sin(A) + vertices.back().v * cos(A);
    vertices.back().u = u_prime;
    vertices.back().v = v_prime;
}
void initVertexBuffer() {
    glGenBuffers(1, &vboId);
    glBindBuffer(GL_ARRAY_BUFFER, vboId);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * vertices.size(), &vertices[0], GL_STATIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void displayCB() {
    glClear(GL_COLOR_BUFFER_BIT);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluOrtho2D(0, WINDOW_WIDTH, 0, WINDOW_HEIGHT);
    if (start) {
        glBindBuffer(GL_ARRAY_BUFFER, vboId);
        glEnableClientState(GL_VERTEX_ARRAY);
        glEnableClientState(GL_COLOR_ARRAY);
        glVertexPointer(2, GL_FLOAT, sizeof(vertex), &vertices[0]);
        glColorPointer(3, GL_FLOAT, sizeof(vertex), &vertices[0]);
        glDrawArrays(GL_LINE_STRIP, 0, vertices.size());
        glDisableClientState(GL_VERTEX_ARRAY);
        glDisableClientState(GL_COLOR_ARRAY);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
    /***** this is what I'm trying to achieve
    glColor3f(1, 0, 0);
    glBegin(GL_LINE_STRIP);
    for (std::vector<vertex>::size_type i = 0; i < vertices.size(); i++) {
        glVertex2f(vertices[i].x, vertices[i].y);
    }
    glEnd();
    *****/
    glFlush();
    glutSwapBuffers();
}
void mouseCB(int button, int state, int x, int y) {
    if (state == GLUT_DOWN) {
        vertices.clear();
        vertex temp = {x, WINDOW_HEIGHT - y, 1, 0, 1, 0, 0}; // default red color
        vertices.push_back(temp);
        start = true;
        initVertexBuffer();
    }
    glutPostRedisplay();
}
void keyboardCB(unsigned char key, int x, int y) {
    switch (key) {
    case 'f':
        if (start) {
            update_line_point();
        }
        break;
    case 't':
        if (start) {
            update_line_angle();
        }
        break;
    }
    glutPostRedisplay();
}
void initCallbackFunc() {
    glutDisplayFunc(displayCB);
    glutMouseFunc(mouseCB);
    glutKeyboardFunc(keyboardCB);
}
int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
    glutInitWindowSize(WINDOW_WIDTH, WINDOW_HEIGHT);
    glutInitWindowPosition(100, 100);
    glutCreateWindow("Test");
    initCallbackFunc();
    // initialize glew
    GLenum glewInitResult;
    glewExperimental = GL_TRUE;
    glewInitResult = glewInit();
    if (GLEW_OK != glewInitResult) {
        std::cerr << "Error initializing glew." << std::endl;
        return 1;
    }
    glClearColor(1, 1, 1, 0);
    glutMainLoop();
    return 0;
}
If you have a VBO bound, then the pointer argument to the gl*Pointer() calls is interpreted as a byte offset from the beginning of the VBO, not as an actual pointer. Your usage is consistent with plain vertex-array usage, though.
So for your vertex struct, x starts at byte zero and r starts at byte sizeof(float) * 4.
Also, your mouse callback resets your vertex vector on every call, so you can never have more than one vertex in it at any given time. It also leaks VBO names via the glGenBuffers() call in initVertexBuffer().
Give this a shot:
#include <GL/glew.h>
#include <GL/glut.h>
#include <iostream>
#include <vector>
struct vertex
{
    float x, y;
    float u, v;
    float r, g, b;
};
GLuint vboId;
std::vector<vertex> vertices;
void mouseCB(int button, int state, int x, int y)
{
    y = glutGet( GLUT_WINDOW_HEIGHT ) - y;
    if (state == GLUT_DOWN)
    {
        vertex temp = {x, y, 1, 0, 1, 0, 0}; // default red color
        vertices.push_back(temp);
        glBindBuffer(GL_ARRAY_BUFFER, vboId);
        glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * vertices.size(), &vertices[0], GL_STATIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
    glutPostRedisplay();
}
void displayCB()
{
    glClearColor(1, 1, 1, 0);
    glClear(GL_COLOR_BUFFER_BIT);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    double w = glutGet( GLUT_WINDOW_WIDTH );
    double h = glutGet( GLUT_WINDOW_HEIGHT );
    glOrtho( 0, w, 0, h, -1, 1 );
    glMatrixMode( GL_MODELVIEW );
    glLoadIdentity();
    if ( vertices.size() > 1 )
    {
        glBindBuffer(GL_ARRAY_BUFFER, vboId);
        glEnableClientState(GL_VERTEX_ARRAY);
        glEnableClientState(GL_COLOR_ARRAY);
        glVertexPointer(2, GL_FLOAT, sizeof(vertex), (void*)(sizeof( float ) * 0));
        glColorPointer(3, GL_FLOAT, sizeof(vertex), (void*)(sizeof( float ) * 4));
        glDrawArrays(GL_LINE_STRIP, 0, vertices.size());
        glDisableClientState(GL_VERTEX_ARRAY);
        glDisableClientState(GL_COLOR_ARRAY);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
    glutSwapBuffers();
}
int main(int argc, char** argv)
{
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
    glutInitWindowSize(500, 500);
    glutInitWindowPosition(100, 100);
    glutCreateWindow("Test");
    // initialize glew
    glewExperimental = GL_TRUE;
    GLenum glewInitResult = glewInit();
    if (GLEW_OK != glewInitResult) {
        std::cerr << "Error initializing glew." << std::endl;
        return 1;
    }
    glGenBuffers(1, &vboId);
    glutDisplayFunc(displayCB);
    glutMouseFunc(mouseCB);
    glutMainLoop();
    return 0;
}
A VBO is a buffer of fixed size located somewhere in memory (almost always in dedicated GPU memory, VRAM). You specify this size in glBufferData, and you simultaneously give the GL a pointer to copy from. The key word here is copy: nothing you do to the vector after glBufferData is reflected in the VBO.
You should bind the buffer and make another glBufferData call after changing the vector. You will probably get better performance from glBufferSubData or glMapBuffer if the VBO is already large enough to hold the new data, but in a small application like this the performance cost of calling glBufferData every time is basically non-existent.
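For instance, appending a single vertex could look like this (a sketch; it assumes the VBO was created with enough capacity to hold the grown vector):
glBindBuffer(GL_ARRAY_BUFFER, vboId);
// overwrite only the bytes belonging to the newly appended vertex
glBufferSubData(GL_ARRAY_BUFFER,
                sizeof(vertex) * (vertices.size() - 1), // byte offset of the new vertex
                sizeof(vertex),                         // number of bytes to copy
                &vertices.back());
glBindBuffer(GL_ARRAY_BUFFER, 0);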
Also, to address your other question about how to pick out x, y, and so on: your VBO is set up with the values interleaved, so in memory your vertices look like this:
+-------------------------------------------------
| x | y | u | v | r | g | b | x | y | u | v | ...
+-------------------------------------------------
You tell OpenGL where your vertices and colors are with the glVertexPointer and glColorPointer functions respectively.
The size parameter specifies how many elements there are for each vertex. In this case, it's 2 for vertices, and 3 for colors.
The type parameter specifies what type each element is. In your case it's GL_FLOAT for both.
The stride parameter is how many bytes you need to skip from the start of one vertex to the start of the next. With an interleaved setup like yours, this is simply sizeof(vertex) for both.
The last parameter, pointer, isn't actually a pointer to your vector in this case. When a VBO is bound, pointer becomes a byte offset into the VBO. For vertices, this should be 0, since the first vertex starts at the very first byte of the VBO. For colors, this should be 4 * sizeof(float), since the first color is preceded by 4 floats.
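Equivalently, offsetof makes those byte offsets self-documenting (a sketch using the vertex struct above):
#include <cstddef> // for offsetof
glVertexPointer(2, GL_FLOAT, sizeof(vertex), (void*)offsetof(vertex, x)); // offset 0
glColorPointer(3, GL_FLOAT, sizeof(vertex), (void*)offsetof(vertex, r));  // offset 4 * sizeof(float)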
First off, here is a screenshot of the problematically rendered images in OpenGL. The fourth surface image was drawn by MATLAB and shows what the OpenGL output is supposed to look like.
MATLAB rendering of the dataset:
(The first 3 images are the problematic serrated renders from OpenGL at different angles; the 4th is the correct MATLAB-drawn image.)
The image is a 1024 x 1024 complex matrix. Each element's imaginary part is the height of the point (in a 1024x1024 heightmap), and the real part is the colour of the point.
In MATLAB we created a small Gaussian-shaped mountain. In OpenGL it renders ragged and serrated, and the raggedness is spread across the entire image.
Moreover, depending on the viewing angle of the object, there appears to be a line beyond which not only does an even stranger serration appear, but the rendered surface also makes a jump in height.
What can cause this? Why is this raggedness happening, and what is that line? We have run out of ideas and would appreciate any help. The relevant parts of the VBO code are given below. We basically create a float4 object per vertex: the first, second, and third floats are the coordinates of the point, and the 4th float (treated as 4 one-byte numbers) is the RGBA colour.
Also note that the complex matrix containing the heightmap and the colour information is stored on the GPU, so there are calls to CUDA in the code. When all the data is dumped to a file, MATLAB draws the map successfully, so the data is definitely correct.
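For reference, the 16-byte layout described above corresponds to a vertex like this (a sketch inferred from the strides and offsets in the code below; the original struct definition is not shown):
struct Vertex {
    float x, y, z;         // position: 3 floats at offsets 0, 4, 8
    unsigned char rgba[4]; // colour packed into the bytes of the "4th float", at offset 12
};                         // 16 bytes total, matching the stride of 16 used below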
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
void initGL()
{
    ...
    glViewport(0, 0, window_width, window_height);
    glEnable(GL_BLEND);
    glEnable(GL_COLOR_MATERIAL);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    // projection
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (GLfloat)window_width / (GLfloat)window_height, 0.1, 15.0);
    ...
}
void display()
{
    camx += camx_v;
    camy += camy_v;
    camx_v = 0;
    camy_v = 0;
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // set view matrix
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    gluLookAt(0, 0, 1, /* look from camera XYZ */
              0, 0, 0, /* look at the origin */
              0, 1, 0); /* positive Y up vector */
    drawGround();
    glTranslatef(camx, camy, translate_z);
    glRotatef(rotate_x, 1.0, 0.0, 0.0);
    glRotatef(rotate_y, 0.0, 1.0, 0.0);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glEnableClientState(GL_VERTEX_ARRAY);
    glVertexPointer(3, GL_FLOAT, 16, BUFFER_OFFSET(0));
    glEnableClientState(GL_COLOR_ARRAY);
    glColorPointer(4, GL_UNSIGNED_BYTE, 16, BUFFER_OFFSET(12));
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vbo_i);
    glDrawElements(GL_TRIANGLES, (mesh_width-1) * (mesh_height-1) * 6, GL_UNSIGNED_INT, (GLvoid*)0);
    glDisableClientState(GL_VERTEX_ARRAY);
    glDisableClientState(GL_COLOR_ARRAY);
    glutSwapBuffers();
}
void createVBO(GLuint* vbo, struct cudaGraphicsResource **vbo_res,
               unsigned int vbo_res_flags)
{
    glGenBuffers(1, vbo);
    glBindBuffer(GL_ARRAY_BUFFER, *vbo);
    unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(vbo_res, *vbo, vbo_res_flags));
}
void createIBO(GLuint* vbo, struct cudaGraphicsResource **vbo_res,
               unsigned int vbo_res_flags, unsigned int numofindice)
{
    glGenBuffers(1, vbo);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, *vbo);
    unsigned int size = (mesh_width-1) * (mesh_height-1) * numofindice * sizeof(GLuint);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, 0, GL_STATIC_DRAW);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(vbo_res, *vbo, vbo_res_flags));
}
void main()
{
    initGL();
    createVBO(&vbo, &cuda_vbo_resource, cudaGraphicsMapFlagsWriteDiscard);
    createIBO(&vbo_i, &cuda_vbo_resource_i, cudaGraphicsMapFlagsWriteDiscard, 6);
    glutMainLoop();
}
// KERNEL TO FILL the INDEX BUFFER in GPU, called once at the initialization of the program.
__global__ void fillIBO(unsigned int* pos_i, unsigned int M)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    unsigned int bi;
    if (y < M-1 && x < M-1)
    {
        bi = ((M-1)*y + x)*6;
        //TRI
        pos_i[bi++] = x + y*M + 1;
        pos_i[bi++] = x + y*M + M + 1;
        pos_i[bi++] = x + y*M;
        pos_i[bi++] = x + y*M;
        pos_i[bi++] = x + y*M + M + 1;
        pos_i[bi++] = x + y*M + M;
    }
}
Replace the second triangle with:
pos_i[bi++] = x + y*M + 1;
pos_i[bi++] = x + y*M + M + 1;
pos_i[bi++] = x + y*M + M;
Also, I'm pretty sure it should be
bi = (M*y + x)*6;
I'm taking my first steps in OpenCL. I have a framework that I know can at least take an array from the CPU, perform an operation in OpenCL, and read the array back (with the right answer). I'm currently trying to improve this by adding a displaced mesh, as found in this OpenCL example (slides 18-23; the only significant change is that I made the VBO a float3 instead of a float4).
I have set up a shared context as described earlier in those slides and in this resource. I tested the VBO with CPU input data (so I know it draws correctly). Also, I create the context before the VBO (as motivated by this thread). Finally, I tried reducing the kernel to the following [edited]:
__kernel void sine_wave(__global float3* pos, int width, int height, float time) {
    uint x = get_global_id(0);
    uint y = get_global_id(1);
    pos[y*width+x] = (float3)(1.0f, 1.0f, 1.0f);
}
Yet no matter what I do, I cannot get the OpenCL program to update anything. There are no errors, nothing, yet the VBO keeps its input data. If I don't specify input data, all the points render at (0,0,0). I can't figure out what could be causing this.
Ideas? Thanks,
Ian
PS #1: the current system is an NVIDIA GTX 580M on Windows 7 x64, though the code is intended to be portable.
PS #2: I can provide code if no one has any clues . . .
Well, I figured it out. After further hours of searching, I downloaded NVIDIA's GPU Computing Toolkit, which appears to be where the linked demo derives from. I then reduced their code immensely, down to the following ~220-line source (may it help ye future coders):
#pragma comment(lib,"Opengl32.lib")
#pragma comment(lib,"glu32.lib")
#pragma comment(lib,"OpenCL.lib")
#pragma comment(lib,"glew32.lib")
#pragma comment(lib,"glut32.lib")
// OpenGL Graphics Includes
#include <GL/glew.h>
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenGL/OpenGL.h>
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#ifdef UNIX
#include <GL/glx.h>
#endif
#endif
#include <CL/opencl.h>
// Rendering window vars
const unsigned int window_width = 512;
const unsigned int window_height = 512;
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
// OpenCL vars
cl_context cxGPUContext;
cl_device_id* cdDevices;
cl_command_queue cqCommandQueue;
cl_kernel ckKernel;
cl_mem vbo_cl;
cl_program cpProgram;
size_t szGlobalWorkSize[] = {mesh_width, mesh_height};
// vbo variables
GLuint vbo;
int mouse_old_x, mouse_old_y;
int mouse_buttons = 0;
float rotate_x = 0.0, rotate_y = 0.0;
float translate_z = -3.0;
void mouse(int button, int state, int x, int y) {
    if (state == GLUT_DOWN) {
        mouse_buttons |= 1 << button;
    } else if (state == GLUT_UP) {
        mouse_buttons = 0;
    }
    mouse_old_x = x;
    mouse_old_y = y;
}
void motion(int x, int y) {
    float dx, dy;
    dx = (float)(x - mouse_old_x);
    dy = (float)(y - mouse_old_y);
    if (mouse_buttons & 1) {
        rotate_x += dy * 0.2f;
        rotate_y += dx * 0.2f;
    } else if (mouse_buttons & 4) {
        translate_z += dy * 0.01f;
    }
    mouse_old_x = x;
    mouse_old_y = y;
}
void DisplayGL(void) {
    static float anim = 0.0f;
    // run OpenCL kernel to generate vertex positions
    glFinish();
    clEnqueueAcquireGLObjects(cqCommandQueue, 1, &vbo_cl, 0,0,0);
    clSetKernelArg(ckKernel, 3, sizeof(float), &anim);
    clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 2, NULL, szGlobalWorkSize, NULL, 0,0,0);
    clEnqueueReleaseGLObjects(cqCommandQueue, 1, &vbo_cl, 0,0,0);
    clFinish(cqCommandQueue);
    // set view matrix
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();
    glTranslatef(0.0, 0.0, translate_z);
    glRotatef(rotate_x, 1.0, 0.0, 0.0);
    glRotatef(rotate_y, 0.0, 1.0, 0.0);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexPointer(4, GL_FLOAT, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glColor3f(1.0, 0.0, 0.0);
    glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
    glDisableClientState(GL_VERTEX_ARRAY);
    // flip backbuffer to screen
    glutSwapBuffers();
    anim += 0.01f;
}
void timerEvent(int value) {
    glutPostRedisplay();
    glutTimerFunc(10, timerEvent, 0);
}
int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowPosition(glutGet(GLUT_SCREEN_WIDTH)/2 - window_width/2, glutGet(GLUT_SCREEN_HEIGHT)/2 - window_height/2);
    glutInitWindowSize(window_width, window_height);
    glutCreateWindow("OpenCL/GL Interop (VBO)");
    glutDisplayFunc(DisplayGL);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);
    glutTimerFunc(10, timerEvent, 0);
    glewInit();
    glClearColor(0.0, 0.0, 0.0, 1.0);
    glDisable(GL_DEPTH_TEST);
    glViewport(0, 0, window_width, window_height);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (GLfloat)window_width / (GLfloat)window_height, 0.1, 10.0);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    // Get the NVIDIA platform
    cl_platform_id cpPlatform;
    clGetPlatformIDs(1, &cpPlatform, NULL);
    // Get the number of GPU devices available to the platform
    cl_uint uiDevCount;
    clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiDevCount);
    // Create the device list
    cdDevices = new cl_device_id[uiDevCount];
    clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, uiDevCount, cdDevices, NULL);
    // Define OS-specific context properties and create the OpenCL context
#if defined (__APPLE__)
    CGLContextObj kCGLContext = CGLGetCurrentContext();
    CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
    cl_context_properties props[] =
    {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
        0
    };
    cxGPUContext = clCreateContext(props, 0, 0, NULL, NULL, &ciErrNum);
#else
#ifdef UNIX
    cl_context_properties props[] =
    {
        CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
        CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
        CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
        0
    };
    cxGPUContext = clCreateContext(props, 1, &cdDevices[uiDeviceUsed], NULL, NULL, &ciErrNum);
#else // Win32
    cl_context_properties props[] =
    {
        CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
        CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
        CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
        0
    };
    cxGPUContext = clCreateContext(props, 1, &cdDevices[0], NULL, NULL, NULL);
#endif
#endif
    // create a command-queue
    cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, NULL);
    const char* cSourceCL =
        "__kernel void sine_wave(__global float4* pos, unsigned int width, unsigned int height, float time)\n"
        "{\n"
        "    unsigned int x = get_global_id(0);\n"
        "    unsigned int y = get_global_id(1);\n"
        "\n"
        "    // calculate uv coordinates\n"
        "    float u = x / (float) width;\n"
        "    float v = y / (float) height;\n"
        "    u = u*2.0f - 1.0f;\n"
        "    v = v*2.0f - 1.0f;\n"
        "\n"
        "    // calculate simple sine wave pattern\n"
        "    float freq = 4.0f;\n"
        "    float w = sin(u*freq + time) * cos(v*freq + time) * 0.5f;\n"
        "\n"
        "    // write output vertex\n"
        "    pos[y*width+x] = (float4)(u, w, v, 1.0f);\n"
        "}\n";
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, NULL, NULL);
    clBuildProgram(cpProgram, 0, NULL, "-cl-fast-relaxed-math", NULL, NULL);
    // create the kernel
    ckKernel = clCreateKernel(cpProgram, "sine_wave", NULL);
    // create VBO (if using standard GL or CL-GL interop), otherwise create CL buffer
    unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    // initialize buffer object
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    // create OpenCL buffer from GL VBO
    vbo_cl = clCreateFromGLBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, vbo, NULL);
    // set the args values
    clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void *)&vbo_cl);
    clSetKernelArg(ckKernel, 1, sizeof(unsigned int), &mesh_width);
    clSetKernelArg(ckKernel, 2, sizeof(unsigned int), &mesh_height);
    glutMainLoop();
}
After comparing it with my original code, I (eventually) found the key difference.
Right:
clEnqueueNDRangeKernel(context->command_queue, kernel->kernel, 2, NULL, global,NULL, 0,0,0 );
Wrong:
clEnqueueNDRangeKernel(context->command_queue, kernel->kernel, 2, NULL, global,local, 0,0,0 );
It turns out that the grid size I was using, 10x10, was smaller than in the examples I had seen elsewhere, which told me to use 16x16 for "local". Because "global" is the grid size, "global" was smaller than "local".
For some reason this didn't cause any errors, though at this point I honestly can't say I completely understand what these variables do.
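(For the record, the rule being broken was that each dimension of the global work size must be an exact multiple of the corresponding local work size, so a 10x10 global grid cannot be paired with a 16x16 local one. Passing NULL for local, as in the "Right" call above, lets the runtime choose a valid decomposition. A sketch, with placeholder variable names:)
size_t global[2] = {10, 10};
// size_t local[2] = {16, 16}; // invalid pairing: 10 is not a multiple of 16
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);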
Ian