Program not entering the main()

Program not entering the main() - c++

So, I am doing an assignment in OpenGL and C++. I am writing a code to draw the mandelbrot set. But the problem is that the code has no compiler errors, but as soon as I run the program, it crashes. Here is the code:
#define GLEW_STATIC
#include <GL\glew.h>
#include <GLFW\glfw3.h>
#include <iostream>
#include <cmath>
#include "Shader.h"
using namespace std;
// Tamanho da Janela
GLuint screenWidth = 800, screenHeight = 600;
void mandelbrotSet(GLfloat vertices[]);
int main() {
const GLint numOfPositions = 5 * 800 * 600;
GLint numOfPixels = screenWidth * screenHeight;
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_SAMPLES, 4); // 4 Samples de anti-aliasing
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "Mandelbrot - Pratica 1", nullptr, nullptr);
glfwMakeContextCurrent(window);
glewExperimental = GL_TRUE;
glewInit();
glViewport(0, 0, screenWidth, screenHeight);
Shader myShaders("vShader.vs", "fShader.fs");
GLfloat vertices[numOfPositions];
//mandelbrotSet(vertices);
myShaders.Use();
GLuint VAO;
glGenVertexArrays(1, &VAO);
glBindVertexArray(VAO);
GLuint VBO;
glGenBuffers(1, &VBO);
glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(0));
glEnableVertexAttribArray(0);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
glEnableVertexAttribArray(1);
glBindVertexArray(0);
// Main Loop
cout << "AUSDHIAUSDH";
while (!glfwWindowShouldClose(window)) {
glClearColor(1.0f, 1.0f, 1.0f, 0.0f);
glClear(GL_COLOR_BUFFER_BIT);
glfwPollEvents();
myShaders.Use();
glBindVertexArray(VAO);
glDrawArrays(GL_POINTS, 0, numOfPixels);
glBindVertexArray(0);
glfwSwapBuffers(window);
}
return 0;
}
void mandelbrotSet(GLfloat vertices[]) {
double MinRe = -2.0;
double MaxRe = 1.0;
double MinIm = -1.2;
double MaxIm = MinIm + (MaxRe - MinRe) * screenHeight / screenWidth;
double Re_factor = (MaxRe - MinRe) / (screenWidth - 1);
double Im_factor = (MaxIm - MinIm) / (screenHeight - 1);
int MaxIterations = 30;
int posCount = 0;
for ( int y = 0; y < screenHeight; ++y )
{
double c_im = MaxIm - y * Im_factor;
for ( int x = 0; x < screenWidth; ++x )
{
double c_re = MinRe + x * Re_factor;
// Calcula se o pixel se o numero complexo c pertence ao set
double Z_re = c_re, Z_im = c_im; // Set Z = c
bool isInside = true;
for ( int n = 0; n<MaxIterations; ++n )
{
double Z_re2 = Z_re*Z_re, Z_im2 = Z_im*Z_im;
if ( Z_re2 + Z_im2 > 4 )
{
isInside = false;
break;
}
Z_im = 2 * Z_re*Z_im + c_im;
Z_re = Z_re2 - Z_im2 + c_re;
}
if ( isInside )
{
vertices[posCount] = float(x);
vertices[posCount + 1] = float(y);
vertices[posCount + 2] = 0.0f;
vertices[posCount + 3] = 0.0f;
vertices[posCount + 4] = 0.0f;
posCount += 5;
}
else
{
vertices[posCount] = float(x);
vertices[posCount + 1] = float(y);
vertices[posCount + 2] = 1.0f;
vertices[posCount + 3] = 1.0f;
vertices[posCount + 4] = 1.0f;
posCount += 5;
}
}
}
}
Since I am not good at debugging, I tried to put some couts in the code, but nothing shows. Also in debugging mode, the program crashes with the message:
"Unhandled exception at 0x00C21A47 in Mandelbrot - Pratica 1.exe: 0xC00000FD: Stack overflow (parameters: 0x00000000, 0x00DA2000)."
Notice that I commented the function call (mandelbrotSet(vertices)), so I don't think the problem is in the function.
EDIT
Also when I comment Shader myShaders("vShader.vs", "fShader.fs"); the problem still persists.

You are experiencing typical stack overflow because the size of array you are trying to allocate on stack is just too big:
const GLint numOfPositions = 5 * 800 * 600;
GLfloat vertices[numOfPositions];
The stack is limited in size (heap too, but much larger [usually]).
And use of debugger would help...
...is there a way to work around that?
Use dynamic allocation on heap (with new or malloc) or truncate size of array.

Related

Polygon tearing in OpenGL

500x500 grid with 1000 sub Divisions:
Just one question.
Why is this happening ?
#include <iostream>
#include <sstream>
#include <vector>
#define GLEW_STATIC
#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include "glm/glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include "GameEngine.hpp"
#include "ShaderProgram.h"
#include "Camera.h"
#include "Mesh.h"
const char *title = "Terrain";
GameEngine engine;
OrbitCamera orbitCamera;
float gYaw = 0.0f;
float gPitch = 1.0f;
float gRadius = 200.0f;
const float MOUSE_SENSTIVITY = 0.25f;
bool gWireFrame = false;
void glfw_onKey(GLFWwindow *window, int key, int scancode, int action, int mode);
void glfw_onMouseMove(GLFWwindow *window, double posX, double posY);
void glfw_onMouseScroll(GLFWwindow *window, double deltaX, double deltaY);
int main()
{
if (!engine.init(1024, 768, title))
{
std::cerr << "OpenGL init failed" << std::endl;
std::cin.get();
return -1;
}
//set callbacks
glfwSetKeyCallback(engine.getWindow(), glfw_onKey);
glfwSetCursorPosCallback(engine.getWindow(), glfw_onMouseMove);
std::vector<Vertex> VER;
std::vector<glm::vec3> verts;
std::vector<unsigned int> indices;
std::vector<glm::vec3> norms;
int subDiv = 1000;
int width = 500;
int height = 500;
int size = 0;
for (int row = 0; row < subDiv; row++)
{
for (int col = 0; col < subDiv; col++)
{
float x = (float)((col * width) / subDiv - (width / 2.0));
float z = ((subDiv - row) * height) / subDiv - (height / 2.0);
glm::vec3 pos = glm::vec3(x, 0, z);
verts.push_back(pos);
}
}
size = subDiv * subDiv;
size = verts.size();
for (int row = 0; row < subDiv -1 ; row++)
{
for (int col = 0; col < subDiv -1; col++)
{
int row1 = row * (subDiv);
int row2 = (row+1) * (subDiv);
indices.push_back(row1+col);
indices.push_back(row1+col+1);
indices.push_back( row2+col+1);
indices.push_back(row1+col);
indices.push_back( row2+col+1);
indices.push_back(row2+col);
}
}
for (int i = 0; i < verts.size(); i++)
{
Vertex vertex;
vertex.position = verts[i];
vertex.normal = glm::vec3(0, 0, 0);
vertex.texCoords = glm::vec2(0, 0);
VER.push_back(vertex);
}
VER.begin();
for (int i = 0; i < indices.size(); i += 3)
{
Vertex a = VER[indices[i]];
Vertex b = VER[indices[i + 1]];
Vertex c = VER[indices[i + 2]];
glm::vec3 p = glm::cross(b.position - a.position, c.position - a.position);
VER[indices[i]].normal += p;
VER[indices[i + 1]].normal += p;
VER[indices[i + 2]].normal += p;
}
for (int i = 0; i < VER.size(); i++)
{
VER[i].normal = glm::normalize(VER[i].normal);
}
glm::vec3 cubePos = glm::vec3(0.0f, 0.0f, -5.0f);
GLuint vbo, vao, ibo;
glGenVertexArrays(1, &vao);
glGenBuffers(1, &vbo);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, VER.size() * sizeof(Vertex), &VER[0], GL_STATIC_DRAW);
// Vertex Positions
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)0);
glEnableVertexAttribArray(0);
// Normals attribute
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)(3 * sizeof(GLfloat)));
glEnableVertexAttribArray(1);
// Vertex Texture Coords
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*)(6 * sizeof(GLfloat)));
glEnableVertexAttribArray(2);
int n = indices.size() * sizeof(unsigned int);
glGenBuffers(1, &ibo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), &indices[0], GL_STATIC_DRAW);
glBindVertexArray(0);
ShaderProgram shaderProgram;
shaderProgram.loadShaders("shaders/vert.glsl", "shaders/frag.glsl");
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
while (!glfwWindowShouldClose(engine.getWindow()))
{
glfwPollEvents();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glm::mat4 model, view, projection;
model = glm::mat4(1.0f);
orbitCamera.setLookAt(glm::vec3(0, 0, 0));
orbitCamera.rotate(gYaw, gPitch);
orbitCamera.setRadius(gRadius);
model = glm::translate(model, glm::vec3(0, 0, 0));
//model = glm::scale(model, glm::vec3(1, 0, 1));
//model = scaleMat;
projection = glm::perspective(glm::radians(45.0f), (float)engine.getWidth() / (float)engine.getHeight(), 0.00001f, 100.0f);
shaderProgram.use();
glm::vec3 viewPos;
viewPos.x = orbitCamera.getPosition().x;
viewPos.y = orbitCamera.getPosition().y;
viewPos.z = orbitCamera.getPosition().z;
shaderProgram.setUniform("projection", projection);
shaderProgram.setUniform("view", orbitCamera.getViewMatrix());
shaderProgram.setUniform("model", model);
shaderProgram.setUniform("lightPos", glm::vec3(5, 10, 10));
shaderProgram.setUniform("viewPos", viewPos);
glBindVertexArray(vao);
glDrawElements(GL_TRIANGLES,indices.size(), GL_UNSIGNED_INT, 0);
//glDrawArrays(GL_TRIANGLES, 0, VER.size());
glBindVertexArray(0);
glfwSwapBuffers(engine.getWindow());
}
//cleanup
glDeleteVertexArrays(1, &vao);
glDeleteBuffers(1, &vbo);
glfwTerminate();
return 0;
}
void glfw_onKey(GLFWwindow *window, int key, int scancode, int action, int mode)
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
{
glfwSetWindowShouldClose(window, GL_TRUE);
}
if (key == GLFW_KEY_E && action == GLFW_PRESS)
{
gWireFrame = !gWireFrame;
if (gWireFrame)
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
else
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
}
}
void glfw_onMouseMove(GLFWwindow *window, double posX, double posY)
{
static glm::vec2 lastMousePos = glm::vec2(0, 0);
if (glfwGetMouseButton(engine.getWindow(), GLFW_MOUSE_BUTTON_LEFT) == 1)
{
gYaw -= ((float)posX - lastMousePos.x) * MOUSE_SENSTIVITY;
gPitch += ((float)posY - lastMousePos.y) * MOUSE_SENSTIVITY;
}
if (glfwGetMouseButton(engine.getWindow(), GLFW_MOUSE_BUTTON_RIGHT) == 1)
{
float dx = 0.01f * ((float)posX - lastMousePos.x);
float dy = 0.01f * ((float)posY - lastMousePos.y);
gRadius += dx - dy;
}
lastMousePos.x = (float)posX;
lastMousePos.y = (float)posY;
}
This is the main code. Rest is just basic initializing code, nothing fancy.
I've tried changing the swapinterval but that doesn't seems to be the problem.
I can share code for the other classes if anyone wants to take a look. And I've also tried lowering the sub divisions.
Edit*
After increasing the value of far plane to 8000:
Still not crisp.

the edit with second image is telling you what is happening ... if tampering with znear/zfar changes output like that it means your depth buffer has low bitwidth to the range you want to use...
However increasing zfar should make things worse (you just for some reason don't see it maybe its cut off or some weird math accuracy singularity).
for me its usual to select the planes so:
zfar/znear < (2^depth_buffer_bitwidth)/2
check you depth_buffer_bitwidth
Try to use 24 bits (you might have 16 bits right now). That should work on all gfx cards these days. You can try 32 bits too but that will work only on newer cards. I am using this code to get the max I can:
What is the proper OpenGL initialisation on Intel HD 3000?
However you are using GLFW so you need to find how to do it in it ... probably there is some hint for this in it ...
increase znear as much as you can
tampering znear has much much more impact than zfar...
Use linear depth buffer
this is the best option for large depth range views like terrains that covers stuf in whole depth view range. See:
How to correctly linearize depth in OpenGL ES in iOS?
however you need shaders and new api for this... I do not think this is doable in old api but luckily you are on new api already ...
if none of above is enough
You can stack up more frustrums together at a cost of multiple rendering of the same geometry. for more info see:
Is it possible to make realistic n-body solar system simulation in matter of size and mass?

How do you initialize OpenGL?
Are you using GL_BLEND?
Using blending is nice to get anti-aliased polygon edges, however it also means your z-buffer gets updated even when a very translucent fragment is drawn. This prevents other opaque fragments with the same z-depth from being drawn, which might be what is causing those holes. You could try disabling GL_BLEND to see if the issue goes away.
What depth function are you using?
By default it is set to GL_LESS. You might want to try glDepthFunc(GL_LEQUAL); So fragments with the same z-depth will be drawn. However, due to rounding errors this might not solve your problem entirely.

Instancing with OpenGL 3.3 seems very slow

I wrote a minimal code-sample in C++, which is rendering 10000 colored
quads on the screen. I am using "instancing" and so updating only
the model-matrix for each quad each frame. The data of the 6 vertices
are stored in an indivdual VBO und will be reused all the time.
The projection-matrix (orthographic) is injected once at program-start
via uniform. The model-matrix is calculated on the CPU with the library GLM.
I measured the rendering-time and I got only an average FPS of 52.
I think this is MUCH to less, but I cannot find the mistake/bottleneck in my little sample program.
After some analysis it seems, that the 3 calculations done with GLM
are very slow. Am I doing something wrong here? For example, If
I remove the rotating-calculation, I get an FPS-boost of 10 FPS!
Maybe you can help me to find out, what I can do better here and how
can I optimize my sample. It is important for me, that each quad is individual configurable during runtime, so I decided to use instancing.
Moving the matrix-calculations to the GPU seems another option, but I am really confused, why the CPU has so much problems calculating the 10000
model-matrices! Ok, my CPU is very bad (Athlon 2 Core-Duo M300, GPU is ATI Mobility Radeon 4100), but It should do this task in no measurable time, or?
Here is minimal, fully working, compilable example (If u have GLFW and GLM).
Maybe someone have some time and can help me out here :)
#define GLEW_STATIC
#define GLM_FORCE_INLINE
#define GLM_FORCE_SSE2
#include "glew.h"
#include "glfw3.h"
#include "glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include <conio.h>
#include <cstdlib>
#include <iostream>
#include <ctime>
GLuint buildShader()
{
std::string strVSCode =
"#version 330 core\n"
"in vec3 vertexPosition;\n"
"in mat4 modelMatrix;\n"
"uniform mat4 projectionMatrix;\n"
"out vec4 m_color;\n"
"void main() {\n"
" vec4 vecVertex = vec4(vertexPosition, 1);\n"
" gl_Position = projectionMatrix * modelMatrix * vecVertex;\n"
" m_color = gl_Position;\n"
"}\n";
std::string strFSCode = "#version 330 core\n"
"out vec4 frag_colour;\n"
"in vec4 m_color;\n"
"void main() {\n"
" frag_colour = vec4(m_color.x, m_color.y, m_color.z, 0.5f);\n"
"}\n";
GLuint gluiVertexShaderId = glCreateShader(GL_VERTEX_SHADER);
char const * VertexSourcePointer = strVSCode.c_str();
glShaderSource(gluiVertexShaderId, 1, &VertexSourcePointer, NULL);
glCompileShader(gluiVertexShaderId);
GLuint gluiFragmentShaderId = glCreateShader(GL_FRAGMENT_SHADER);
char const * FragmentSourcePointer = strFSCode.c_str();
glShaderSource(gluiFragmentShaderId, 1, &FragmentSourcePointer, NULL);
glCompileShader(gluiFragmentShaderId);
GLuint gluiProgramId = glCreateProgram();
glAttachShader(gluiProgramId, gluiVertexShaderId);
glAttachShader(gluiProgramId, gluiFragmentShaderId);
glLinkProgram(gluiProgramId);
glDeleteShader(gluiVertexShaderId);
glDeleteShader(gluiFragmentShaderId);
return gluiProgramId;
}
struct Sprite
{
glm::vec3 position, dimension;
float speed, rotation, rx, ry;
};
struct Vertex
{
float x, y, z;
Vertex(){};
Vertex(float x, float y, float z) : x(x), y(y), z(z) {}
};
int main(int arc, char **argv)
{
// GLFW init
int displayResWith = 1366; //modify this here
int displayResHeight = 768; //modify this here
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, 1);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RED_BITS, 8);
glfwWindowHint(GLFW_GREEN_BITS, 8);
glfwWindowHint(GLFW_BLUE_BITS, 8);
glfwWindowHint(GLFW_ALPHA_BITS, 8);
glfwWindowHint(GLFW_DEPTH_BITS, 32);
glfwWindowHint(GLFW_STENCIL_BITS, 32);
GLFWwindow* window = glfwCreateWindow(displayResWith, displayResHeight,"Instancing", glfwGetPrimaryMonitor(),NULL);
int width, height;
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glfwGetFramebufferSize(window, &width, &height);
//GLEW init
glewExperimental = GL_TRUE;
glewInit();
const GLubyte* renderer = glGetString(GL_RENDERER);
const GLubyte* version = glGetString(GL_VERSION);
std::cout << "Renderer: " << renderer << std::endl;
std::cout << "OpenGL supported version: " << version << std::endl;
//OpenGL init
glEnable(GL_CULL_FACE);
glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LESS);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glClearColor(255.0f, 255.0f, 255.0f, 255.0f);
//Shader
GLuint programID = buildShader();
//VBO vertexBuffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
Vertex VertexBufferData[6];
VertexBufferData[0] = Vertex(-0.5f, 0.5f, 0.0f); //Links oben
VertexBufferData[1] = Vertex(-0.5f, -0.5f, 0.0f); //Links unten
VertexBufferData[2] = Vertex(0.5f, -0.5f, 0.0f); //Rechts unten
VertexBufferData[3] = VertexBufferData[2]; //Rechts unten
VertexBufferData[4] = Vertex(0.5f, 0.5f, 0.0f); //Rechts oben
VertexBufferData[5] = VertexBufferData[0]; //Links oben
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex)*6, VertexBufferData, GL_STATIC_DRAW);
//VBO instanceBuffer
GLuint instanceBuffer;
glGenBuffers(1, &instanceBuffer);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
int iMaxInstanceCount = 30000;
glm::mat4 *ptrInstanceBufferData = new glm::mat4[iMaxInstanceCount];
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW);
//VAO - Start
GLuint vertexArrayObject;
glGenVertexArrays(1, &vertexArrayObject);
glBindVertexArray(vertexArrayObject);
//For VBO vertexbuffer
glEnableVertexAttribArray(glGetAttribLocation(programID, "vertexPosition"));
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glVertexAttribPointer(
glGetAttribLocation(programID, "vertexPosition"),
3,
GL_FLOAT,
GL_FALSE,
sizeof(Vertex),
(void*)0
);
glVertexAttribDivisor(0, 0);
//For VBO instanceBuffer
int pos = glGetAttribLocation(programID, "modelMatrix");
int pos1 = pos + 0;
int pos2 = pos + 1;
int pos3 = pos + 2;
int pos4 = pos + 3;
glEnableVertexAttribArray(pos1);
glEnableVertexAttribArray(pos2);
glEnableVertexAttribArray(pos3);
glEnableVertexAttribArray(pos4);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glVertexAttribPointer(pos1, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(0));
glVertexAttribPointer(pos2, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 4));
glVertexAttribPointer(pos3, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 8));
glVertexAttribPointer(pos4, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 12));
glVertexAttribDivisor(pos1, 1);
glVertexAttribDivisor(pos2, 1);
glVertexAttribDivisor(pos3, 1);
glVertexAttribDivisor(pos4, 1);
glBindVertexArray(0); //VAO - End
//Matrix vars
glm::mat4 Projection, Rotating, Scaling, Translation, Identity;
glm::vec3 ZRotateVec(0.0f, 0.0f, 1.0f);
//Calc projection-matrix and put shader (uniform)
Projection = glm::ortho(0.0f, (float)width, 0.0f, (float)height, 0.0f, 1.0f);
glUseProgram(programID);
glUniformMatrix4fv(glGetUniformLocation(programID, "projectionMatrix"), 1, GL_FALSE, &Projection[0][0]);
//Creating sprites
std::srand(static_cast<unsigned int>(std::time(0)));
int iActInstanceCount = 10000;
Sprite *ptrSprites = new Sprite[iActInstanceCount];
for (int i = 0; i < iActInstanceCount; ++i)
{
ptrSprites[i].dimension = glm::vec3(16, 16, 1.0f);
ptrSprites[i].position = glm::vec3(std::rand()%(width-32),std::rand()%(height-32),-1.0f *((std::rand()%256)/256.0f));
ptrSprites[i].rotation = rand() % 360 + 0.0f;
ptrSprites[i].rx = static_cast<float>(std::rand() % 2);
ptrSprites[i].ry = static_cast<float>(std::rand() % 2);
ptrSprites[i].speed = (std::rand() % 100) + 1.0f;
if (ptrSprites[i].speed < 1.0f) ptrSprites[i].speed = 1.0f;
}
//FPS init
double fFramesRendered = 0.0f;
double fFrameMeasurementStart = 0.0f;
double fFPS = 0.0f;
double fCurrentTime = 0.0f;
glfwSetTime(0);
//Main-loop (also renderloop)
while (!glfwWindowShouldClose(window))
{
//application-logic
if (glfwGetKey(window, GLFW_KEY_ESCAPE)== GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
const double fNewTime = glfwGetTime();
double fDeltaTime = fNewTime - fCurrentTime;
fCurrentTime = fNewTime;
for (int i = 0; i < iActInstanceCount; ++i)
{
float fSpeed = ptrSprites[i].speed * static_cast<float>(fDeltaTime);
ptrSprites[i].rotation += fSpeed;
if (ptrSprites[i].rotation >= 360.0f) ptrSprites[i].rotation = 0.0f;
if (ptrSprites[i].rx == 1) ptrSprites[i].position.x = ptrSprites[i].position.x + fSpeed;
if (ptrSprites[i].rx == 0) ptrSprites[i].position.x = ptrSprites[i].position.x - fSpeed;
if (ptrSprites[i].ry == 1) ptrSprites[i].position.y = ptrSprites[i].position.y + fSpeed;
if (ptrSprites[i].ry == 0) ptrSprites[i].position.y = ptrSprites[i].position.y - fSpeed;
if (ptrSprites[i].position.x <= 0) ptrSprites[i].rx = 1;
if (ptrSprites[i].position.x + ptrSprites[i].dimension.x >= width) ptrSprites[i].rx = 0;
if (ptrSprites[i].position.y <= 0) ptrSprites[i].ry = 1;
if (ptrSprites[i].position.y + ptrSprites[i].dimension.y >= height) ptrSprites[i].ry = 0;
//matrix-calculations (saved in local buffer)
Translation = glm::translate(Identity, ptrSprites[i].position + glm::vec3(ptrSprites[i].dimension.x / 2.0f, ptrSprites[i].dimension.y / 2.0f, 0.0f));
Scaling = glm::scale(Translation, ptrSprites[i].dimension);
ptrInstanceBufferData[i] = glm::rotate(Scaling, ptrSprites[i].rotation, ZRotateVec);
}
//render-call
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glUseProgram(programID);
glBindVertexArray(vertexArrayObject);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW); // Buffer orphaning
glBufferSubData(GL_ARRAY_BUFFER, 0, iActInstanceCount * sizeof(glm::mat4), ptrInstanceBufferData);
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, iActInstanceCount);
glBindVertexArray(0);
glfwSwapBuffers(window);
glfwPollEvents();
//FPS-stuff
++fFramesRendered;
if ((fCurrentTime*1000.0f) >= (fFrameMeasurementStart*1000.0f) + 1000.0f)
{
fFPS = ((fCurrentTime*1000.0f) - (fFrameMeasurementStart*1000.0f)) / 1000.0f * fFramesRendered;
fFrameMeasurementStart = fCurrentTime;
fFramesRendered = 0;
std::cout << "FPS: " << fFPS << std::endl;
}
}
//Termination and cleanup
glDeleteBuffers(1, &vertexBuffer);
glDeleteBuffers(1, &instanceBuffer);
glDeleteVertexArrays(1, &vertexArrayObject);
glDeleteProgram(programID);
glfwDestroyWindow(window);
glfwTerminate();
return _getch();
}

Well, after testing it on my machine, it is definitely CPU limited, so nothing you do with OGL is going to make much difference. I get about ~300fps with GCC on at least -O1, but only ~80 with -O0. My CPU is very fast (i7 2600k, 4.7ghz), but my GPU is rather slow (GT 520). I'm also on Ubuntu.
Some quick ideas for things that might speed it up a little:
Put the vertex positions in an array in the vertex shader and use gl_VertexID to access them
Use GL_TRIANGLE_STRIP instead of GL_TRIANGLES
Use radians for angles, as otherwise GLM has to convert them
None of these are likely to make much of any impact, really. Just make sure your compiler is set up right, and there probably isn't much more to do.

Is it possible to build a heatmap from point data at 60 times per second?

I'm working on a simulation for clouds (actual clouds) where the clouds are simulated by 3D points, then projected into a 2D heatmap, about 640x480 units big. The number of points is about 50k, which is as small as I can go without the simulation breaking, but I can't seem to find a way to perform this with any speed (it usually takes 3-5 seconds of runtime)
I suppose my question is, is it feasible for an average computer to be able to do this yet? I usually underestimate how fast computers are nowadays, but I might be overestimating them in this case. I haven't optimized the simulation yet, but if it's flat-out not possible, it'd be good to know and save the trouble now.
If it is possible, is there any technique that might prove useful for making the conversion from point data to heatmap fast enough to update 60 times a second? It really is just looking at the point data and writing to a 2D array the results after a transformation, so it's mostly bound to memory lookup I think.

Yes if your data is already in memory or you can compute it quickly
Just try it out with SDL textures (or OpenGL textures directly, which is what SDL uses):
heatmap.c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <SDL2/SDL.h>
#define COLOR_MAX 255
double common_get_secs(void) {
struct timespec ts;
timespec_get(&ts, TIME_UTC);
return ts.tv_sec + (1e-9 * ts.tv_nsec);
}
const double COMMON_FPS_GRANULARITY_S = 0.5;
double common_fps_last_time_s;
unsigned int common_fps_nframes;
void common_fps_init() {
common_fps_nframes = 0;
common_fps_last_time_s = common_get_secs();
}
void common_fps_update_and_print() {
double dt, current_time_s;
current_time_s = common_get_secs();
common_fps_nframes++;
dt = current_time_s - common_fps_last_time_s;
if (dt > COMMON_FPS_GRANULARITY_S) {
printf("FPS = %f\n", common_fps_nframes / dt);
common_fps_last_time_s = current_time_s;
common_fps_nframes = 0;
}
}
int main(void) {
SDL_Event event;
SDL_Renderer *renderer = NULL;
SDL_Texture *texture = NULL;
SDL_Window *window = NULL;
Uint8 *base;
int pitch;
void *pixels = NULL;
const unsigned int
WINDOW_WIDTH = 500,
WINDOW_HEIGHT = WINDOW_WIDTH;
const double
SPEED = WINDOW_WIDTH / 10.0,
CENTER_X = WINDOW_WIDTH / 2.0,
CENTER_Y = WINDOW_HEIGHT / 2.0,
PERIOD = WINDOW_WIDTH / 10.0,
PI2 = 2.0 * acos(-1.0);
double dt, initial_time;
float z;
unsigned int x, xc, y, yc;
SDL_Init(SDL_INIT_TIMER | SDL_INIT_VIDEO);
SDL_CreateWindowAndRenderer(WINDOW_WIDTH, WINDOW_WIDTH, 0, &window, &renderer);
texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
SDL_TEXTUREACCESS_STREAMING, WINDOW_WIDTH, WINDOW_HEIGHT);
initial_time = common_get_secs();
common_fps_init();
while (1) {
dt = common_get_secs() - initial_time;
SDL_LockTexture(texture, NULL, &pixels, &pitch);
for (x = 0; x < WINDOW_WIDTH; x++) {
for (y = 0; y < WINDOW_HEIGHT; y++) {
xc = CENTER_X - x;
yc = CENTER_Y - y;
/*z = COLOR_MAX * 0.5 * (1.0 + (sin(PI2 * (sqrt(xc*xc + yc*yc) - SPEED * dt) / PERIOD)));*/
z = (int)(x + y + SPEED * dt) % COLOR_MAX;
base = ((Uint8 *)pixels) + (4 * (x * WINDOW_WIDTH + y));
base[0] = 0;
base[1] = 0;
base[2] = z;
base[3] = COLOR_MAX;
}
}
SDL_UnlockTexture(texture);
SDL_RenderCopy(renderer, texture, NULL, NULL);
SDL_RenderPresent(renderer);
common_fps_update_and_print();
if (SDL_PollEvent(&event) && event.type == SDL_QUIT)
break;
}
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
SDL_Quit();
return EXIT_SUCCESS;
}
Compile and run:
gcc -Wall -std=c11 -o heatmap.out heatmap.c -lSDL2 -lm
./heatmap.out
On Ubuntu 16.04, the simpler calculation:
z = (x + y + SPEED * dt) % COLOR_MAX
reaches 300 FPS on a Lenovo Thinkpad T430 with an Nvidia NVS 5400M (2012 mid-end).
So of course, a pre-computed result on memory would be even faster.
If the computation is a little more complicated however:
z = COLOR_MAX * 0.5 * (1.0 + (sin(PI2 * (sqrt(xc*xc + yc*yc) - SPEED * dt) / PERIOD)))
FPS is just 30, so we see that the limiting factor quickly becomes the calculation:
If you cannot run calculations fast enough, you will likely need to store to disk to not overflow memory, and then it is all about benchmarking your disk + compression methods (video codecs).
Fragment shaders
If you can run your computation on the fragment shader, you can do much more complicated things however in real time.
With the following code, the more complicated calculation runs at 3k FPS!
But it will be harder to implement, so make sure you need it.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <SDL2/SDL.h>
#define GLEW_STATIC
#include <GL/glew.h>
static const GLuint WIDTH = 500;
static const GLuint HEIGHT = 500;
static const GLchar* vertex_shader_source =
"#version 120\n"
"attribute vec2 coord2d;\n"
"void main(void) {\n"
" gl_Position = vec4(coord2d, 0.0, 1.0);\n"
"}\n";
static const GLchar* fragment_shader_source =
"#version 120\n"
"uniform float pi2;\n"
"uniform float time;\n"
"uniform float width;\n"
"uniform float height;\n"
"uniform float periods_x;\n"
"uniform float periods_y;\n"
"void main(void) {\n"
" float center_x = width / 2.0;"
" float center_y = height / 2.0;"
" float x = (gl_FragCoord.x - center_x) * periods_x / width;"
" float y = (gl_FragCoord.y - center_y) * periods_y / height;"
" gl_FragColor[0] = 0.5 * (1.0 + (sin((pi2 * (sqrt(x*x + y*y) - time)))));\n"
" gl_FragColor[1] = 0.0;\n"
" gl_FragColor[2] = 0.0;\n"
"}\n";
static const GLfloat vertices[] = {
-1.0, 1.0,
1.0, 1.0,
1.0, -1.0,
-1.0, -1.0,
};
static const GLuint indexes[] = {
0, 2, 1,
0, 3, 2,
};
double common_get_secs(void) {
struct timespec ts;
timespec_get(&ts, TIME_UTC);
return ts.tv_sec + (1e-9 * ts.tv_nsec);
}
const double COMMON_FPS_GRANULARITY_S = 0.5;
double common_fps_last_time_s;
unsigned int common_fps_nframes;
void common_fps_init() {
common_fps_nframes = 0;
common_fps_last_time_s = common_get_secs();
}
void common_fps_update_and_print() {
double dt, current_time_s;
current_time_s = common_get_secs();
common_fps_nframes++;
dt = current_time_s - common_fps_last_time_s;
if (dt > COMMON_FPS_GRANULARITY_S) {
printf("FPS = %f\n", common_fps_nframes / dt);
common_fps_last_time_s = current_time_s;
common_fps_nframes = 0;
}
}
/* Copy paste. Upstream on OpenGL. */
GLint common_get_shader_program(
const char *vertex_shader_source,
const char *fragment_shader_source) {
GLchar *log = NULL;
GLint fragment_shader, log_length, program, success, vertex_shader;
/* Vertex shader */
vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vertex_shader_source, NULL);
glCompileShader(vertex_shader);
glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &success);
glGetShaderiv(vertex_shader, GL_INFO_LOG_LENGTH, &log_length);
log = malloc(log_length);
if (log_length > 0) {
glGetShaderInfoLog(vertex_shader, log_length, NULL, log);
printf("vertex shader log:\n\n%s\n", log);
}
if (!success) {
printf("vertex shader compile error\n");
exit(EXIT_FAILURE);
}
/* Fragment shader */
fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &fragment_shader_source, NULL);
glCompileShader(fragment_shader);
glGetShaderiv(fragment_shader, GL_COMPILE_STATUS, &success);
glGetShaderiv(fragment_shader, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log = realloc(log, log_length);
glGetShaderInfoLog(fragment_shader, log_length, NULL, log);
printf("fragment shader log:\n\n%s\n", log);
}
if (!success) {
printf("fragment shader compile error\n");
exit(EXIT_FAILURE);
}
/* Link shaders */
program = glCreateProgram();
glAttachShader(program, vertex_shader);
glAttachShader(program, fragment_shader);
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &success);
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log = realloc(log, log_length);
glGetProgramInfoLog(program, log_length, NULL, log);
printf("shader link log:\n\n%s\n", log);
}
if (!success) {
printf("shader link error");
exit(EXIT_FAILURE);
}
free(log);
glDeleteShader(vertex_shader);
glDeleteShader(fragment_shader);
return program;
}
int main(void) {
/* SDL variables. */
SDL_Event event;
SDL_Window *window;
SDL_GLContext gl_context;
const unsigned int WINDOW_WIDTH = 500, WINDOW_HEIGHT = WINDOW_WIDTH;
double dt, initial_time;
/* OpenGL variables. */
GLint
attribute_coord2d,
ibo_size,
width_location,
height_location,
time_location,
periods_x_location,
periods_y_location,
pi2_location,
program
;
GLuint ibo, vbo;
const char *attribute_name = "coord2d";
const float
periods_x = 10.0,
periods_y = 10.0,
pi2 = 2.0 * acos(-1.0)
;
/* SDL init. */
SDL_Init(SDL_INIT_TIMER | SDL_INIT_VIDEO);
window = SDL_CreateWindow(__FILE__, 0, 0,
WINDOW_WIDTH, WINDOW_HEIGHT, SDL_WINDOW_OPENGL);
gl_context = SDL_GL_CreateContext(window);
glewInit();
/* OpenGL init. */
{
program = common_get_shader_program(vertex_shader_source, fragment_shader_source);
attribute_coord2d = glGetAttribLocation(program, attribute_name);
if (attribute_coord2d == -1) {
fprintf(stderr, "error: attribute_coord2d: %s\n", attribute_name);
return EXIT_FAILURE;
}
height_location = glGetUniformLocation(program, "height");
periods_x_location = glGetUniformLocation(program, "periods_x");
periods_y_location = glGetUniformLocation(program, "periods_y");
pi2_location = glGetUniformLocation(program, "pi2");
time_location = glGetUniformLocation(program, "time");
width_location = glGetUniformLocation(program, "width");
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
glUseProgram(program);
glViewport(0, 0, WIDTH, HEIGHT);
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glGenBuffers(1, &ibo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indexes), indexes, GL_STATIC_DRAW);
glGetBufferParameteriv(GL_ELEMENT_ARRAY_BUFFER, GL_BUFFER_SIZE, &ibo_size);
glUniform1f(pi2_location, pi2);
glUniform1f(width_location, WIDTH);
glUniform1f(height_location, HEIGHT);
glUniform1f(periods_x_location, periods_x);
glUniform1f(periods_y_location, periods_y);
}
initial_time = common_get_secs();
common_fps_init();
while (1) {
dt = common_get_secs() - initial_time;
/* OpenGL draw. */
glClear(GL_COLOR_BUFFER_BIT);
glEnableVertexAttribArray(attribute_coord2d);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glVertexAttribPointer(attribute_coord2d, 2, GL_FLOAT, GL_FALSE, 0, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glUniform1f(time_location, dt);
glDrawElements(GL_TRIANGLES, ibo_size / sizeof(indexes[0]), GL_UNSIGNED_INT, 0);
glDisableVertexAttribArray(attribute_coord2d);
common_fps_update_and_print();
SDL_GL_SwapWindow(window);
if (SDL_PollEvent(&event) && event.type == SDL_QUIT)
break;
}
/* OpenGL cleanup. */
glDeleteBuffers(1, &ibo);
glDeleteBuffers(1, &vbo);
glDeleteProgram(program);
/* SDL cleanup. */
SDL_GL_DeleteContext(gl_context);
SDL_DestroyWindow(window);
SDL_Quit();
return EXIT_SUCCESS;
}
Then:
gcc -Wall -std=c11 a.c -lSDL2 -lm -lGL -lGLEW
recordmydesktop screen capture including FPS periodically printed to terminal and converted to GIF for upload:
GitHub upstreams:
https://github.com/cirosantilli/cpp-cheat/blob/ac2dd35396634b3ac0d188eb57aa299e31be0436/sdl/heatmap_streaming.c
https://github.com/cirosantilli/cpp-cheat/blob/ac2dd35396634b3ac0d188eb57aa299e31be0436/sdl/heatmap_shader.c

It is definitely feasible, probably even if the calculation are done by the CPU. Ideally you should be using the GPU. The APIs needed are either OpenCL or since you are rendering the results you might want to make use of Compute Shaders.
Both techniques allow you to write a small program (shader) that works on a single element (point). These all get run in parallel on the GPU which should allow them to run really fast.

Why my CUDA application isn't starting?

The below code compiles without any errors, but when I run it, it says "Application was not able to start correctly (0xc000007b). Click OK to close the application.".
#include <math.h>
#include <GL\glew.h>
#include <GL\glut.h>
#include <cuda_gl_interop.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
GLuint vbo;
struct cudaGraphicsResource* vbo_cuda;
unsigned int width, height;
float tim;
__global__ void createVertices(float4* positions, float tim,
unsigned int width, unsigned int height) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
float u = x / (float)width;
float v = y / (float)height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u * freq + tim)
* cosf(v * freq + tim) * 0.5f;
positions[y * width + x] = make_float4(u, w, v, 1.0f);
}
void init(void) {
glClearColor(0, 0, 0, 0);
glShadeModel(GL_FLAT);
}
void reshape(int w, int h) {
glViewport(0, 0, (GLsizei)w, (GLsizei)h);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60, (GLfloat)w/(GLfloat)h, 1, 200);
}
void display() {
float4* positions;
cudaGraphicsMapResources(1, &vbo_cuda, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&positions,
&num_bytes,
vbo_cuda);
// execute kernel
dim3 dimBlock(16, 16, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
createVertices<<<dimGrid, dimBlock>>>(positions, tim,
width, height);
cudaGraphicsUnmapResources(1, &vbo_cuda, 0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
// render from the vbo
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, width * height);
glDisableClientState(GL_VERTEX_ARRAY);
glutSwapBuffers();
glutPostRedisplay();
}
void deleteVBO() {
cudaGraphicsUnregisterResource(vbo_cuda);
glDeleteBuffers(1, &vbo);
}
int main (int argc, char**argv) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(500, 500);
glutInitWindowPosition(100, 100);
glutCreateWindow("Cuda OpenGL Interop");
init();
glutDisplayFunc(display);
glutReshapeFunc(reshape);
cudaGLSetGLDevice(0);
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
unsigned int size = width * height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
cudaGLRegisterBufferObject(vbo);
glutMainLoop();
return 0;
}

The error is from Windows: your attempt is falling very short, as the executable you have produced is not valid for windows. It's possible you are using DEBUG DLL's with a RELEASE build. Or that you are mixing a 32 bit build with 64 bit DLL's, or many other odd combinations... (64 bit exe on 32 bit system?....)
Usually you can get more information regarding DLL problems looking in the Windows event viewer, but if you start running your application in the debugger (for sure with visual studio) you will get more information on your error.
If you can't understand what's wrong, you can try to find what is failing with http://www.dependencywalker.com/.

1st error, which was the reason to start this thread, disappeared by installing the right glew32.dll libraries into the right folders.
2nd error, where the debugger stopped at glGenBuffers(1, vbo), was because I forgot about glewInit()
Below you can find the working application:
#include <math.h>
#include <GL\glew.h>
#include <GL\glut.h>
#include <cuda_gl_interop.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
GLuint vbo;
struct cudaGraphicsResource* vbo_cuda;
const unsigned int window_width = 512;
const unsigned int window_height = 512;
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
float tim = 0.0;
__global__ void createVertices(float4* positions, float tim,
unsigned int mesh_width, unsigned int mesh_height) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
float u = x / (float)mesh_width;
float v = y / (float)mesh_height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u * freq + tim)
* cosf(v * freq + tim) * 0.5f;
positions[y * mesh_width + x] = make_float4(u, w, v, 1.0f);
}
void runCuda(GLuint vbo)
{
// map OpenGL buffer object for writing from CUDA
float4* positions;
cudaGraphicsMapResources(1, &vbo_cuda, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&positions,
&num_bytes,
vbo_cuda);
// execute kernel
dim3 dimBlock(16, 16, 1);
dim3 dimGrid(mesh_width / dimBlock.x, mesh_height / dimBlock.y, 1);
createVertices<<<dimGrid, dimBlock>>>(positions, tim,
mesh_width, mesh_height);
cudaGraphicsUnmapResources(1, &vbo_cuda, 0);
}
void init(void) {
glewInit();
glClearColor(0, 0, 0, 1);
glDisable(GL_DEPTH_TEST);
}
void reshape(int w, int h) {
// viewport
glViewport(0, 0, w, h);
// projection
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60, (GLfloat)w/(GLfloat)h, 0.1, 10);
}
void createVBO(GLuint* vbo) {
// create buffer object
glGenBuffers(1, vbo);
glBindBuffer(GL_ARRAY_BUFFER, *vbo);
// initialize buffer object
unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
cudaGLRegisterBufferObject(*vbo);
}
void deleteVBO(GLuint* vbo) {
cudaGraphicsUnregisterResource(vbo_cuda);
glBindBuffer(1, *vbo);
glDeleteBuffers(1, vbo);
cudaGLUnregisterBufferObject(*vbo);
}
void display() {
// run CUDA kernel to generate vertex positions
runCuda(vbo);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// set view matrix
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
// render from the vbo
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glColor3f(1, 0, 0);
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
glDisableClientState(GL_VERTEX_ARRAY);
glutSwapBuffers();
glutPostRedisplay();
tim+=1;
}
void keyboard(unsigned char key, int x, int y)
{
switch(key) {
case(27) :
deleteVBO(&vbo);
exit(0);
}
}
int main (int argc, char**argv) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(window_width, window_height);
glutInitWindowPosition(100, 100);
glutCreateWindow("Cuda GL interop");
init();
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
glutReshapeFunc(reshape);
// create VBO
createVBO(&vbo);
// run the cuda part
runCuda(vbo);
cudaGLSetGLDevice(0);
glutMainLoop();
return 0;
}

Screen corruption when using Opengl Buffers

I'm writing an Opengl game but I got some problems using Opengl buffers.
My old code thats works (but has a lot of CPU consumption and low fps) looks like this:
void Terrain::drawObject(sf::RenderWindow* window)
{
float scale = 5.0f / max(width_ - 1, length_ - 1);
glScalef(scale, scale, scale);
glTranslatef(-(float) (width_ - 1) / 2, 0.0f, -(float) (length_ - 1) / 2);
bool texture = true;
for (int z = 0; z < width_ - 1; z++) {
//Makes OpenGL draw a triangle at every three consecutive vertices
if (getHeight(0, z) > 15)
{
glBindTexture(GL_TEXTURE_2D, textures_.find(Layer::High)->second);
}
else
{
glBindTexture(GL_TEXTURE_2D, textures_.find(Layer::Mid)->second);
}
glBegin(GL_TRIANGLE_STRIP);
for (int x = 0; x < width_; x++) {
sf::Vector3f normal = getNormal(x, z);
glNormal3f(normal.x, normal.y, normal.z);
if (texture)
{
glTexCoord2f(0, 0);
}
else
{
glTexCoord2f(1, 0);
}
glVertex3f((GLfloat) x, (GLfloat) getHeight(x, z), (GLfloat) z);
normal = getNormal(x, z + 1);
glNormal3f(normal.x, normal.y, normal.z);
if (texture)
{
glTexCoord2f(0, 1);
texture = !texture;
}
else
{
glTexCoord2f(1, 1);
texture = !texture;
}
glVertex3f((GLfloat) x, (GLfloat) getHeight(x, z + 1), (GLfloat) z + 1);
}
glEnd();
}
}
Now I have changed my code to get a higher fps. I use Opengl buffers to get that. But when I use them everything on the screen is corrupted. i use following source code now:
void Terrain::drawObject(sf::RenderWindow* window)
{
if (!buffersCreated_)
{
createBuffers();
buffersCreated_ = true;
}
float scale = 5.0f / max(width_ - 1, length_ - 1);
glScalef(scale, scale, scale);
glTranslatef(-(float) (width_ - 1) / 2, 0.0f, -(float) (length_ - 1) / 2);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
glEnableClientState(GL_VERTEX_ARRAY);
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, textures_.find(Layer::Mid)->second);
glBindBuffer(GL_ARRAY_BUFFER, textCoordBuffer_);
glTexCoordPointer(2, GL_FLOAT, 0, (char *) NULL);
glEnableClientState(GL_NORMAL_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, normalBuffer_);
glNormalPointer(GL_FLOAT, 0, (char *) NULL);
glEnableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer_);
glVertexPointer(3, GL_FLOAT, 0, (char *) NULL);
glDrawArrays(GL_TRIANGLE_STRIP, 0, vhVertexCount);
glDisableClientState(GL_VERTEX_ARRAY);
glDisable(GL_TEXTURE_2D);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
}
void Terrain::createBuffers()
{
vhVertexCount = (int) (width_ * length_ * 6) / (1 * 1);
sf::Vector3f* vhVertices = new sf::Vector3f[vhVertexCount];
sf::Vector3f* vhNormal = new sf::Vector3f[vhVertexCount];
sf::Vector2i* vhTexCoords = new sf::Vector2i[vhVertexCount];
bool texture = true;
int nIndex = 0;
for (int z = 0; z < length_ - 1; z++) {
for (int x = 0; x < width_; x++) {
sf::Vector3f normal = getNormal(x, z);
if (texture)
{
vhTexCoords[nIndex] = sf::Vector2i(0, 0);
}
else
{
vhTexCoords[nIndex] = sf::Vector2i(1, 0);
}
vhVertices[nIndex] = sf::Vector3f((float) x, getHeight(x, z), (float) z);
vhNormal[nIndex] = sf::Vector3f(normal.x, normal.y, normal.z);
nIndex++;
normal = getNormal(x, z + 1);
if (texture)
{
vhTexCoords[nIndex] = sf::Vector2i(0, 1);
}
else
{
vhTexCoords[nIndex] = sf::Vector2i(1, 1);
}
vhVertices[nIndex] = sf::Vector3f((float) x, getHeight(x, z + 1), (float) z + 1);
vhNormal[nIndex] = sf::Vector3f(normal.x, normal.y, normal.z);
nIndex++;
}
}
glGenBuffers(1, &vertexBuffer_);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer_);
glBufferData(GL_ARRAY_BUFFER, vhVertexCount * sizeof(sf::Vector3f), vhVertices, GL_STATIC_DRAW);
glGenBuffers(1, &normalBuffer_);
glBindBuffer(GL_ARRAY_BUFFER, normalBuffer_);
glBufferData(GL_ARRAY_BUFFER, vhVertexCount * sizeof(sf::Vector3f), vhNormal, GL_STATIC_DRAW);
glGenBuffers(1, &textCoordBuffer_);
glBindBuffer(GL_ARRAY_BUFFER, textCoordBuffer_);
glBufferData(GL_ARRAY_BUFFER, vhVertexCount * sizeof(sf::Vector2i), vhTexCoords, GL_STATIC_DRAW);
delete [] vhVertices;
vhVertices = nullptr;
delete [] vhNormal;
vhNormal = nullptr;
delete [] vhTexCoords;
vhTexCoords = nullptr;
}
I use SFML to create the window and render 2D stuff like the menu in the lower left corner.
The code to render SFML stuff with Opengl stuff looks like:
void GameEngine::gameDraw()
{
// Clear the depth buffer
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
if (camera_ != nullptr)
{
camera_->drawCamera();
}
openglObjectsMutex_.lock();
for (OpenglObject* openglObject : openglObjects_)
{
openglObject->drawObject(window_);
}
openglObjectsMutex_.unlock();
window_->pushGLStates();
sfmlObjectsMutex_.lock();
for (SfmlObject * gameObject : sfmlObjects_)
{
gameObject->drawObject(window_);
}
sfmlObjectsMutex_.unlock();
window_->popGLStates();
}
Can someone find any problems with the buffer code?
The above image is the correct one but with low fps.
After changing the source to using buffers i got the below image.

SFML but it can only saves/restores OpenGL 2.x states. We must disable what we enable in +3.x states. It works adding:
It's fixed adding at the end of own drawing something like:
glBindBuffer( GL_ARRAY_BUFFER, 0 );
glBindBuffer( GL_ELEMENT_ARRAY_BUFFER, 0 );
glBindTexture( GL_TEXTURE_2D, 0 );
glDisableVertexAttribArray( 0 );
glUseProgram( 0 );

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Program not entering the main() - c++

Related

Polygon tearing in OpenGL

Instancing with OpenGL 3.3 seems very slow

Is it possible to build a heatmap from point data at 60 times per second?

Why my CUDA application isn't starting?

Screen corruption when using Opengl Buffers

Categories

Resources