What's the GLSL equivalent of [[vk::binding(0, 0)]] RWStructuredBuffer<int> in a compute shader

I have this HLSL and I want to write the equivalent in GLSL. If it's any use, I'm trying to run this example https://github.com/mcleary/VulkanHpp-Compute-Sample
[[vk::binding(0, 0)]] RWStructuredBuffer<int> InBuffer;
[[vk::binding(1, 0)]] RWStructuredBuffer<int> OutBuffer;

[numthreads(1, 1, 1)]
void Main(uint3 DTid : SV_DispatchThreadID)
{
    OutBuffer[DTid.x] = InBuffer[DTid.x] * InBuffer[DTid.x];
}

The GLSL equivalent should look something like this. Note that [[vk::binding(X, Y)]] takes the binding first and the descriptor set second, so both buffers live in set 0; in Vulkan GLSL that is layout(set = 0, binding = N), and set = 0 may be omitted since it is the default:
#version 450
layout(std430, set = 0, binding = 0) buffer InBuffer {
    int inBuffer[];
};
layout(std430, set = 0, binding = 1) buffer OutBuffer {
    int outBuffer[];
};
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
    outBuffer[gl_GlobalInvocationID.x] = inBuffer[gl_GlobalInvocationID.x] * inBuffer[gl_GlobalInvocationID.x];
}
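If it helps, a shader like this can be compiled to SPIR-V with the glslc tool that ships with the Vulkan SDK (the file name square.comp is just a placeholder):
glslc -fshader-stage=compute square.comp -o square.spv
glslc also infers the stage from the .comp extension, so the flag is optional here.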

Related

Weird compute shader latency

I'm trying to do frustum culling via a compute shader. For that I have a pair of buffers for instanced vertex attributes, and a pair of buffers for indirect draw commands. My compute shader checks whether instance coordinates from the first buffer are within the bounding volume, referencing the first draw buffer for counts, using subgroupBallot and bitCount to find the offset within the subgroup, then adding the results from other subgroups and a global offset, and finally storing the result in the second buffer. The global offset is stored in the second indirect draw buffer.
The problem is that, under load, the frustum may be a few (>1) frames late relative to the moving camera, with wide bands of disappeared objects at the edges. That seems weird to me because culling and rendering are done within the same command buffer.
When I take a capture in RenderDoc, take a screenshot (Alt+PrintScreen), or pause the render-present thread, things snap back to how they should be.
My only guess is that the compute shader from a past frame continues to execute even when the new frame starts to be drawn, though this should not happen given the pipeline barriers.
Shader code:
#version 460
#extension GL_KHR_shader_subgroup_ballot : require

struct drawData {
    uint indexCount;
    uint instanceCount;
    uint firstIndex;
    uint vertexOffset;
    uint firstInstance;
};
struct instanceData {
    float x, y, z;
    float a, b, c, d;
};

layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;

layout(set = 0, binding = 0) uniform A
{
    mat4 cam;
    vec4 camPos;
    vec4 l;
    vec4 t;
    vec4 r;
    vec4 b;
};
layout(set = 0, binding = 1) buffer B
{
    uint count;
    drawData data[];
} Draw[2];
layout(set = 0, binding = 2) buffer C
{
    instanceData data[];
} Instance[2];

shared uint offsetsM[32];

void main()
{
    const uint gID = gl_LocalInvocationID.x;
    const uint lID = gl_SubgroupInvocationID;
    const uint patchSize = gl_WorkGroupSize.x;
    Draw[1].data[0] = Draw[0].data[0]; //copy data like index count
    Draw[1].count = Draw[0].count;
    uint offsetG = 0; //accumulating offset within end buffer
    uint loops = Draw[0].data[0].instanceCount / patchSize; //constant loop count
    for (uint i = 0; i < loops; ++i) {
        uint posa = i * patchSize + gID; //runs better this way for some reason
        vec3 pos = camPos.xyz - vec3(Instance[0].data[posa].x, Instance[0].data[posa].y, Instance[0].data[posa].z); //position relative to camera
        mat4x3 lrtb = mat4x3(l.xyz, r.xyz, t.xyz, b.xyz);
        vec4 dist = pos * lrtb + Model.data[0].rad; //dot products and radius tolerance
        bool Pass = posa < Draw[0].data[0].instanceCount && //is real
            (dot(pos, pos) < l.w * l.w) && //not too far
            all(greaterThan(dist, vec4(0))); //within view frustum
        subgroupBarrier(); //no idea what is best; put what works
        uvec4 actives = subgroupBallot(Pass); //count passed instances
        if (subgroupElect())
            offsetsM[gl_SubgroupID] = bitCount(actives).x + bitCount(actives).y;
        barrier();
        uint offsetL = bitCount(actives & gl_SubgroupLtMask).x + bitCount(actives & gl_SubgroupLtMask).y; //offset within subgroup
        uint ii = 0;
        if (Pass) {
            for (; ii < gl_SubgroupID; ++ii)
                offsetG += offsetsM[ii]; //offsets before subgroup
            Instance[1].data[offsetG + offsetL] = Instance[0].data[posa];
            for (; ii < gl_NumSubgroups; ++ii)
                offsetG += offsetsM[ii]; //offsets after subgroup
        }
        else for (; ii < gl_NumSubgroups; ++ii)
            offsetG += offsetsM[ii]; //same but no data copying
    }
    if (gID == 0)
        Draw[1].data[0].instanceCount = offsetG;
}
For the render pass after the compute pass I have these dependencies:
{ //1
    deps[1].srcSubpass = VK_SUBPASS_EXTERNAL;
    deps[1].dstSubpass = 0;
    deps[1].srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    deps[1].dstStageMask = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    deps[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    deps[1].dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
    deps[1].dependencyFlags = 0;
}
{ //2
    deps[2].srcSubpass = VK_SUBPASS_EXTERNAL;
    deps[2].dstSubpass = 0;
    deps[2].srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    deps[2].dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    deps[2].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    deps[2].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
    deps[2].dependencyFlags = 0;
}
The command buffer is (fully reused as-is, one for each swapchain image):
vkBeginCommandBuffer(cmd, &begInfo);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layoutsPipe[1],
                        0, 1, &descs[1], 0, 0);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipes[1]);
vkCmdDispatch(cmd, 1, 1, 1);

VkBufferMemoryBarrier bufMemBar[2];
{ //mem bars
    { //0 indirect
        bufMemBar[0].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        bufMemBar[0].dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
        bufMemBar[0].buffer = bufferIndirect;
        bufMemBar[0].offset = 0;
        bufMemBar[0].size = -1; // == VK_WHOLE_SIZE
    }
    { //1 vertex instance
        bufMemBar[1].srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        bufMemBar[1].dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
        bufMemBar[1].buffer = bufferInstance;
        bufMemBar[1].offset = 0;
        bufMemBar[1].size = -1; // == VK_WHOLE_SIZE
    }
}
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                     VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, 0, 0, 0, 1, &bufMemBar[0], 0, 0);
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, 0, 1, &bufMemBar[1], 0, 0);

VkRenderPassBeginInfo passBegInfo;
passBegInfo.renderPass = pass;
passBegInfo.framebuffer = chain.frames[i];
passBegInfo.renderArea = {{0, 0}, chain.dim};
VkClearValue clears[2]{{0}, {0}};
passBegInfo.clearValueCount = 2;
passBegInfo.pClearValues = clears;

vkCmdBeginRenderPass(cmd, &passBegInfo, VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, layoutsPipe[0], 0, 1, &descs[0], 0, 0);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipes[0]);
VkBuffer buffersVertex[2]{bufferVertexProto, bufferInstance};
VkDeviceSize offsetsVertex[2]{0, 0};
vkCmdBindVertexBuffers(cmd, 0, 2, buffersVertex, offsetsVertex);
vkCmdBindIndexBuffer(cmd, bufferIndex, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexedIndirectCount(cmd, bufferIndirect, 0 + 4,
                              bufferIndirect, 0,
                              count.maxDraws, sizeof(VkDrawIndexedIndirectCommand));
vkCmdEndRenderPass(cmd);
vkEndCommandBuffer(cmd);
Rendering and presentation are synchronised with two semaphores: imageAvailable and renderFinished. The frustum calculation happens in the right order on the CPU. Validation layers are enabled.
The problem was that I lacked host synchronisation. Indeed, even within the same command buffer, there are no host synchronisation guarantees (and that makes sense, since it enables us to use events).
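For anyone else hitting this, a minimal sketch of the idea, assuming one fence per frame in flight. The names device, queue, frameFence, mappedUniform, cameraData, and submitInfo are hypothetical, not from the code above:
//Sketch only: block the host until the GPU has finished the frame
//whose resources are about to be overwritten.
vkWaitForFences(device, 1, &frameFence[i], VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &frameFence[i]);
//Only now is it safe for the host to write new camera/frustum data.
memcpy(mappedUniform[i], &cameraData, sizeof cameraData);
//Resubmit; the fence signals again when the GPU finishes this frame.
vkQueueSubmit(queue, 1, &submitInfo, frameFence[i]);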

error: 'subgroup op' : requires SPIR-V 1.3

I am compiling a GLSL file to SPIR-V using the command:
C:/VulkanSDK/1.2.148.1/Bin/glslc C:/Users/jonat/Projects/sum.comp -o C:/Users/jonat/Projects/sum.spv
Getting the error:
error: 'subgroup op' : requires SPIR-V 1.3
The error occurs on lines 32 and 45, which are both sum = subgroupAdd(sum);
The full GLSL code:
#version 450
#extension GL_KHR_shader_subgroup_arithmetic : enable

layout(std430, binding = 0) buffer Input
{
    float inputs[];
};
layout(std430, binding = 1) buffer Output
{
    float outputs[];
};

layout (local_size_x_id = 1) in;
layout (constant_id = 2) const int sumSubGroupSize = 64;

layout(push_constant) uniform PushConsts
{
    int n;
} consts;

shared float sdata[sumSubGroupSize];

void main()
{
    float sum = 0.0;
    if (gl_GlobalInvocationID.x < consts.n)
    {
        sum = inputs[gl_GlobalInvocationID.x];
    }
    sum = subgroupAdd(sum);
    if (gl_SubgroupInvocationID == 0)
    {
        sdata[gl_SubgroupID] = sum;
    }
    memoryBarrierShared();
    barrier();
    if (gl_SubgroupID == 0)
    {
        sum = gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID] : 0;
        sum = subgroupAdd(sum);
    }
    if (gl_LocalInvocationID.x == 0)
    {
        outputs[gl_WorkGroupID.x] = sum;
    }
}
I have the latest version of the Vulkan SDK.
Looks like you need --target-env=vulkan1.1 for glslc to emit SPIR-V 1.3. From the glslc manual:
4.2.6. --target-env=
...
Generated code uses SPIR-V 1.0, except for code compiled for Vulkan 1.1, which uses SPIR-V 1.3, and code compiled for Vulkan 1.2, which uses SPIR-V 1.5.
If this option is not specified, a default of vulkan1.0 is used.
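With the paths from the question, the invocation would become:
C:/VulkanSDK/1.2.148.1/Bin/glslc --target-env=vulkan1.1 C:/Users/jonat/Projects/sum.comp -o C:/Users/jonat/Projects/sum.spv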

OpenGL buffer problem when adding >= 2^16 numbers

I'm facing some strange difficulties with an OpenGL buffer. I tried to shrink the problem to a minimal source code, so I created a program that increments each number in a FloatBuffer in each iteration. When I add fewer than 2^16 float numbers to the FloatBuffer, everything works just fine, but when I add >= 2^16 numbers, the numbers are not incremented and stay the same in each iteration.
Renderer:
public class Renderer extends AbstractRenderer {
    int computeShaderProgram;
    int[] locBuffer = new int[2];
    FloatBuffer data;
    int numbersCount = 65_536, round = 0; // 65_535 - OK, 65_536 - wrong

    @Override
    public void init() {
        computeShaderProgram = ShaderUtils.loadProgram(null, null, null, null, null,
                "/main/computeBuffer");
        glGenBuffers(locBuffer);
        // dataSizeInBytes = count of numbers to sort * (float=4B + padding=3*4B)
        int dataSizeInBytes = numbersCount * (1 + 3) * 4;
        data = ByteBuffer.allocateDirect(dataSizeInBytes)
                .order(ByteOrder.nativeOrder())
                .asFloatBuffer();
        initBuffer();
        printBuffer(data);
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, locBuffer[0]);
        glBufferData(GL_SHADER_STORAGE_BUFFER, data, GL_DYNAMIC_DRAW);
        glShaderStorageBlockBinding(computeShaderProgram, 0, 0);
        glViewport(0, 0, width, height);
    }

    private void initBuffer() {
        data.rewind();
        Random r = new Random();
        for (int i = 0; i < numbersCount; i++) {
            data.put(i * 4, r.nextFloat());
        }
    }

    @Override
    public void display() {
        if (round < 5) {
            glUseProgram(computeShaderProgram);
            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, locBuffer[0]);
            glDispatchCompute(numbersCount, 1, 1);
            glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
            glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data);
            printBuffer(data);
            round++;
        }
    }
    ...
}
The compute shader (computeBuffer):
#version 430
#extension GL_ARB_compute_shader : enable
#extension GL_ARB_shader_storage_buffer_object : enable

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(binding = 0) buffer Input {
    float elements[];
} input_data;

void main () {
    input_data.elements[gl_WorkGroupID.x] = input_data.elements[gl_WorkGroupID.x] + 1;
}
glDispatchCompute(numbersCount, 1, 1);
You must not dispatch a compute workload whose workgroup count exceeds the corresponding GL_MAX_COMPUTE_WORK_GROUP_COUNT for any dimension. The spec only guarantees that limit to be at least 65535, so it is very likely that you are simply exceeding the limit of your implementation. Actually, you should be getting a GL_INVALID_VALUE error for that call, and you should really consider using a debug context and debug message callback so that such obvious errors are easily spotted during development.
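A sketch of both checks (not the asker's code; the local size of 256 is just an example, and the shader would then index by gl_GlobalInvocationID.x instead of gl_WorkGroupID.x):
//Query the per-dimension workgroup-count limit (>= 65535 per the spec).
GLint maxGroupsX;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxGroupsX);
//With local_size_x = 256 in the shader, far fewer workgroups cover the data.
GLuint groups = (numbersCount + 255) / 256;
if (groups <= (GLuint)maxGroupsX)
    glDispatchCompute(groups, 1, 1);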

Compute Shader - gl_GlobalInvocationID and local_size

While trying to implement a naive compute shader that assigns affecting lights to a cluster, I have encountered unexpected (well, for a noob like me) behavior:
I invoke this shader with glDispatchCompute(32, 32, 32); and it is supposed to write a [light counter + 8 indices] record for each invocation into the "indices" buffer. But while debugging, I found that my writes into that buffer overlap between invocations even though I use a unique clusterId. I detect it by values of indices[outIndexStart] going over 8, and by visual flickering.
According to the documentation, gl_GlobalInvocationID is gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID. But if I set all local sizes to 1, the write issues go away. Why does local_size affect this code in such a way? And how can I reason about choosing its value here?
#version 430

layout (local_size_x = 4, local_size_y = 4, local_size_z = 4) in;

uniform int lightCount;

const unsigned int clusterSize = 32;
const unsigned int clusterSquared = clusterSize * clusterSize;

struct LightInfo {
    vec4 color;
    vec3 position;
    float radius;
};

layout(std430, binding = 0) buffer occupancyGrid {
    int exists[];
};
layout(std430, binding = 2) buffer lightInfos
{
    LightInfo lights[];
};
layout(std430, binding = 1) buffer outputList {
    int indices[];
};

void main(){
    unsigned int clusterId = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * clusterSize + gl_GlobalInvocationID.z * clusterSquared;
    if(exists[clusterId] == 0)
        return;
    //... not so relevant calculations
    unsigned int outIndexStart = clusterId * 9;
    unsigned int outOffset = 1;
    for(int i = 0; i < lightCount && outOffset < 9; i++){
        if(distance(lights[i].position, wordSpace.xyz) < lights[i].radius) {
            indices[outIndexStart + outOffset] = i;
            indices[outIndexStart]++;
            outOffset++;
        }
    }
}
Let's look at two declarations:
layout (local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
and
const unsigned int clusterSize = 32;
These say different things. The local_size declaration says that each work group has 4x4x4 invocations, so dispatching 32x32x32 work groups yields a global grid of 128x128x128 invocations: gl_GlobalInvocationID.x runs from 0 to 127. Your linearization, however, uses a stride of 32, so different global IDs collapse onto the same clusterId and the writes overlap. With all local sizes set to 1 the grid is exactly 32x32x32, the stride matches, and the problem disappears.
If you want to fix this problem, derive the stride from the actual grid size instead of hardcoding it (gl_NumWorkGroups is not a compile-time constant, so compute this in main()):
uint clusterSize = gl_NumWorkGroups.x * gl_WorkGroupSize.x; // 32 * 4 = 128 here
And you can even linearize with a dot product; note that GLSL's dot() only accepts floating-point vectors, hence the conversions:
vec3 linearizeInvocation = vec3(1, clusterSize, clusterSize * clusterSize);
...
uint clusterId = uint(dot(vec3(gl_GlobalInvocationID), linearizeInvocation));
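Alternatively, if the goal is one invocation per cluster of a 32x32x32 grid, keep the 4x4x4 local size and dispatch fewer workgroups so the global grid matches the cluster grid (a sketch):
//8 x 8 x 8 workgroups of 4 x 4 x 4 invocations = a 32 x 32 x 32 global grid,
//one invocation per cluster, so a stride of 32 is correct again.
glDispatchCompute(32 / 4, 32 / 4, 32 / 4);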

OpenGL Draws Nothing When Sent Too Much Data?

I seem to have run into a strange issue with OpenGL. Everything works fine with my class until I make the map too big (around 800x800 is the max), and then OpenGL doesn't draw anything. I have made calls to glGetBufferSubData, and as far as I can tell the data seems correct in both the vertex and index buffers, yet nothing is drawn. At first I assumed an overflow somewhere in my code, but according to std::numeric_limits my vertex and index iterators don't come anywhere close to the max size of a (signed) int. I use a lot of wrapper classes around OpenGL objects, but they are very simple, usually inline calls to their OpenGL equivalents. Same for the "M_" typedefs around primitive types. Below are the main loop I render in, the class where I believe the issue lies, and two screenshots of the output.
Correct output: http://i.imgur.com/cvC1T7L.png
Blank output, after expanding the map: http://i.imgur.com/MmmNgj4.png
Main loop:
int main(){
    //open window
    Memento::MainWindow& main_window = Memento::MainWindow::GetInstance();
    Memento::MainWindow::Init();
    main_window.SetTitle("Memento");
    main_window.Open();
    //matrices
    glmx_mat4 ortho_matrix = {};
    glmx_mat4_ortho(0.0f, 800.0f, 600.0f, 0.0f, 5.0f, 25.0f, ortho_matrix);
    glmx_mat4 modelview_matrix = {};
    glmx_mat4_identity(modelview_matrix);
    glmx_vec3 translate_vec = {0.0f, 0.0f, -10.0f};
    glmx_mat4_translate(modelview_matrix, translate_vec, modelview_matrix);
    glmx_mat4_multiply(ortho_matrix, modelview_matrix, ortho_matrix);
    //shaders
    Memento::GLShader default_vert_shader("default.vert", GL_VERTEX_SHADER);
    default_vert_shader.Compile();
    Memento::GLShader default_frag_shader("default.frag", GL_FRAGMENT_SHADER);
    default_frag_shader.Compile();
    //program
    Memento::GLProgram default_program;
    default_program.Create();
    default_program.AttachShader(default_vert_shader);
    default_program.AttachShader(default_frag_shader);
    Memento::GLVertexArray default_vert_array;
    default_vert_array.Create();
    default_vert_array.Bind();
    //BufferGameMap class - where I believe the issue lies
    Memento::TextureAtlas atlas1("atlas/cat_image.png", "atlas/cat_source.xml");
    Memento::BufferGameMap map1("tryagain.tmx", atlas1);
    //bind buffers
    map1.GetVertexBuffer().Bind();
    map1.GetIndexBuffer().Bind();
    //upload vertex attributes
    default_vert_array.EnableIndex(0);
    default_vert_array.IndexData(0, 2, GL_FLOAT, NULL, 8 * sizeof(Memento::M_float));
    default_vert_array.BindIndex(default_program, 0, "map_vert");
    //link, validate, and use program
    default_program.Link();
    default_program.Validate();
    default_program.Use();
    //upload matrix as uniform
    glUniformMatrix4fv(default_program.GetUniformLocation("modelviewprojection_matrix"),
                       1, GL_FALSE, ortho_matrix);
    //main draw loop
    while(not glfwGetKey(GLFW_KEY_ESC)){
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
        glDrawElements(GL_TRIANGLES, map1.GetIndexBufferLength(), GL_UNSIGNED_INT, NULL);
        glfwSwapBuffers();
    }
    //close window & exit
    main_window.Close();
    return (0);
}
BufferGameMap class (issue is probably here!):
Memento::BufferGameMap::BufferGameMap(std::string const& file, const Memento::TextureAtlas& atlas):
    TmxMap::GameMap(), background_color_color4(), vertex_buffer(), index_buffer(),
    vertex_buffer_len(0), index_buffer_len(0){
    Create(file, atlas);
}

Memento::M_void Memento::BufferGameMap::Create(std::string const& file, const Memento::TextureAtlas& atlas){
    if(IsCreated())Destroy();
    TmxMap::GameMap::CreateFromFile(file);
    std::vector<TmxMap::Layer> const& layers = GetLayers();
    if(not layers.empty()){
        const std::vector<TmxMap::Layer>::const_iterator layers_end = layers.end();
        std::vector<TmxMap::Layer>::const_iterator layers_iter = layers.begin();
        Memento::M_float* vertex_buffer_data = NULL;
        Memento::M_uint* index_buffer_data = NULL;
        for(; layers_iter != layers_end; ++layers_iter){
            vertex_buffer_len += layers_iter -> GetMapTiles().size() * (4 * (2 + 2 + 2 + 2));
            index_buffer_len += layers_iter -> GetMapTiles().size() * 6;
        }
        vertex_buffer_data = new Memento::M_float[vertex_buffer_len];
        index_buffer_data = new Memento::M_uint[index_buffer_len];
        //fill data to send to the gl
        Memento::M_sizei vertex_buffer_iter = 0, index_buffer_iter = 0, index_buffer_quad_iter = 0;
        //map data
        const Memento::M_uint map_size_x = GetMapSize().x, map_size_y = GetMapSize().y;
        const Memento::M_float map_tile_size_x = GetTileSize().x, map_tile_size_y = GetTileSize().y;
        //per layer data
        std::vector<TmxMap::MapTile> const* map_tiles = NULL;
        std::vector<TmxMap::MapTile>::const_iterator map_tiles_iter, map_tiles_end;
        //per tile data
        Memento::M_float map_origin_x = 0.0f, map_origin_y = 0.0f;
        for(layers_iter = layers.begin(); layers_iter != layers_end; ++layers_iter){
            map_tiles = &layers_iter -> GetMapTiles();
            for(map_tiles_iter = map_tiles -> begin(), map_tiles_end = map_tiles -> end();
                map_tiles_iter != map_tiles_end; ++map_tiles_iter,
                vertex_buffer_iter += 4 * (2 + 2 + 2 + 2), index_buffer_iter += 6,
                index_buffer_quad_iter += 4){
                map_origin_x = static_cast<Memento::M_float>(map_tiles_iter -> map_tile_index /
                                                             map_size_y) * map_tile_size_x;
                map_origin_y = static_cast<Memento::M_float>(map_tiles_iter -> map_tile_index %
                                                             map_size_y) * map_tile_size_y;
                vertex_buffer_data[vertex_buffer_iter] = map_origin_x;
                vertex_buffer_data[vertex_buffer_iter + 1] = map_origin_y;
                //=========================================================
                vertex_buffer_data[vertex_buffer_iter + 8] = map_origin_x;
                vertex_buffer_data[vertex_buffer_iter + 9] = map_origin_y + map_tile_size_y;
                //=========================================================
                vertex_buffer_data[vertex_buffer_iter + 16] = map_origin_x + map_tile_size_x;
                vertex_buffer_data[vertex_buffer_iter + 17] = map_origin_y + map_tile_size_y;
                //=========================================================
                vertex_buffer_data[vertex_buffer_iter + 24] = map_origin_x + map_tile_size_x;
                vertex_buffer_data[vertex_buffer_iter + 25] = map_origin_y;
                //=========================================================
                index_buffer_data[index_buffer_iter] = index_buffer_quad_iter;
                index_buffer_data[index_buffer_iter + 1] = index_buffer_quad_iter + 1;
                index_buffer_data[index_buffer_iter + 2] = index_buffer_quad_iter + 2;
                index_buffer_data[index_buffer_iter + 3] = index_buffer_quad_iter;
                index_buffer_data[index_buffer_iter + 4] = index_buffer_quad_iter + 2;
                index_buffer_data[index_buffer_iter + 5] = index_buffer_quad_iter + 3;
            }
        }
        vertex_buffer.Create(GL_ARRAY_BUFFER, GL_STATIC_DRAW);
        vertex_buffer.Bind();
        vertex_buffer.AllocateRef(vertex_buffer_len * sizeof(Memento::M_float),
                                  static_cast<const Memento::M_void*>(vertex_buffer_data));
        vertex_buffer.Unbind();
        index_buffer.Create(GL_ELEMENT_ARRAY_BUFFER, GL_STATIC_DRAW);
        index_buffer.Bind();
        index_buffer.AllocateRef(index_buffer_len * sizeof(Memento::M_uint),
                                 static_cast<const Memento::M_void*>(index_buffer_data));
        index_buffer.Unbind();
        delete[] vertex_buffer_data;
        delete[] index_buffer_data;
    }
}
Vertex shader:
#version 140
precision highp float;
uniform mat4 modelviewprojection_matrix;
in vec2 map_vert;
void main(){
    gl_Position = modelviewprojection_matrix * vec4(map_vert, 0, 1);
}
Fragment shader:
#version 140
precision highp float;
out vec4 frag_color;
void main(){
    frag_color = vec4(0.4, 0.2, 0.6, 0.5);
}
I think you are running out of stack memory.
By allocating the data on the heap you can use all the memory available to your process, while the stack is typically limited to about 1 MB.
In other words: move the object allocations out of main's scope, either onto the heap or to global scope.
Memento::TextureAtlas* atlas1; //("atlas/cat_image.png", "atlas/cat_source.xml");
Memento::BufferGameMap* map1; //("tryagain.tmx", atlas1);

int main(){
    atlas1 = new Memento::TextureAtlas("atlas/cat_image.png", "atlas/cat_source.xml");
    map1 = new Memento::BufferGameMap("tryagain.tmx", *atlas1); //the constructor takes a reference
    //.... access with ->
}
or, if it does not cause compiler errors:
Memento::TextureAtlas atlas1("atlas/cat_image.png", "atlas/cat_source.xml");
Memento::BufferGameMap map1("tryagain.tmx", atlas1);

int main(){
    //.... access with .
}
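A third option, a sketch that keeps the objects scoped to main while still allocating them on the heap (same classes as above; requires <memory>):
#include <memory>

int main(){
    auto atlas1 = std::make_unique<Memento::TextureAtlas>("atlas/cat_image.png", "atlas/cat_source.xml");
    auto map1 = std::make_unique<Memento::BufferGameMap>("tryagain.tmx", *atlas1);
    //.... access with ->
}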