I'm working on a voxel engine in C++ using Vulkan. Most of the boilerplate code is heavily based on vulkan-tutorial.com. I have a drawFrame function that looks like this...
void drawFrame(float dt) {
vkWaitForFences(device, 1, &inFlightFences[currentFrame], VK_TRUE, UINT64_MAX);
uint32_t imageIndex;
VkResult result = vkAcquireNextImageKHR(device, swapChain, UINT64_MAX, imageAvailableSemaphores[currentFrame], VK_NULL_HANDLE, &imageIndex);
updateUniformBuffer(imageIndex, dt);
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
recreateSwapChain();
return;
} else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) {
throw std::runtime_error("failed to acquire swap chain image!");
}
// Check if a previous frame is using this image (i.e. there is its fence to wait on)
if (imagesInFlight[imageIndex] != VK_NULL_HANDLE) {
vkWaitForFences(device, 1, &imagesInFlight[imageIndex], VK_TRUE, UINT64_MAX);
}
// Mark the image as now being in use by this frame
imagesInFlight[imageIndex] = inFlightFences[currentFrame];
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkSemaphore waitSemaphores[] = { imageAvailableSemaphores[currentFrame] };
VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.pWaitDstStageMask = waitStages;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffers[imageIndex];
VkSemaphore signalSemaphores[] = { renderFinishedSemaphores[currentFrame] };
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = signalSemaphores;
vkResetFences(device, 1, &inFlightFences[currentFrame]);
result = vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFences[currentFrame]);
if (result != VK_SUCCESS) {
throw std::runtime_error("failed to submit draw command buffer!");
}
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = signalSemaphores;
VkSwapchainKHR swapChains[] = { swapChain };
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = swapChains;
presentInfo.pImageIndices = &imageIndex;
presentInfo.pResults = nullptr; // Optional
result = vkQueuePresentKHR(presentQueue, &presentInfo);
if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR || framebufferResized) {
framebufferResized = false;
recreateSwapChain();
} else if (result != VK_SUCCESS) {
throw std::runtime_error("failed to present swap chain image!");
}
// Increment the frame. By using the modulo(%) operator, we ensure that the frame index loops around after every MAX_FRAMES_IN_FLIGHT enqueued frames.
currentFrame = (currentFrame + 1) % config->maxFramesInFlight;
}
I'm passing in vertices like this...
void createVertexAndIndexBuffer() {
for (size_t x = 0; x < 100; x++) {
for (size_t y = 0; y < 4; y++) {
for (size_t z = 0; z < 100; z++) {
// for each block in the world vector
auto blockId = world.getBlock(x, y, z);
if (blockId == BlockId::Air) {
continue;
}
Vec3 blockPosition = { x, y, z };
// get its data
auto verts = blockdb.blockDataFor(blockId).getVertices();
auto inds = blockdb.blockDataFor(blockId).getIndices();
// account for the block position and store the new verts for later
for (int i = 0; i < verts.size(); i++) {
Vertex v(verts[i]);
v.pos += blockPosition;
vertices.push_back(v);
}
// store the indices for later accounting for the offset into the verts vector
for (int i = 0; i < inds.size(); i++) {
int ind(inds[i] + vertices.size());
indices.push_back(ind);
}
}
}
}
// time to start creating the actual buffer
VkDeviceSize vertexBufferSize = sizeof(vertices[0]) * vertices.size();
VkBuffer vertexStagingBuffer;
VkDeviceMemory vertexStagingBufferMemory;
createBuffer(vertexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, vertexStagingBuffer, vertexStagingBufferMemory);
void* vertexData;
vkMapMemory(device, vertexStagingBufferMemory, 0, vertexBufferSize, 0, &vertexData);
memcpy(vertexData, vertices.data(), (size_t)vertexBufferSize);
vkUnmapMemory(device, vertexStagingBufferMemory);
createBuffer(vertexBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory);
// use copyBuffer() to move the vertex data to the device local buffer
copyBuffer(vertexStagingBuffer, vertexBuffer, vertexBufferSize);
// After copying the data from the staging buffer to the device buffer, we should clean up the staging buffer since it is no longer needed.
vkDestroyBuffer(device, vertexStagingBuffer, nullptr);
vkFreeMemory(device, vertexStagingBufferMemory, nullptr);
// and do the same for the index buffer
VkDeviceSize indexBufferSize = sizeof(indices[0]) * indices.size();
VkBuffer indexStagingBuffer;
VkDeviceMemory indexStagingBufferMemory;
createBuffer(indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, indexStagingBuffer, indexStagingBufferMemory);
void* indexData;
vkMapMemory(device, indexStagingBufferMemory, 0, indexBufferSize, 0, &indexData);
memcpy(indexData, indices.data(), (size_t)indexBufferSize);
vkUnmapMemory(device, indexStagingBufferMemory);
createBuffer(indexBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory);
copyBuffer(indexStagingBuffer, indexBuffer, indexBufferSize);
vkDestroyBuffer(device, indexStagingBuffer, nullptr);
vkFreeMemory(device, indexStagingBufferMemory, nullptr);
}
Everything works fine like that, but I need to be able to render by chunk instead of by block in order to implement chunk geometry optimizations. This is my chunk.h and chunk.cpp...
#pragma once
#include "Layer.h"
class Chunk {
public:
Chunk() = default;
Chunk(World* _world, Vec2XZ pos);
~Chunk() {}
BlockId getBlock(int x, int y, int z);
bool setBlock(BlockId id, int x, int y, int z);
bool isBlockOutOfBounds(int x, int y, int z);
void generateVerticesAndIndices();
void load();
std::array<Layer, CHUNK_HEIGHT> layers;
const Vec2XZ position;
const World* world;
bool isLoaded = false;
std::vector<Vertex> vertices;
std::vector<uint32_t> indices;
private:
};
#pragma once
#include "Chunk.h"
Chunk::Chunk(World* _world, Vec2XZ pos) :
position(pos),
world(_world) {
}
BlockId Chunk::getBlock(int x, int y, int z) {
if (isBlockOutOfBounds(x, y, z)) {
return BlockId::Air;
}
return layers[y].getBlock(x, z);
}
bool Chunk::setBlock(BlockId id, int x, int y, int z) {
if (!isBlockOutOfBounds(x, y, z)) {
if (layers[y].setBlock(id, x, z)) {
return true;
}
}
return false;
}
bool Chunk::isBlockOutOfBounds(int x, int y, int z) {
if (x >= CHUNK_WIDTH)
return true;
if (z >= CHUNK_WIDTH)
return true;
if (x < 0)
return true;
if (y < 0)
return true;
if (z < 0)
return true;
if (y >= CHUNK_HEIGHT) {
return true;
}
return false;
}
void Chunk::generateVerticesAndIndices() {
vertices.clear();
indices.clear();
for (int y = 0; y < CHUNK_HEIGHT; y++) {
for (int x = 0; x < CHUNK_WIDTH; x++) {
for (int z = 0; z < CHUNK_WIDTH; z++) {
// for each block in this chunk
auto blockId = getBlock(x, y, z);
if (blockId == BlockId::Air) {
continue; // dont render air
}
// infer the block position using its coordinates
Vec3 blockPosition = { x, y, z };
// get its data
auto verts = world->blockdb->blockDataFor(blockId).getVertices();
auto inds = world->blockdb->blockDataFor(blockId).getIndices();
// account for the block position and store the new verts
for (int i = 0; i < verts.size(); i++) {
Vertex v(verts[i]);
v.pos += blockPosition;
vertices.push_back(v);
}
// store the indices for later accounting for the offset into the verts vector
for (int i = 0; i < inds.size(); i++) {
int ind(inds[i] + vertices.size());
indices.push_back(ind);
}
}
}
}
}
void Chunk::load() {
if (isLoaded) {
return;
}
// todo: actual terrain generation
for (int y = 0; y < 4; y++) {
for (int x = 0; x < CHUNK_WIDTH; x++) {
for (int z = 0; z < CHUNK_WIDTH; z++) {
setBlock(BlockId::Grass, x, y, z);
}
}
}
isLoaded = true;
}
So I've basically migrated the top part of createVertexAndIndexBuffer() over to the chunk class. Then within createVertexAndIndexBuffer(), I iterate through the chunks around the player within render distance like this...
void createVertexAndIndexBuffer() {
// set bounds of how far out to render based on what chunk the player is in
Vec2XZ playerChunkCoords = { floor(player.position.x) / CHUNK_WIDTH, floor(player.position.z) / CHUNK_WIDTH };
Vec2XZ lowChunkXZ = { playerChunkCoords.x - renderDistance, playerChunkCoords.z - renderDistance };
Vec2XZ highChunkXZ = { playerChunkCoords.x + renderDistance, playerChunkCoords.z + renderDistance };
// for each chunk around the player within render distance
for (int x = lowChunkXZ.x; x < highChunkXZ.x; x++) {
for (int z = lowChunkXZ.z; z < highChunkXZ.z; z++) {
// get the chunk
Chunk* chunk = &world.getChunk(x, z);
// load it if it isnt already
if (!chunk->isLoaded) {
chunk->load();
}
// generate its geometry if it doesnt already exist
if (chunk->vertices.size() == 0 || chunk->indices.size() == 0) {
chunk->generateVerticesAndIndices();
}
auto verts = chunk->vertices;
auto inds = chunk->indices;
// account for the chunk position and store the new verts for later
for (int i = 0; i < verts.size(); i++) {
Vertex v(verts[i]);
v.pos.x += x * CHUNK_WIDTH;
v.pos.z += z * CHUNK_WIDTH;
vertices.push_back(v);
}
// store the indices for later accounting for the offset into the verts vector
for (int i = 0; i < inds.size(); i++) {
int ind(inds[i] + vertices.size());
indices.push_back(ind);
}
}
}
// time to start creating the actual buffer
VkDeviceSize vertexBufferSize = sizeof(vertices[0]) * vertices.size();
VkBuffer vertexStagingBuffer;
VkDeviceMemory vertexStagingBufferMemory;
createBuffer(vertexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, vertexStagingBuffer, vertexStagingBufferMemory);
void* vertexData;
vkMapMemory(device, vertexStagingBufferMemory, 0, vertexBufferSize, 0, &vertexData);
memcpy(vertexData, vertices.data(), (size_t)vertexBufferSize);
vkUnmapMemory(device, vertexStagingBufferMemory);
createBuffer(vertexBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory);
// use copyBuffer() to move the vertex data to the device local buffer
copyBuffer(vertexStagingBuffer, vertexBuffer, vertexBufferSize);
// After copying the data from the staging buffer to the device buffer, we should clean up the staging buffer since it is no longer needed.
vkDestroyBuffer(device, vertexStagingBuffer, nullptr);
vkFreeMemory(device, vertexStagingBufferMemory, nullptr);
// and do the same for the index buffer
VkDeviceSize indexBufferSize = sizeof(indices[0]) * indices.size();
VkBuffer indexStagingBuffer;
VkDeviceMemory indexStagingBufferMemory;
createBuffer(indexBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, indexStagingBuffer, indexStagingBufferMemory);
void* indexData;
vkMapMemory(device, indexStagingBufferMemory, 0, indexBufferSize, 0, &indexData);
memcpy(indexData, indices.data(), (size_t)indexBufferSize);
vkUnmapMemory(device, indexStagingBufferMemory);
createBuffer(indexBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory);
copyBuffer(indexStagingBuffer, indexBuffer, indexBufferSize);
vkDestroyBuffer(device, indexStagingBuffer, nullptr);
vkFreeMemory(device, indexStagingBufferMemory, nullptr);
}
With this code, the engine starts up fine but the screen stays white. After a few calls to vkQueueSubmit() within drawFrame(), vkQueueSubmit() returns VK_ERROR_DEVICE_LOST instead of VK_SUCCESS, the app throws the corresponding runtime error, prints the debug message "failed to submit draw command buffer!", waits for me to press a key, and finally terminates with EXIT_FAILURE.
Why does pushing vertices from blocks directly work fine, but pushing them from chunks does not? I have checked the Vulkan specification and done a lot of googling, but I couldn't find much on what causes this error. I want to know how to fix it and, in turn, fix my engine.
I was assigning the indices incorrectly and the graphics driver was catching it: some of the indices were larger than the size of the vertex vector.
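For illustration, a minimal sketch of the corrected offset handling in the outer chunk loop (the same change applies inside Chunk::generateVerticesAndIndices): the offset has to be the vertex count captured before this chunk's vertices are appended, otherwise the resulting indices point past the end of the vertex vector.
// Capture the base offset *before* appending this chunk's vertices.
uint32_t indexOffset = static_cast<uint32_t>(vertices.size());
for (const Vertex& src : chunk->vertices) {
    Vertex v(src);
    v.pos.x += x * CHUNK_WIDTH;
    v.pos.z += z * CHUNK_WIDTH;
    vertices.push_back(v);
}
for (uint32_t ind : chunk->indices) {
    indices.push_back(ind + indexOffset); // every index now stays within [0, vertices.size())
}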
I have code that paints a BMP image on a TPaintBox on a VCL form in a C++ application.
Everything works fine as long as I only have one image to paint, on one form. When I create a second form, I get sporadic access violations.
The code is called from a thread, and I'm using the Synchronize function in order to synchronize with the main VCL thread, like this:
void TCameraForm::loadImage(FramePtr frame)
{
syncing s;
s.aFrame = frame;
s.theForm = this;
//Synchronize with UI thread
TThread::Synchronize(0, &s.fn);
}
In the code, a FramePtr is a shared pointer to one individual 'frame', holding a device dependent bitmap.
The syncing variable is a structure, holding the code for the actual painting:
//This is a trick to use VCL's TThread::Synchronize function "with parameters"
//Thanks to Mr. R. Lebeau for sharing this trick.
struct syncing
{
FramePtr aFrame;
TCameraForm* theForm;
int tag;
void __fastcall fn()
{
try
{
//Create a device dependent bitmap
BitMap aBitMap(aFrame);
//Get the bitmap memory into a TMemoryStream
TMemoryStream* ms = new TMemoryStream();
int bytes = ms->Write(aBitMap.getBuffer()->mMemoryBuffer, aBitMap.getBuffer()->mBufferSize);
ms->Position = 0;
//Create a TPicture object that will be used for drawing on the paintbox
TBitmap* tbm = new TBitmap();
tbm->LoadFromStream(ms);
TRect stretchedRect(getStretchedDimensions(tbm->Width, tbm->Height, theForm->PaintBox1->Width, theForm->PaintBox1->Height));
theForm->PaintBox1->Canvas->StretchDraw(stretchedRect, tbm);
delete ms;
delete tbm;
}
catch(...)
{
Log(lError) << "Exception occured in the CameraFrame sync function";
}
}
};
The debugger mainly stops on creation of the bitmap.
I'm using C++Builder 10.3.2 and the classic compiler.
The bitmap class looks like this:
Header
class BitMap
{
public:
BitMap(FramePtr aFrame);
BitMap(unsigned long width, unsigned long height, ColorCode c, ImageMemoryBuffer& buf);
ImageMemoryBuffer* getBuffer();
~BitMap();
bool write(const string& file);
protected:
unsigned int mWidth;
unsigned int mHeight;
ColorCode mColorCode;
ImageMemoryBuffer mImageMemoryBuffer;
bool create();
bool release();
};
And CPP:
enum { THREE_CHANNEL = 0xC,};
enum { BMP_HEADER_SIZE = 54, };
enum { ALIGNMENT_SIZE = 4, };
namespace ai
{
BitMap::BitMap(FramePtr aFrame)
:
mWidth(0),
mHeight(0),
mColorCode(ColorCodeMono8),
mImageMemoryBuffer()
{
aFrame->GetImageSize(mImageMemoryBuffer.mBufferSize);
aFrame->GetWidth(mWidth);
aFrame->GetHeight(mHeight);
VmbPixelFormatType ePixelFormat = VmbPixelFormatMono8;
aFrame->GetPixelFormat(ePixelFormat);
if((ePixelFormat != VmbPixelFormatMono8) && (ePixelFormat != VmbPixelFormatRgb8))
{
throw(MVRException("Invalid pixel format: " + toString(ePixelFormat)));
}
mColorCode = (ePixelFormat == VmbPixelFormatRgb8) ? ColorCodeRGB24 : ColorCodeMono8;
VmbUchar_t *pImage = NULL;
if (aFrame->GetImage(pImage) != VmbErrorSuccess)
{
throw(MVRException("Failed \"getting\" image"));
}
mImageMemoryBuffer.mMemoryBuffer = (unsigned char*) pImage;
if(!create())
{
Log(lError) << "There was an error creating the bitmap";
throw(MVRException("Failed creating Bitmap"));
}
}
BitMap::BitMap(unsigned long width, unsigned long height, ColorCode c, ImageMemoryBuffer& buf)
:
mWidth(width),
mHeight(height),
mColorCode(c),
mImageMemoryBuffer(buf)
{
if(!create())
{
Log(lError) << "There was an error creating the bitmap";
throw(MVRException("Failed creating bitmap"));
}
}
BitMap::~BitMap()
{
if(!release())
{
Log(lError) << "There was an error releasing the bitmap";
}
}
ImageMemoryBuffer* BitMap::getBuffer()
{
return &mImageMemoryBuffer;
}
bool BitMap::create()
{
try
{
unsigned char nNumColors; // Number of colors of our image
unsigned char nPadLength; // The padding we need to align the bitmap ALIGNMENT_SIZE
unsigned long nPaletteSize = 0; // The size of the bitmap's palette
unsigned long nHeaderSize; // The size of the bitmap's header
unsigned long nFileSize; // The size of the bitmap file
unsigned char* pBitmapBuffer; // A buffer we use for creating the bitmap
unsigned char* pCurBitmapBuf; // A cursor to move over "pBitmapBuffer"
unsigned char* pCurSrc; // A cursor to move over the given buffer "pBuffer"
unsigned long px; // A single pixel for storing transformed color information
unsigned long x; // The horizontal position within our image
unsigned long y; // The vertical position within our image
unsigned long i; // Counter for some iteration
// The bitmap header
char fileHeader[14] = { 'B','M', // Default
0,0,0,0, // File size
0,0,0,0, // Reserved
0,0,0,0 }; // Offset to image content
char infoHeader[40] = { 40,0,0,0, // Size of info header
0,0,0,0, // Width
0,0,0,0, // Height
1,0, // Default
0, 0 }; // bpp
if ( 0 == mImageMemoryBuffer.mBufferSize || 0 == mWidth || 0 == mHeight )
{
Log(lError) << "Zero bitmap buffer, width ot height in Bitmap constructor";
return false;
}
if ( mColorCode == (mColorCode & THREE_CHANNEL) )
{
nNumColors = 3;
}
else
{
nNumColors = 1;
}
// Bitmap rows are always padded to a multiple of four bytes. If the data is not aligned, we need to pad with zeros.
nPadLength = (mWidth * nNumColors) % ALIGNMENT_SIZE;
if ( 0 != nPadLength )
{
nPadLength = ALIGNMENT_SIZE - nPadLength;
}
if ( ColorCodeRGB24 != mColorCode )
{
nPaletteSize = 256;
}
nHeaderSize = BMP_HEADER_SIZE + nPaletteSize * 4;
pBitmapBuffer = (unsigned char*)malloc( nHeaderSize + mImageMemoryBuffer.mBufferSize + (nPadLength * mHeight) );
nFileSize = nHeaderSize + mImageMemoryBuffer.mBufferSize + (nPadLength * mHeight);
// File size
fileHeader[ 2] = (char)(nFileSize);
fileHeader[ 3] = (char)(nFileSize >> 8);
fileHeader[ 4] = (char)(nFileSize >> 16);
fileHeader[ 5] = (char)(nFileSize >> 24);
// Offset to image content
fileHeader[10] = (char)(nHeaderSize);
fileHeader[11] = (char)(nHeaderSize >> 8);
fileHeader[12] = (char)(nHeaderSize >> 16);
fileHeader[13] = (char)(nHeaderSize >> 24);
// Width
infoHeader[ 4] = (char)(mWidth);
infoHeader[ 5] = (char)(mWidth >> 8);
infoHeader[ 6] = (char)(mWidth >> 16);
infoHeader[ 7] = (char)(mWidth >> 24);
// Height (has to be negative for a top down image)
infoHeader[ 8] = (char)(-(long)mHeight);
infoHeader[ 9] = (char)(-(long)mHeight >> 8);
infoHeader[10] = (char)(-(long)mHeight >> 16);
infoHeader[11] = (char)(-(long)mHeight >> 24);
// bpp
infoHeader[14] = 8 * nNumColors;
// Image size
infoHeader[20] = (char)(mImageMemoryBuffer.mBufferSize);
infoHeader[21] = (char)(mImageMemoryBuffer.mBufferSize >> 8);
infoHeader[22] = (char)(mImageMemoryBuffer.mBufferSize >> 16);
infoHeader[23] = (char)(mImageMemoryBuffer.mBufferSize >> 24);
// Palette size
infoHeader[32] = (char)(nPaletteSize);
infoHeader[33] = (char)(nPaletteSize >> 8);
infoHeader[34] = (char)(nPaletteSize >> 16);
infoHeader[35] = (char)(nPaletteSize >> 24);
// Used colors
infoHeader[36] = (char)(nPaletteSize);
infoHeader[37] = (char)(nPaletteSize >> 8);
infoHeader[38] = (char)(nPaletteSize >> 16);
infoHeader[39] = (char)(nPaletteSize >> 24);
// Write header
pCurBitmapBuf = pBitmapBuffer;
memcpy(pCurBitmapBuf, fileHeader, 14);
pCurBitmapBuf += 14;
memcpy(pCurBitmapBuf, infoHeader, 40);
pCurBitmapBuf += 40;
for(i = 0; i < nPaletteSize; ++i)
{
pCurBitmapBuf[0] = (char)(i);
pCurBitmapBuf[1] = (char)(i);
pCurBitmapBuf[2] = (char)(i);
pCurBitmapBuf[3] = 0;
pCurBitmapBuf += 4;
}
// RGB -> BGR (a Windows bitmap is BGR)
if(mColorCode == ColorCodeRGB24)
{
pCurSrc = (unsigned char*) mImageMemoryBuffer.mMemoryBuffer;
for(y=0; y < mHeight; ++y, pCurBitmapBuf += nPadLength )
{
for (x = 0; x < mWidth; ++x, pCurSrc += 3, pCurBitmapBuf += 3)
{
px = 0;
// Create a 4 Byte structure to store ARGB (we don't use A)
px = px | (pCurSrc[0] << 16) | (pCurSrc[1] << 8) | pCurSrc[2];
// Due to endianness ARGB is stored as BGRA
// and we only have to write the first three Bytes
memcpy( pCurBitmapBuf, &px, 3 );
}
// Add padding at the end of each row
memset( pCurBitmapBuf, 0, nPadLength );
}
mColorCode = ColorCodeBGR24;
}
// Mono8
else
{
if(nPadLength == 0)
{
memcpy( pCurBitmapBuf, mImageMemoryBuffer.mMemoryBuffer, mImageMemoryBuffer.mBufferSize );
}
else
{
pCurSrc = (unsigned char*)mImageMemoryBuffer.mMemoryBuffer;
for (y=0; y < mHeight; ++y, pCurSrc += mWidth * nNumColors)
{
// Write a single row of colored pixels
memcpy( pCurBitmapBuf, pCurSrc, mWidth * nNumColors );
pCurBitmapBuf += mWidth * nNumColors;
// Write padding pixels
memset(pCurBitmapBuf, 0, nPadLength);
pCurBitmapBuf += nPadLength;
}
}
}
mImageMemoryBuffer.mMemoryBuffer = pBitmapBuffer;
mImageMemoryBuffer.mBufferSize = nFileSize;
return true;
}
catch(...)
{
Log(lError) << "Exception in creation of bitmap create function";
return false;
}
}
bool BitMap::release()
{
try
{
if (mImageMemoryBuffer.mMemoryBuffer != NULL && mImageMemoryBuffer.mBufferSize > 0)
{
free(mImageMemoryBuffer.mMemoryBuffer);
mImageMemoryBuffer.mMemoryBuffer = NULL;
}
return true;
}
catch(...)
{
return false;
}
}
bool BitMap::write(const string& fName)
{
if (mImageMemoryBuffer.mMemoryBuffer == NULL)
{
return false;
}
FILE *file = fopen(fName.c_str(), "wb");
if(!file)
{
Log(lError) << "Failed opening file: " << fName;
return false;
}
fwrite(mImageMemoryBuffer.mMemoryBuffer, 1, mImageMemoryBuffer.mBufferSize, file );
fclose(file);
return true;
}
}
UPDATE: The above code works fine when executed by one thread. The problem occurs when several threads are involved. I have located the problem: it happens when memcpy is called during creation of the bitmap (not the TBitmap), which supports this. So the main problem is that the same memory is being manipulated by two or more threads at the same time.
For the above code, where would it be appropriate to incorporate a mutex in order to prevent the memory corruption? Or could another technique be used?
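Not an authoritative answer, just a minimal sketch of one possible placement, assuming the corruption comes from the frame's pixel buffer being written by a capture thread while the sync function reads it: every piece of code that touches that buffer takes the same lock. std::mutex is used here for brevity; a VCL TCriticalSection would work the same way, and all names below are hypothetical stand-ins.
#include <algorithm>
#include <cstring>
#include <mutex>
#include <vector>

// Hypothetical shared frame buffer plus the mutex that guards it.
static std::mutex gFrameBufferMutex;
static std::vector<unsigned char> gFrameBuffer(1920 * 1080 * 3);

// Capture side: lock before writing new pixel data into the shared buffer.
void onFrameReceived(const unsigned char* src, std::size_t bytes)
{
    std::lock_guard<std::mutex> lock(gFrameBufferMutex);
    std::memcpy(gFrameBuffer.data(), src, std::min(bytes, gFrameBuffer.size()));
}

// UI side (the body of syncing::fn): hold the same lock around everything
// that reads the buffer, i.e. the BitMap construction and the stream write.
void paintLatestFrame()
{
    std::lock_guard<std::mutex> lock(gFrameBufferMutex);
    // BitMap aBitMap(aFrame);   // the memcpy from the frame buffer happens in here
    // ms->Write(aBitMap.getBuffer()->mMemoryBuffer, aBitMap.getBuffer()->mBufferSize);
    // ... StretchDraw as in the original fn() ...
}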
The shader takes an SSBO of photons that have a position, direction, wavelength and intensity. Each thread is responsible for tracing exactly one photon through the grid; at each grid cell the photon hits, its intensity is accumulated per wavelength to build a spectral distribution for each grid cell.
The problem is that the shader works perfectly for 100,000 photons, but doesn't return a result for 1,000,000 photons.
I looked into the sizes of the SSBOs and all were within my GPU's (NVIDIA Quadro P6000) limit of 2 GB:
SSBO Grid size: 1.5GB
SSBO Photons Size: 0.02GB
If I change the logic in some places, it works with one million photons (see lines 87 and 114 for comments).
I currently can't see any explanation for why the shader fails with 1,000,000 photons but works with 100,000 photons. The logic is the same and the buffer sizes are within limits. (That the buffer size can't be the problem is also confirmed by the fact that it works when the logic is changed.)
Below is the source code. If you want to try it yourself here is the code on github: https://github.com/TheJhonny007/TextureTracerDebug
Compute Shader:
#version 430
#extension GL_EXT_compute_shader: enable
#extension GL_EXT_shader_storage_buffer_object: enable
#extension GL_ARB_compute_variable_group_size: enable
const uint TEX_WIDTH = 1024u;
const uint TEX_HEIGHT = TEX_WIDTH;
const uint MIN_WAVELENGTH = 380u;
const uint MAX_WAVELENGTH = 740u;
const uint NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;
// Size: 24 bytes -> ~40,000,000 photons per available gigabyte of ram
struct Photon {
vec2 position;// m
vec2 direction;// normalized
uint wavelength;// nm
float intensity;// 0..1 should start at 1
};
layout(std430, binding = 0) buffer Photons {
Photon photons[];
};
// Size: 1440 bytes -> ~700,000 pixels per available gigabyte of ram
struct Pixel {
uint intensityAtWavelengths[NUM_WAVELENGTHS];// [0..1000]
};
layout(std430, binding = 1) buffer Pixels {
//Pixel pixels[TEX_WIDTH][TEX_HEIGHT];
// NVIDIAs linker takes ages to link if the sizes are specified :(
Pixel[] pixels;
};
uniform float xAxisScalingFactor;
vec2 getHorizontalRectangleAt(int i) {
float x = pow(float(i), xAxisScalingFactor);
float w = pow(float(i + 1), xAxisScalingFactor);
return vec2(x, w);
}
uniform float rectangleHeight;
struct Rectangle {
float x;
float y;
float w;
float h;
};
layout (local_size_variable) in;
void addToPixel(uvec2 idx, uint wavelength, uint intensity) {
if (idx.x >= 0u && idx.x < TEX_WIDTH && idx.y >= 0u && idx.y < TEX_HEIGHT) {
uint index = (idx.y * TEX_WIDTH) + idx.x;
atomicAdd(pixels[index].intensityAtWavelengths[wavelength - MIN_WAVELENGTH], intensity);
}
}
/// Returns the rectangle at the given indices.
Rectangle getRectangleAt(ivec2 indices) {
vec2 horRect = getHorizontalRectangleAt(indices.x);
return Rectangle(horRect.x, rectangleHeight * float(indices.y), horRect.y, rectangleHeight);
}
uniform float shadowLength;
uniform float shadowHeight;
/// Returns the indices of the rectangle at the given location
ivec2 getRectangleIdxAt(vec2 location) {
int x = 0;
int y = int(location.y / rectangleHeight);
return ivec2(x, y);
}
float getRayIntersectAtX(Photon ray, float x) {
float slope = ray.direction.y / ray.direction.x;
return slope * (x - ray.position.x) + ray.position.y;
}
ivec2 getRayRectangleExitEdge(Photon ray, Rectangle rect) {
float intersectHeight = getRayIntersectAtX(ray, rect.x + rect.w);
// IF ONE OF THE FIRST TWO CONDITIONS GETS REMOVED IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
if (intersectHeight < rect.y) {
return ivec2(0, -1);
} else if (intersectHeight > rect.y + rect.h) {
return ivec2(0, 1);
} else {
return ivec2(1, 0);
}
}
void main() {
uint gid = gl_GlobalInvocationID.x;
if (gid >= photons.length()) return;
Photon photon = photons[gid];
ivec2 photonTexIndices = getRectangleIdxAt(photon.position);
while (photonTexIndices.x < TEX_WIDTH && photonTexIndices.y < TEX_HEIGHT &&
photonTexIndices.x >= 0 && photonTexIndices.y >= 0) {
// need to convert to uint for atomic add operations...
addToPixel(uvec2(photonTexIndices), photon.wavelength, uint(photon.intensity * 100.0));
ivec2 dir = getRayRectangleExitEdge(photon, getRectangleAt(photonTexIndices));
photonTexIndices += dir;
// When the ray goes out of bounds on the bottom then mirror it to simulate rays coming from
// the other side of the planet. This works because of the rotational symmetry of the system.
// IF COMMENTED OUT IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
if (photonTexIndices.y < 0) {
photonTexIndices.y = 0;
photon.position.y *= -1.0;
photon.direction.y *= -1.0;
}
}
}
Tracer.hpp
#ifndef TEXTURE_TRACER_HPP
#define TEXTURE_TRACER_HPP
#include <glm/glm.hpp>
#include <random>
namespace gpu {
// 6 * 4 = 24 Bytes
struct Photon {
glm::vec2 position; // m
glm::vec2 direction; // normalized
uint32_t waveLength; // nm
float intensity; // 0..1 should start at 1
};
class TextureTracer {
public:
TextureTracer();
uint32_t createShadowMap(size_t numPhotons);
private:
void initTextureTracer();
void traceThroughTexture(uint32_t ssboPhotons, size_t numPhotons);
Photon emitPhoton();
std::vector<Photon> generatePhotons(uint32_t count);
struct {
uint32_t uRectangleHeight;
uint32_t uShadowLength;
uint32_t uShadowHeight;
uint32_t uXAxisScalingFactor;
} mTextureTracerUniforms;
uint32_t mTextureTracerProgram;
std::mt19937_64 mRNG;
std::uniform_real_distribution<> mDistributionSun;
std::uniform_int_distribution<uint32_t> mDistributionWavelength;
std::bernoulli_distribution mDistributionBoolean;
};
} // namespace gpu
#endif // TEXTURE_TRACER_HPP
Tracer.cpp
#include "TextureTracer.hpp"
#include <GL/glew.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>
void GLAPIENTRY MessageCallback(GLenum source, GLenum type, GLuint id,
GLenum severity, GLsizei length,
const GLchar *message, const void *userParam) {
if (type == GL_DEBUG_TYPE_ERROR)
fprintf(stderr, "GL ERROR: type = 0x%x, severity = 0x%x, message = %s\n",
type, severity, message);
else
fprintf(stdout, "GL INFO: type = 0x%x, severity = 0x%x, message = %s\n",
type, severity, message);
}
namespace gpu {
const double TEX_HEIGHT_TO_RADIUS_FACTOR = 4;
const double TEX_SHADOW_LENGTH_FACTOR = 8;
const uint32_t TEX_WIDTH = 1024u;
const uint32_t TEX_HEIGHT = TEX_WIDTH;
const double RADIUS = 6'371'000.0;
const double RADIUS_FACTORED = RADIUS * TEX_HEIGHT_TO_RADIUS_FACTOR;
const double SUN_RADIUS = 695'510'000.0;
const double DIST_TO_SUN = 149'600'000'000.0;
const double ATMO_HEIGHT = 42'000.0;
std::string loadShader(const std::string &fileName) {
std::ifstream shaderFileStream(fileName, std::ios::in);
if (!shaderFileStream.is_open()) {
std::cerr << "Could not load the GLSL shader from '" << fileName << "'!"
<< std::endl;
exit(-1);
}
std::string shaderCode;
while (!shaderFileStream.eof()) {
std::string line;
std::getline(shaderFileStream, line);
shaderCode.append(line + "\n");
}
return shaderCode;
}
void TextureTracer::initTextureTracer() {
mTextureTracerProgram = glCreateProgram();
uint32_t rayTracingComputeShader = glCreateShader(GL_COMPUTE_SHADER);
std::string code = loadShader("../resources/TextureTracer.glsl");
const char *shader = code.c_str();
glShaderSource(rayTracingComputeShader, 1, &shader, nullptr);
glCompileShader(rayTracingComputeShader);
glAttachShader(mTextureTracerProgram, rayTracingComputeShader);
glLinkProgram(mTextureTracerProgram);
mTextureTracerUniforms.uRectangleHeight =
glGetUniformLocation(mTextureTracerProgram, "rectangleHeight");
mTextureTracerUniforms.uShadowHeight =
glGetUniformLocation(mTextureTracerProgram, "shadowHeight");
mTextureTracerUniforms.uShadowLength =
glGetUniformLocation(mTextureTracerProgram, "shadowLength");
mTextureTracerUniforms.uXAxisScalingFactor =
glGetUniformLocation(mTextureTracerProgram, "xAxisScalingFactor");
glDetachShader(mTextureTracerProgram, rayTracingComputeShader);
glDeleteShader(rayTracingComputeShader);
}
TextureTracer::TextureTracer()
: mRNG(1L), mDistributionSun(
std::uniform_real_distribution<>(-SUN_RADIUS, SUN_RADIUS)),
mDistributionWavelength(
std::uniform_int_distribution<uint32_t>(380, 739)),
mDistributionBoolean(std::bernoulli_distribution(0.5)) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(MessageCallback, nullptr);
initTextureTracer();
}
double raySphereDistance(glm::dvec2 origin, glm::dvec2 direction,
glm::dvec2 center, double radius) {
glm::dvec2 m = origin - center;
double b = glm::dot(m, direction);
double c = glm::dot(m, m) - (radius * radius);
if (c > 0.0 && b > 0.0)
return -1.0;
double discr = b * b - c;
// A negative discriminant corresponds to ray missing sphere
if (discr < 0.0)
return -1.0;
// Ray now found to intersect sphere, compute smallest t value of intersection
return glm::max(0.0, -b - glm::sqrt(discr));
}
Photon TextureTracer::emitPhoton() {
std::uniform_real_distribution<> distributionEarth(0.0, ATMO_HEIGHT);
glm::dvec2 target = {0.0, RADIUS + distributionEarth(mRNG)};
double d;
do {
d = glm::length(glm::dvec2(mDistributionSun(mRNG), mDistributionSun(mRNG)));
} while (d > SUN_RADIUS);
glm::dvec2 startPosition =
glm::dvec2(-DIST_TO_SUN, mDistributionBoolean(mRNG) ? d : -d);
glm::dvec2 direction = glm::normalize(target - startPosition);
startPosition +=
direction * raySphereDistance(startPosition, direction, {0.0, 0.0},
RADIUS + ATMO_HEIGHT);
return {glm::vec2(0.0, startPosition.y), glm::vec2(direction),
mDistributionWavelength(mRNG), 1.0f};
}
std::vector<Photon> TextureTracer::generatePhotons(uint32_t count) {
std::vector<Photon> photons(count);
std::generate(photons.begin(), photons.end(),
[this]() { return emitPhoton(); });
return photons;
}
void TextureTracer::traceThroughTexture(uint32_t ssboPhotons,
size_t numPhotons) {
glUseProgram(mTextureTracerProgram);
glUniform1f(mTextureTracerUniforms.uRectangleHeight,
RADIUS_FACTORED / TEX_HEIGHT);
const double shadowLength =
TEX_SHADOW_LENGTH_FACTOR * (DIST_TO_SUN * RADIUS) / (SUN_RADIUS - RADIUS);
glUniform1f(mTextureTracerUniforms.uShadowLength, shadowLength);
glUniform1f(mTextureTracerUniforms.uShadowHeight, RADIUS_FACTORED);
const double xAxisScalingFactor =
glm::log(shadowLength) / glm::log(static_cast<double>(TEX_WIDTH));
glUniform1f(mTextureTracerUniforms.uXAxisScalingFactor,
static_cast<float>(xAxisScalingFactor));
const uint32_t MIN_WAVELENGTH = 380u;
const uint32_t MAX_WAVELENGTH = 740u;
const uint32_t NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;
size_t pixelBufferSize =
TEX_WIDTH * TEX_HEIGHT * NUM_WAVELENGTHS * sizeof(uint32_t);
uint32_t ssboPixels;
glGenBuffers(1, &ssboPixels);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glBufferData(GL_SHADER_STORAGE_BUFFER, pixelBufferSize, nullptr,
GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssboPhotons);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ssboPixels);
const uint32_t numThreads = 32u;
const uint32_t numBlocks = numPhotons / numThreads;
std::cout << "numBlocks: " << numBlocks << std::endl;
glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
struct Pixel {
uint32_t intensityAtWavelengths[NUM_WAVELENGTHS];
};
std::vector<Pixel> pixels(TEX_WIDTH * TEX_HEIGHT);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, pixelBufferSize,
pixels.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
for (int y = 0; y < TEX_HEIGHT; ++y) {
printf("%4i | ", y);
for (int x = 0; x < TEX_WIDTH; ++x) {
Pixel p = pixels[y * TEX_WIDTH + x];
int counter = 0;
for (uint32_t i : p.intensityAtWavelengths) {
counter += i;
}
if (counter == 0) {
printf(" ");
} else if (counter > 100'000'000) {
printf("%4s", "\u25A0");
} else if (counter > 10'000'000) {
printf("%4s", "\u25A3");
} else if (counter > 1'000'000) {
printf("%4s", "\u25A6");
} else if (counter > 100'000) {
printf("%4s", "\u25A4");
} else {
printf("%4s", "\u25A1");
}
}
std::cout << std::endl;
}
glDeleteBuffers(1, &ssboPixels);
glUseProgram(0);
}
uint32_t TextureTracer::createShadowMap(size_t numPhotons) {
std::vector<Photon> photons = generatePhotons(numPhotons);
uint32_t ssboPhotons;
glGenBuffers(1, &ssboPhotons);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPhotons);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(Photon) * photons.size(),
photons.data(), GL_DYNAMIC_COPY);
traceThroughTexture(ssboPhotons, photons.size());
glDeleteBuffers(1, &ssboPhotons);
glDeleteProgram(mTextureTracerProgram);
glDisable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(nullptr, nullptr);
return 0;
}
}
main.cpp
#include <GL/glew.h>
#include <GL/glut.h>
#include "TextureTracer.hpp"
int main(int argc, char *argv[]) {
glutInit(&argc, argv);
glutCreateWindow("OpenGL needs a window o.O");
glewInit();
auto mapper = gpu::TextureTracer();
// WITH 100'000 PHOTONS IT WORKS, WITH 1'000'000 PHOTONS NOT WHY?
mapper.createShadowMap(100'000);
return 0;
}
Operating systems cancel GPU program executions if they take too long. On Windows it is generally two seconds and on Linux it is five seconds most of the time, but it can vary.
This is to detect GPU programs that are stuck and cancel them. There are different methods to get around this timeout, but they all require admin/root privileges, which are not always available.
If possible, the execution can be split up into multiple invocations, as in the following snippet:
const uint32_t passSize = 2048u;
const uint32_t numPasses = (numPhotons / passSize) + 1;
const uint32_t numThreads = 64u;
const uint32_t numBlocks = passSize / numThreads;
glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "passSize"), passSize);
for (uint32_t pass = 0u; pass < numPasses; ++pass) {
glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "pass"), pass);
glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glFlush();
glFinish();
}
The glFlush() and glFinish() calls are important; otherwise the executions get bundled together and the OS triggers a timeout anyway.
In the shader you just need to access the right sections of the input data like so:
// other stuff
uniform uint pass;
uniform uint passSize;
void main() {
uint gid = gl_GlobalInvocationID.x;
uint passId = pass * passSize + gid;
if (passId >= photons.length()) return;
Photon photon = photons[passId];
// rest of program
}
That's all there is to it.
If you want to disable the OS timeouts, here is a relevant post for Linux: https://stackoverflow.com/a/30520538/5543884
And here is a post regarding Windows: https://stackoverflow.com/a/29759823/5543884
Environment:
Ubuntu 16.04 (x64)
C++
ffmpeg
Use-case
Multiple MPEG-TS fragments are rapidly decoded (several every second)
The format of the TS fragments is dynamic and can't be known ahead of time
The first A/V frames of each fragment need to be extracted
Problem statement
The code below successfully decodes A/V, BUT it has a huge memory leak (MBytes/sec)
According to the docs, it seems all memory is freed as it should be (does it...?)
Why do I get this huge memory leak? What am I missing in the following code snippet?
struct MEDIA_TYPE {
ffmpeg::AVMediaType eType;
union {
struct {
ffmpeg::AVPixelFormat colorspace;
int width, height;
float fFPS;
} video;
struct : WAVEFORMATEX {
short sSampleFormat;
} audio;
} format;
};
struct FRAME {
enum { MAX_PALNES = 3 + 1 };
int iStrmId;
int64_t pts; // Duration in 90Khz clock resolution
uint8_t** ppData; // Null terminated
int32_t* pStride;// Zero terminated
};
HRESULT ProcessTS(IN Operation op, IN uint8_t* pTS, IN uint32_t uiBytes, bool(*cb)(IN const MEDIA_TYPE& mt, IN FRAME& frame, IN PVOID pCtx), IN PVOID pCbCtx)
{
uiBytes -= uiBytes % 188;// align to 188 packet size
struct CONTEXT {
uint8_t* pTS;
uint32_t uiBytes;
int32_t iPos;
} ctx = { pTS, uiBytes, 0 };
LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this);
ffmpeg::AVFormatContext* pFmtCtx = 0;
if (0 == (pFmtCtx = ffmpeg::avformat_alloc_context()))
return E_OUTOFMEMORY;
ffmpeg::AVIOContext* pIoCtx = ffmpeg::avio_alloc_context(pTS, uiBytes, 0, &ctx
, [](void *opaque, uint8_t *buf, int buf_size)->int {
auto pCtx = (CONTEXT*)opaque;
int size = pCtx->uiBytes;
if (pCtx->uiBytes - pCtx->iPos < buf_size)
size = pCtx->uiBytes - pCtx->iPos;
if (size > 0) {
memcpy(buf, pCtx->pTS + pCtx->iPos, size);
pCtx->iPos += size;
}
return size;
}
, 0
, [](void* opaque, int64_t offset, int whence)->int64_t {
auto pCtx = (CONTEXT*)opaque;
switch (whence)
{
case SEEK_SET:
pCtx->iPos = offset;
break;
case SEEK_CUR:
pCtx->iPos += offset;
break;
case SEEK_END:
pCtx->iPos = pCtx->uiBytes - offset;
break;
case AVSEEK_SIZE:
return pCtx->uiBytes;
}
return pCtx->iPos;
});
pFmtCtx->pb = pIoCtx;
int iRet = ffmpeg::avformat_open_input(&pFmtCtx, "fakevideo.ts", m_pInputFmt, 0);
if (ERROR_SUCCESS != iRet) {
assert(false);
pFmtCtx = 0;// a user-supplied AVFormatContext will be freed on failure.
return E_FAIL;
}
struct DecodeContext {
ffmpeg::AVStream* pStream;
ffmpeg::AVCodec* pDecoder;
int iFramesProcessed;
};
HRESULT hr = S_OK;
int iStreamsProcessed = 0;
bool bVideoFound = false;
int64_t ptsLast = 0;
int64_t dtsLast = 0;
auto pContext = (DecodeContext*)alloca(sizeof(DecodeContext) * pFmtCtx->nb_streams);
for (unsigned int i = 0; i < pFmtCtx->nb_streams; i++) {
assert(pFmtCtx->streams[i]->index == i);
pContext[i].pStream = pFmtCtx->streams[i];
pContext[i].pDecoder = ffmpeg::avcodec_find_decoder(pFmtCtx->streams[i]->codec->codec_id);
pContext[i].iFramesProcessed= 0;
if (0 == pContext[i].pDecoder)
continue;
if ((iRet = ffmpeg::avcodec_open2(pFmtCtx->streams[i]->codec, pContext[i].pDecoder, NULL)) < 0) {
_ASSERT(FALSE);
hr = E_FAIL;
goto ErrExit;
}
}
while (S_OK == hr) {
ffmpeg::AVFrame* pFrame = 0;
ffmpeg::AVPacket pkt;
ffmpeg::av_init_packet(&pkt);
if (ERROR_SUCCESS != (iRet = ffmpeg::av_read_frame(pFmtCtx, &pkt))) {
hr = E_FAIL;
break;
}
if ((0 == dtsLast) && (0 != pkt.dts))
dtsLast = pkt.dts;
if ((0 == ptsLast) && (0 != pkt.pts))
ptsLast = pkt.pts;
DecodeContext& ctx = pContext[pkt.stream_index];
if (Operation::DECODE_FIRST_FRAME_OF_EACH_STREAM == op) {
if (iStreamsProcessed == pFmtCtx->nb_streams) {
hr = S_FALSE;
goto Next;
}
if (ctx.iFramesProcessed > 0)
goto Next;
iStreamsProcessed++;
}
if (0 == ctx.pDecoder)
goto Next;
if (0 == (pFrame = ffmpeg::av_frame_alloc())) {
hr = E_OUTOFMEMORY;
goto Next;
}
LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x, decode, S:%d, T:%d\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this, pkt.stream_index, ctx.pStream->codec->codec_type);
int bGotFrame = false;
int iBytesUsed = 0;
MEDIA_TYPE mt;
memset(&mt, 0, sizeof(mt));
mt.eType = ctx.pStream->codec->codec_type;
switch (mt.eType) {
case ffmpeg::AVMediaType::AVMEDIA_TYPE_AUDIO:
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
goto Next;
}
_ASSERT(pkt.size == iRet);
// FFMPEG AAC decoder oddity, first call to 'avcodec_decode_audio4' results mute audio where the second result the expected audio
bGotFrame = false;
if ((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
goto Next;
}
_ASSERT(pkt.size == iRet);
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if (false == bGotFrame)
goto Next;
iBytesUsed = ctx.pStream->codec->frame_size;
mt.format.audio.nChannels = ctx.pStream->codec->channels;
mt.format.audio.nSamplesPerSec = ctx.pStream->codec->sample_rate;
mt.format.audio.wBitsPerSample = ffmpeg::av_get_bytes_per_sample(ctx.pStream->codec->sample_fmt) * 8;
mt.format.audio.nBlockAlign = mt.format.audio.nChannels * mt.format.audio.wBitsPerSample / 8;
mt.format.audio.sSampleFormat = (short)pFrame->format;
break;
case ffmpeg::AVMediaType::AVMEDIA_TYPE_VIDEO:
if ((iRet = ffmpeg::avcodec_decode_video2(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
break;
}
if (false == bGotFrame)
goto Next;
assert(ffmpeg::AVPixelFormat::AV_PIX_FMT_YUV420P == ctx.pStream->codec->pix_fmt);// Thats is the only color space currently supported
iBytesUsed = (ctx.pStream->codec->width * ctx.pStream->codec->height * 3) / 2;
mt.format.video.width = ctx.pStream->codec->width;
mt.format.video.height = ctx.pStream->codec->height;
mt.format.video.colorspace = ctx.pStream->codec->pix_fmt;
mt.format.video.fFPS = (float)ctx.pStream->codec->framerate.num / ctx.pStream->codec->framerate.den;
bVideoFound = true;
break;
default:
goto Next;
}
ctx.iFramesProcessed++;
{
FRAME f = { ctx.pStream->index, ((0 == ptsLast) ? dtsLast : ptsLast), (uint8_t**)pFrame->data, (int32_t*)pFrame->linesize };
if ((iRet > 0) && (false == cb(mt, f, pCbCtx)))
hr = S_FALSE;// Breaks the loop
}
Next:
ffmpeg::av_free_packet(&pkt);
if (0 != pFrame) {
//ffmpeg::av_frame_unref(pFrame);
ffmpeg::av_frame_free(&pFrame);
pFrame = 0;
}
}
ErrExit:
for (unsigned int i = 0; i < pFmtCtx->nb_streams; i++)
ffmpeg::avcodec_close(pFmtCtx->streams[i]->codec);
pIoCtx->buffer = 0;// We have allocated the buffer, no need for ffmpeg to free it 4 us
pFmtCtx->pb = 0;
ffmpeg::av_free(pIoCtx);
ffmpeg::avformat_close_input(&pFmtCtx);
ffmpeg::avformat_free_context(pFmtCtx);
return hr;
}
You need to unref the packets before reusing them. And there's no need to allocate and deallocate them all the time.
Here's how I do it which might help you:
// Initialise a packet queue
std::list<AVPacket *> packets;
...
for (int c = 0; c < MAX_PACKETS; c++) {
ff->packets.push_back(av_packet_alloc());
}
while (!quit) {
... get packet from queue
int err = av_read_frame(ff->context, packet);
... process packet (audio, video, etc)
av_packet_unref(packet); // add back to queue for reuse
}
// Release packets
while (ff->packets.size()) { // free packets
AVPacket *packet = ff->packets.front();
av_packet_free(&packet);
ff->packets.pop_front();
}
In your code you've freed a packet which wasn't allocated in the first place.
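For illustration, a minimal self-contained sketch of that allocate-once / unref-per-iteration pattern; fmtCtx stands for an already-opened AVFormatContext and the actual decode step is omitted.
extern "C" {
#include <libavformat/avformat.h>
}

void drainPackets(AVFormatContext* fmtCtx)
{
    AVPacket* pkt = av_packet_alloc();          // allocate the packet once
    if (!pkt)
        return;

    while (av_read_frame(fmtCtx, pkt) >= 0) {
        // ... hand pkt to the decoder here ...
        av_packet_unref(pkt);                   // release the payload, keep the struct for reuse
    }

    av_packet_free(&pkt);                       // free the struct itself once, at the end
}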
I have been having problems rendering a model in OpenGL. I have made my own OBJ parser and a render function using glDrawElements, but I can't find the problem. This is my parser function:
void load_obj(char *nom)
{
FILE *file = fopen(nom, "r");
int iv = 0, in = 0, it = 0, ifa = 0;
if(file != NULL)
{
while(1)
{
char lineHeader[128];
int res = fscanf(file, "%s", lineHeader);
if(res == EOF)
break;
if(strcmp(lineHeader, "v") == 0)
{
float vertex[3] = {0,0,0};
fscanf(file, "%f %f %f\n",
&vertex[0], &vertex[1], &vertex[2]);
vertices[iv] = vertex[0];
vertices[iv+1] = vertex[1];
vertices[iv+2] = vertex[2];
iv += 3;
}
else if(strcmp(lineHeader, "vt") == 0)
{
float tex_vert[2] = {0,0};
fscanf(file, "%f %f\n",
&tex_vert[0], &tex_vert[1]);
texturas[it] = tex_vert[0];
texturas[it+1] = tex_vert[1];
it += 2;
}
else if(strcmp(lineHeader, "vn") == 0)
{
float normal[3] = {0,0,0};
fscanf(file, "%f %f %f\n",
&normal[0], &normal[1], &normal[2]);
normales[in] = normal[0];
normales[in+1] = normal[1];
normales[in+2] = normal[2];
in += 3;
}
else if(strcmp(lineHeader, "f") == 0)
{
unsigned int vertexIndex[9] = {0,0,0,0,0,0,0,0,0};
fscanf(file, "%d/%d/%d %d/%d/%d %d/%d/%d\n",
&vertexIndex[0], &vertexIndex[1], &vertexIndex[2],
&vertexIndex[3], &vertexIndex[4], &vertexIndex[5],
&vertexIndex[6], &vertexIndex[7], &vertexIndex[8]);
vertTriangulos[ifa] = vertexIndex[0];
vertTriangulos[ifa+1] = vertexIndex[1];
vertTriangulos[ifa+2] = vertexIndex[2];
vertTriangulos[ifa+3] = vertexIndex[3];
vertTriangulos[ifa+4] = vertexIndex[4];
vertTriangulos[ifa+5] = vertexIndex[5];
vertTriangulos[ifa+6] = vertexIndex[6];
vertTriangulos[ifa+7] = vertexIndex[7];
vertTriangulos[ifa+8] = vertexIndex[8];
ifa += 9;
nTriangulos++;
}
}
fclose(file);
}
}
And this is my draw function:
void draw()
{
glClear(GL_COLOR_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW);
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
glVertexPointer(3, GL_FLOAT, 0, vertices);
glTexCoordPointer(2, GL_FLOAT, 0, texturas);
glNormalPointer(GL_FLOAT, 0, normales);
glDrawElements(GL_TRIANGLES, nTriangulos*9, GL_UNSIGNED_INT, vertTriangulos);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
}
Being the variables:
long nTriangulos;
float *vertices, *normales, *texturas;
unsigned int *vertTriangulos;
The obj file I'm using is a simple cube, but the mesh I'm rendering is a complete disaster.
EDIT:
I have changed my code and now the parser looks like this:
if(strcmp(lineHeader, "f") == 0)
{
unsigned int vertexIndex[9] = {0,0,0,0,0,0,0,0,0};
fscanf(file, "%d/%d/%d %d/%d/%d %d/%d/%d\n",
&vertexIndex[0], &vertexIndex[1], &vertexIndex[2],
&vertexIndex[3], &vertexIndex[4], &vertexIndex[5],
&vertexIndex[6], &vertexIndex[7], &vertexIndex[8]);
for(int i = 0; i < 9; i += 3)
{
bool cierto = true;
int pos = 0;
if(indexbuff.size() > 0)
{
for(int i = 0; i < registro.size(); i++)
{
if(vertexIndex[0] == registro[i].in1
&& vertexIndex[1] == registro[i].in2
&& vertexIndex[2] == registro[i].in3)
{
cierto = false;
pos = i;
}
}
}
if(cierto)
{
verticebuff.push_back(vertices[vertexIndex[i]]);
verticebuff.push_back(vertices[vertexIndex[i]+1]);
verticebuff.push_back(vertices[vertexIndex[i]+2]);
textbuff.push_back(texturas[vertexIndex[i+1]]);
textbuff.push_back(texturas[vertexIndex[i+1]+1]);
normalbuff.push_back(normales[vertexIndex[i+2]]);
normalbuff.push_back(normales[vertexIndex[i+2]+1]);
normalbuff.push_back(normales[vertexIndex[i+2]+2]);
indexbuff.push_back(verticebuff.size()/3);
index3 indice;
indice.in1 = vertexIndex[i];
indice.in2 = vertexIndex[i+1];
indice.in3 = vertexIndex[i+2];
registro.push_back(indice);
}
else
{
indexbuff.push_back(i);
}
}
}
But it doesn't seem to work properly.
Classic OBJ-to-OpenGL buffer mistake: the element buffer index points to all attributes. In other words, if OpenGL sees a 0 index it takes the first element out of every enabled buffer and moves on to the next index. There is no way to use the OBJ structure directly without duplicating the data as you read it.
Your face-parsing code should be something like:
if(/*vertexIndex[0..2] triplet has not been seen yet*/){
    // OBJ indices are 1-based; positions/normals are 3 floats each,
    // texture coordinates 2 floats each, hence the (index - 1) * stride.
    verticebuff.push_back(vertices[(vertexIndex[0] - 1) * 3]);
    verticebuff.push_back(vertices[(vertexIndex[0] - 1) * 3 + 1]);
    verticebuff.push_back(vertices[(vertexIndex[0] - 1) * 3 + 2]);
    textbuff.push_back(texturas[(vertexIndex[1] - 1) * 2]);
    textbuff.push_back(texturas[(vertexIndex[1] - 1) * 2 + 1]);
    normalbuff.push_back(normales[(vertexIndex[2] - 1) * 3]);
    normalbuff.push_back(normales[(vertexIndex[2] - 1) * 3 + 1]);
    normalbuff.push_back(normales[(vertexIndex[2] - 1) * 3 + 2]);
    indexbuff.push_back(verticebuff.size() / 3 - 1); // index of the vertex just added
} else {
    indexbuff.push_back(/*index of the seen triplet*/);
}
//repeat 3 times
Then you can draw them with glDrawElements.
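For completeness, a sketch of the matching draw call once the deduplicated buffers exist (assuming std::vector<float> verticebuff/textbuff/normalbuff and std::vector<unsigned int> indexbuff as above); the index count is now simply the size of the index buffer rather than nTriangulos*9.
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);

glVertexPointer(3, GL_FLOAT, 0, verticebuff.data());
glTexCoordPointer(2, GL_FLOAT, 0, textbuff.data());
glNormalPointer(GL_FLOAT, 0, normalbuff.data());

// One entry in indexbuff per triangle corner, so the count is its size.
glDrawElements(GL_TRIANGLES, (GLsizei)indexbuff.size(), GL_UNSIGNED_INT, indexbuff.data());

glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);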
My current project has two DLLs, one called Engine and another called Graphics. Engine links against Graphics, and the main program links against both but only uses Engine.
In some functions like this:
typedef const char* ce_safechar;
AssimpIntegration::GetData(ce_safechar pFile, ...
this error pops up: _pFirstBlock == pHead, just after I exit the function.
I have seen many different issues like this, and they all seem to point at strings.
How can I avoid such behavior?
Full Function if needed:
FMESH_DATA AssimpIntegration::GetData(ce_safechar pFile, bool convLeftHanded, int maxSM)
{
// Create an instance of the Importer class
Assimp::Importer importer;
// And have it read the given file with some example post processing
// Usually - if speed is not the most important aspect for you - you'll
// probably want to request more post processing than we do in this example.
int cls = 0x0;
if (convLeftHanded)
cls = aiProcess_ConvertToLeftHanded;
const aiScene* scene = importer.ReadFile( pFile, aiProcess_GenNormals | aiProcess_FindInstances | aiProcess_CalcTangentSpace | cls | aiProcess_Triangulate );
// If the import failed, report it
if (!scene)
{
char* error = (char*)importer.GetErrorString();
CE_ERROR(error, "Assimp Error");
}
// Now we can access the file's contents.
Group<MESH_STRUCT> Vertices;
Group<DWORD> indices;
//////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////PROCESS MESH///////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
FMESH_DATA data;
if (scene->HasAnimations())
{
aiAnimation *anim = scene->mAnimations[0];
FOREACH(anim->mChannels[0]->mNumPositionKeys)
{
aiVector3D position = anim->mChannels[0]->mPositionKeys[i].mValue;
CE_ANIMATIONKEY key;
key.position = D3DXVECTOR3(position.x, position.y, position.z);
data.Keys.push_back(key);
}
}
D3DXVECTOR3 norms;
float tanx, tany, tanz;
float bitanx, bitany, bitanz;
FOREACH (scene->mNumMeshes)
{
if (i >= maxSM)
continue;
aiMesh *mesh = scene->mMeshes[i];
Vertices.group.clear();
indices.group.clear(); //dafuck?
if (mesh->HasPositions())
{
for (int v = 0; v != mesh->mNumVertices; v++)
{
norms = D3DXVECTOR3(0,0,0);
if (mesh->HasNormals())
norms = D3DXVECTOR3(mesh->mNormals[v].x,mesh->mNormals[v].y,mesh->mNormals[v].z);
tanx = tany = tanz = 0;
bitanx = bitany = bitanz = 0;
if (mesh->HasTangentsAndBitangents())
{
tanx = mesh->mTangents[v].x; tany = mesh->mTangents[v].y; tanz = mesh->mTangents[v].z;
bitanx = mesh->mBitangents[v].x; bitany = mesh->mBitangents[v].y; bitanz = mesh->mBitangents[v].z;
}
Vertices.push_back(MESH_STRUCT(
mesh->mVertices[v].x,
mesh->mVertices[v].y,
mesh->mVertices[v].z,
norms,
0,
0,
tanx, // TANGENTS
tany,
tanz
));
for (int b = 0; b < mesh->mNumBones; b++)
{
if (b > 4)
break;
float weight = 0.0f;
for ( int w = 0; w < mesh->mBones[b]->mNumWeights; w++)
if (mesh->mBones[b]->mWeights[w].mVertexId == v)
weight = mesh->mBones[b]->mWeights[w].mWeight;
Vertices.back().bWeight[b] = weight;
}
if (mesh->HasTextureCoords(0))
{
Vertices.back().U = mesh->mTextureCoords[0][v].x;
Vertices.back().V = mesh->mTextureCoords[0][v].y;
}
}
for (int f = 0; f != mesh->mNumFaces; f++)
{
for (int index = 0; index != mesh->mFaces[f].mNumIndices; index++)
{
indices.push_back(mesh->mFaces[f].mIndices[index]);
}
}
}
data.meshData.push_back(Vertices);
data.indices.push_back(indices);
// Set the required textures
const aiMaterial* pMaterial = scene->mMaterials[mesh->mMaterialIndex];
if (pMaterial->GetTextureCount(aiTextureType_DIFFUSE) > 0) {
aiString Path;
if (pMaterial->GetTexture(aiTextureType_DIFFUSE, 0, &Path, NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) {
std::string FullPath = Path.data;
if (FullPath.find("\\") != string::npos)
FullPath = FullPath.substr(FullPath.find_last_of("\\")+1, FullPath.length() - (FullPath.length() - FullPath.find_last_of("\\")-1));
else if (FullPath.find("/") != string::npos)
FullPath = FullPath.substr(FullPath.find_last_of("/")+1, FullPath.length() - FullPath.find_last_of("/")-1);
string rFile = pFile;
string spFile = std::string(pFile);
if (spFile.find("\\") != string::npos)
rFile = spFile.substr(0, spFile.find_last_of("\\")+1);
else
rFile = spFile.substr(0, spFile.find_last_of("/")+1);
FullPath = rFile + FullPath;
data.textures.push_back(FullPath);
}
}
else
{
data.textures.push_back("");
}
}
data.scene = scene;
return data;
}
You are using STL objects as parameters and/or return types of the functions/methods you are exporting from your DLL.
Example: You return FMESH_DATA. This class/struct has STL objects inside, as we can see from the line:
data.Keys.push_back(key);
The thing here is: it's not a good idea to have STL objects exported or imported across a DLL boundary. It requires the caller of your DLL to use the same CRT instance as the DLL that created the objects.
But if you really want to do that, you can change the Runtime Library setting of all of your projects to Multi-threaded DLL (/MD). Then you will be able to use the STL safely across the client and the DLL.
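A common alternative, not mentioned above but worth considering, is to keep STL types out of the exported signatures altogether, so the CRTs never have to match. A hypothetical sketch of such a flat boundary (all names below are made up for illustration):
#include <cstdint>

// Plain-old-data crossing the DLL boundary; the DLL owns the memory and
// hands out raw pointers, so no std::string/std::vector is passed across.
struct MeshBlob
{
    const float*    vertices;     // xyz triplets, owned by the DLL
    const uint32_t* indices;
    uint32_t        vertexCount;
    uint32_t        indexCount;
};

extern "C" __declspec(dllexport) MeshBlob* Engine_LoadMesh(const char* path);
extern "C" __declspec(dllexport) void      Engine_FreeMesh(MeshBlob* blob); // freed on the DLL's own heap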