How to achieve efficient 2d batching? - c++

I am trying to implement sprite batching but I am not quite sure how I should do it.
Texture batching is not very hard, I just group everything by texture id but I am not sure how I should handle the vertex data.
I could do it like this
// Pseudocode: bind the texture and the single shared quad once.
texture.bind();
gl_quad.bind();
// Each sprite sends its own matrix and issues its own draw call.
for(auto& quad: quads){
send(quad.matrix);
draw();
}
I would just upload 1 quad to the GPU and then send the matrix as a uniform variable and draw the quad but then I would have 1 draw call for every sprite that I want to draw which is probably not very clever.
Alternatively I could let every sprite have 4 vertices and then I would update them on the CPU, then I would gather all sprites and upload all vertices into one big buffer and bind it.
// Pseudocode: one texture bind, one combined vertex buffer, one draw call.
texture.bind();
// Builds a buffer from all quads and binds it.
auto big_buffer = create_vertex_buffers(quads).bind();
draw();
// The buffer is discarded after drawing (`delete` is a reserved word in real C++).
big_buffer.delete();
I could also use instanced rendering. Upload only one quad, every sprite would have a matrix and then upload all matrices into one buffer and call drawIndirect. I would have to send 9 floats instead of 8 (with the big_buffer version) and I think that drawIndirect is much more expensive than a simple drawcommand.
Are there any other ways that I have missed? What would you recommend?

I can show you a few classes that work with batches, along with their implementations, but they do rely on other classes. This work is protected by the copyright found in the header section of each file.
CommonStructs.h
// Version: 1.0
// Copyright (c) 2012 by Marek A. Krzeminski, MASc
// http://www.MarekKnows.com
#ifndef COMMON_STRUCTS_H
#define COMMON_STRUCTS_H
namespace vmk {
// GuiVertex ------------------------------------------------------------------
// A single 2D vertex as stored in a batch buffer: position, RGBA color and
// texture coordinate. The member order is significant: Batch computes the
// attribute byte offsets by summing sizeof( glm::vec2 ) and sizeof( glm::vec4 )
// in exactly this order.
struct GuiVertex {
    glm::vec2 position;
    glm::vec4 color;
    glm::vec2 texture;

    // textureIn defaults to a value-initialized glm::vec2 when omitted.
    GuiVertex( glm::vec2 positionIn, glm::vec4 colorIn, glm::vec2 textureIn = glm::vec2() )
        : position( positionIn ), color( colorIn ), texture( textureIn ) {
    }
}; // GuiVertex
// BatchConfig ----------------------------------------------------------------
// Describes how a group of vertices must be drawn. Two submissions can share
// a batch only when their BatchConfig values compare equal.
struct BatchConfig {
    unsigned uRenderType;
    int      iPriority;
    unsigned uTextureId;
    float    fAlpha;

    BatchConfig( unsigned uRenderTypeIn, int iPriorityIn, unsigned uTextureIdIn, float fAlphaIn )
        : uRenderType( uRenderTypeIn ),
          iPriority( iPriorityIn ),
          uTextureId( uTextureIdIn ),
          fAlpha( fAlphaIn ) {
    }

    // Render type, priority and texture id must match exactly; alpha values
    // are treated as equal unless they differ by more than 0.004.
    bool operator==( const BatchConfig& other ) const {
        return uRenderType == other.uRenderType
            && iPriority   == other.iPriority
            && uTextureId  == other.uTextureId
            && !( glm::abs( fAlpha - other.fAlpha ) > 0.004f );
    }

    // Exact negation of operator==.
    bool operator!=( const BatchConfig& other ) const {
        const bool same = ( *this == other );
        return !same;
    }
}; // BatchConfig
} // namespace vmk
#endif // COMMON_STRUCTS_H
Batch.h
// Version: 1.0
// Copyright (c) 2012 by Marek A. Krzeminski, MASc
// http://www.MarekKnows.com
#ifndef BATCH_H
#define BATCH_H
#include "CommonStructs.h"
namespace vmk {
class ShaderManager;
class Settings;
// A fixed-capacity vertex buffer (VBO, plus a VAO on OpenGL 3+) that collects
// GuiVertex data sharing one BatchConfig and draws it with a single
// glDrawArrays call in render().
// NOTE(review): `sealed` is a Microsoft-specific keyword; standard C++11 spells it `final`.
class Batch sealed {
private:
// Shared by every Batch; fetched on first construction.
static Settings* m_pSettings;
static ShaderManager* m_pShaderManager;
// Capacity of the buffer, in vertices.
unsigned m_uMaxNumVertices;
// Number of vertices currently stored (including degenerate-strip vertices).
unsigned m_uNumUsedVertices;
// OpenGL object names; 0 means "not created".
unsigned m_vao;
unsigned m_vbo;
// Configuration of the vertices currently stored in this batch.
BatchConfig m_config;
// Last vertex added; duplicated to stitch consecutive triangle strips together.
GuiVertex m_lastVertex;
// For Debugging Only
unsigned m_uId; // Batch Id
std::vector<std::string> m_vIds; // Id's Of What Is Contained In This Batch
public:
// Throws ExceptionHandler if uMaxNumVertices < 1000 or buffer creation fails.
Batch( unsigned uId, unsigned uMaxNumVertices );
~Batch();
// True when config compares equal to this batch's current config.
bool isBatchConfig( const BatchConfig& config ) const;
bool isEmpty() const;
// True when uNumVertices (plus any stitching vertices) still fit.
bool isEnoughRoom( unsigned uNumVertices ) const;
// Returns whichever of this batch and pBatch holds more vertices.
Batch* getFullest( Batch* pBatch );
int getPriority() const;
// Replaces the batch config, then appends the vertices.
void add( const std::vector<GuiVertex>& vVertices, const BatchConfig& config );
// Appends the vertices under the current config; throws ExceptionHandler if they do not fit.
void add( const std::vector<GuiVertex>& vVertices );
// Records a debug label for content stored in this batch.
void addId( const std::string& strId );
// Draws the stored vertices and resets the batch to empty.
void render();
private:
// Declared but never defined, which makes the class non-copyable.
Batch( const Batch& c ); // Not Implemented
Batch& operator=( const Batch& c ); // Not Implemented
// Deletes the VBO and VAO if they exist.
void cleanUp();
}; // Batch
} // namespace vmk
#endif // BATCH_H
Batch.cpp
// Version: 1.0
// Copyright (c) 2012 by Marek A. Krzeminski, MASc
// http://www.MarekKnows.com
#include "stdafx.h"
#include "Batch.h"
#include "Logger.h"
#include "Property.h"
#include "Settings.h"
#include "ShaderManager.h"
namespace vmk {
// Class-wide pointers, initially null; the Batch constructor fills them in
// from Settings::get() / ShaderManager::get() the first time it runs.
Settings* Batch::m_pSettings = nullptr;
ShaderManager* Batch::m_pShaderManager = nullptr;
// ----------------------------------------------------------------------------
// Batch()
// Creates an empty batch with room for uMaxNumVertices vertices.
// uId is stored for debug logging only.
// Throws ExceptionHandler when uMaxNumVertices < 1000 or when glGetError()
// reports an error after the buffer objects have been set up.
Batch::Batch( unsigned uId, unsigned uMaxNumVertices ) :
m_uMaxNumVertices( uMaxNumVertices ),
m_uNumUsedVertices( 0 ),
m_vao( 0 ),
m_vbo( 0 ),
m_config(GL_TRIANGLE_STRIP, 0, 0, 1.0f ),
m_lastVertex( glm::vec2(), glm::vec4() ),
m_uId( uId ) {
// Lazily fetch the shared Settings / ShaderManager pointers on first use.
if ( nullptr == m_pSettings ) {
m_pSettings = Settings::get();
}
if ( nullptr == m_pShaderManager ) {
m_pShaderManager = ShaderManager::get();
}
// Optimal Size For A Batch Is Between 1-4MB In Size. Number Of Elements That Can Be Stored In A
// Batch Is Determined By Calculating #Bytes Used By Each Vertex
if ( uMaxNumVertices < 1000 ) {
std::ostringstream strStream;
strStream << __FUNCTION__ << " uMaxNumVertices{" << uMaxNumVertices << "} is too small. Choose a number >= 1000 ";
throw ExceptionHandler( strStream );
}
// Clear Error Codes
// (so the check at the end of the constructor reflects the calls made below)
glGetError();
// Vertex array objects are only used when the reported OpenGL major version is >= 3.
if ( m_pSettings->getOpenglVersion().x >= 3 ) {
glGenVertexArrays( 1, &m_vao );
glBindVertexArray( m_vao );
}
// Create Batch Buffer
// Storage is allocated for the full capacity with no initial data (nullptr).
glGenBuffers( 1, &m_vbo );
glBindBuffer( GL_ARRAY_BUFFER, m_vbo );
glBufferData( GL_ARRAY_BUFFER, uMaxNumVertices * sizeof( GuiVertex ), nullptr, GL_STREAM_DRAW );
if ( m_pSettings->getOpenglVersion().x >= 3 ) {
// Attribute offsets follow GuiVertex's member order: vec2 position, vec4 color, vec2 texture.
unsigned uOffset = 0;
m_pShaderManager->enableAttribute( A_POSITION, sizeof( GuiVertex ), uOffset );
uOffset += sizeof( glm::vec2 );
m_pShaderManager->enableAttribute( A_COLOR, sizeof( GuiVertex ), uOffset );
uOffset += sizeof( glm::vec4 );
m_pShaderManager->enableAttribute( A_TEXTURE_COORD0, sizeof( GuiVertex ), uOffset );
// Unbind the VAO before disabling the attributes again.
glBindVertexArray( 0 );
m_pShaderManager->disableAttribute( A_POSITION );
m_pShaderManager->disableAttribute( A_COLOR );
m_pShaderManager->disableAttribute( A_TEXTURE_COORD0 );
}
glBindBuffer( GL_ARRAY_BUFFER, 0 );
// Any GL error at this point means setup failed: release what was created and report it.
if ( GL_NO_ERROR != glGetError() ) {
cleanUp();
throw ExceptionHandler( __FUNCTION__ + std::string( " failed to create batch" ) );
}
} // Batch
// ----------------------------------------------------------------------------
// ~Batch()
// Releases the OpenGL objects owned by this batch.
Batch::~Batch()
{
    this->cleanUp();
} // ~Batch
// ----------------------------------------------------------------------------
// cleanUp()
void Batch::cleanUp() {
if ( m_vbo != 0 ) {
glBindBuffer( GL_ARRAY_BUFFER, 0 );
glDeleteBuffers( 1, &m_vbo );
m_vbo = 0;
}
if ( m_vao != 0 ) {
glBindVertexArray( 0 );
glDeleteVertexArrays( 1, &m_vao );
m_vao = 0;
}
} // cleanUp
// ----------------------------------------------------------------------------
// isBatchConfig()
// Reports whether the given config compares equal (BatchConfig::operator==)
// to the config this batch currently holds.
bool Batch::isBatchConfig( const BatchConfig& config ) const
{
    const bool matches = ( config == m_config );
    return matches;
} // isBatchConfig
// ----------------------------------------------------------------------------
// isEmpty()
// True when no vertices are currently stored in this batch.
bool Batch::isEmpty() const
{
    return m_uNumUsedVertices == 0;
} // isEmpty
// ----------------------------------------------------------------------------
// isEnoughRoom()
// Returns True If The Number Of Vertices Passed In Can Be Stored In This Batch
// Without Reaching The Limit Of How Many Vertices Can Fit In The Batch
// Tells the caller whether uNumVertices more vertices fit without exceeding
// the batch capacity. A non-empty triangle-strip batch needs two additional
// vertices to join the new strip to the previous one.
bool Batch::isEnoughRoom( unsigned uNumVertices ) const
{
    unsigned uRequired = m_uNumUsedVertices + uNumVertices;
    if ( m_uNumUsedVertices > 0 && GL_TRIANGLE_STRIP == m_config.uRenderType )
    {
        uRequired += 2; // degenerate-triangle stitching vertices
    }
    return uRequired <= m_uMaxNumVertices;
} // isEnoughRoom
// ----------------------------------------------------------------------------
// getFullest()
// Returns The Batch That Contains The Most Number Of Stored Vertices Between
// This Batch And The One Passed In
// Picks the fuller of the two batches; when both hold the same number of
// vertices the batch passed in is returned.
Batch* Batch::getFullest( Batch* pBatch )
{
    if ( m_uNumUsedVertices > pBatch->m_uNumUsedVertices )
    {
        return this;
    }
    return pBatch;
} // getFullest
// ----------------------------------------------------------------------------
// getPriority()
// Priority taken from the batch's current config.
int Batch::getPriority() const
{
    const int iPriority = m_config.iPriority;
    return iPriority;
} // getPriority
// ----------------------------------------------------------------------------
// add()
// Adds Vertices To Batch And Also Sets The Batch Config Options
void Batch::add( const std::vector<GuiVertex>& vVertices, const BatchConfig& config ) {
m_config = config;
add( vVertices );
} // add
// ----------------------------------------------------------------------------
// add()
// Appends vVertices to the batch under the current config.
//
// For a non-empty GL_TRIANGLE_STRIP batch two extra vertices are written
// first (a copy of the previously stored last vertex and a copy of the new
// first vertex) so the strips are joined by degenerate triangles.
//
// Throws ExceptionHandler when vVertices is empty, when it is larger than
// the whole batch, or when the batch does not have enough free room.
// Validation runs from most to least specific so each message is reachable:
// previously the "larger than the whole batch" check could never fire
// because the free-room check always caught that case first.
void Batch::add( const std::vector<GuiVertex>& vVertices ) {
    if ( vVertices.empty() ) {
        std::ostringstream strStream;
        strStream << __FUNCTION__ << " can not add {" << vVertices.size() << "} vertices to batch.";
        throw ExceptionHandler( strStream );
    }
    if ( vVertices.size() > m_uMaxNumVertices ) {
        std::ostringstream strStream;
        strStream << __FUNCTION__ << " can not add {" << vVertices.size() << "} vertices to batch. Maximum number of vertices allowed in a batch is {" << m_uMaxNumVertices << "}";
        throw ExceptionHandler( strStream );
    }
    // 2 Extra Vertices Are Needed For Degenerate Triangles Between Each Strip
    unsigned uNumExtraVertices = ( GL_TRIANGLE_STRIP == m_config.uRenderType && m_uNumUsedVertices > 0 ? 2 : 0 );
    if ( uNumExtraVertices + vVertices.size() > m_uMaxNumVertices - m_uNumUsedVertices ) {
        std::ostringstream strStream;
        strStream << __FUNCTION__ << " not enough room for {" << vVertices.size() << "} vertices in this batch. Maximum number of vertices allowed in a batch is {" << m_uMaxNumVertices << "} and {" << m_uNumUsedVertices << "} are already used";
        if ( uNumExtraVertices > 0 ) {
            strStream << " plus you need room for {" << uNumExtraVertices << "} extra vertices too";
        }
        throw ExceptionHandler( strStream );
    }
    // Add Vertices To Buffer
    if ( m_pSettings->getOpenglVersion().x >= 3 ) {
        glBindVertexArray( m_vao );
    }
    glBindBuffer( GL_ARRAY_BUFFER, m_vbo );
    if ( uNumExtraVertices > 0 ) {
        // Need To Add 2 Vertex Copies To Create Degenerate Triangles Between This Strip
        // And The Last Strip That Was Stored In The Batch
        glBufferSubData( GL_ARRAY_BUFFER, m_uNumUsedVertices * sizeof( GuiVertex ), sizeof( GuiVertex ), &m_lastVertex );
        glBufferSubData( GL_ARRAY_BUFFER, ( m_uNumUsedVertices + 1 ) * sizeof( GuiVertex ), sizeof( GuiVertex ), &vVertices[0] );
    }
    // TODO: Use glMapBuffer If Moving Large Chunks Of Data > 1MB
    glBufferSubData( GL_ARRAY_BUFFER, ( m_uNumUsedVertices + uNumExtraVertices ) * sizeof( GuiVertex ), vVertices.size() * sizeof( GuiVertex ), &vVertices[0] );
    if ( m_pSettings->getOpenglVersion().x >= 3 ) {
        glBindVertexArray( 0 );
    }
    glBindBuffer( GL_ARRAY_BUFFER, 0 );
    // The size was validated against m_uMaxNumVertices above, so it fits in unsigned.
    m_uNumUsedVertices += static_cast<unsigned>( vVertices.size() ) + uNumExtraVertices;
    // Remember the final vertex so the next strip can be stitched onto this one.
    m_lastVertex = vVertices[vVertices.size() - 1];
} // add
// ----------------------------------------------------------------------------
// addId()
// Appends a label to the list of ids that render() prints when debug
// render logging is enabled.
void Batch::addId( const std::string& strId )
{
    m_vIds.insert( m_vIds.end(), strId );
} // addId
// ----------------------------------------------------------------------------
// render()
// Draws everything stored in the batch with one glDrawArrays call and then
// resets the batch to empty (vertex count and priority back to 0).
// Does nothing when the batch holds no vertices.
//
// On OpenGL 3+ the attribute layout recorded in the VAO is used; on 2.x the
// attributes are enabled and disabled around the draw call. When debug
// render logging is enabled, one summary row per rendered batch is logged
// and the collected ids are cleared.
void Batch::render() {
    if ( m_uNumUsedVertices == 0 ) {
        // Nothing In This Buffer To Render
        return;
    }
    // A texture id of INVALID_UNSIGNED means the batch is drawn untextured.
    bool usingTexture = INVALID_UNSIGNED != m_config.uTextureId;
    m_pShaderManager->setUniform( U_USING_TEXTURE, usingTexture );
    if ( usingTexture ) {
        m_pShaderManager->setTexture( 0, U_TEXTURE0_SAMPLER_2D, m_config.uTextureId );
    }
    m_pShaderManager->setUniform( U_ALPHA, m_config.fAlpha );
    // Draw Contents To Buffer
    if ( m_pSettings->getOpenglVersion().x >= 3 ) {
        glBindVertexArray( m_vao );
        glDrawArrays( m_config.uRenderType, 0, m_uNumUsedVertices );
        glBindVertexArray( 0 );
    } else { // OpenGL v2.x
        glBindBuffer( GL_ARRAY_BUFFER, m_vbo );
        // Offsets follow GuiVertex's member order: vec2 position, vec4 color, vec2 texture.
        unsigned uOffset = 0;
        m_pShaderManager->enableAttribute( A_POSITION, sizeof( GuiVertex ), uOffset );
        uOffset += sizeof( glm::vec2 );
        m_pShaderManager->enableAttribute( A_COLOR, sizeof( GuiVertex ), uOffset );
        uOffset += sizeof( glm::vec4 );
        m_pShaderManager->enableAttribute( A_TEXTURE_COORD0, sizeof( GuiVertex ), uOffset );
        glDrawArrays( m_config.uRenderType, 0, m_uNumUsedVertices );
        m_pShaderManager->disableAttribute( A_POSITION );
        m_pShaderManager->disableAttribute( A_COLOR );
        m_pShaderManager->disableAttribute( A_TEXTURE_COORD0 );
        glBindBuffer( GL_ARRAY_BUFFER, 0 );
    }
    if ( m_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
        // Row layout: id | primitive type | priority | vertex count | texture | ids...
        std::ostringstream strStream;
        strStream << std::setw( 2 ) << m_uId << " | "
                  << std::left << std::setw( 10 );
        if ( GL_LINES == m_config.uRenderType ) {
            strStream << "Lines";
        } else if ( GL_TRIANGLES == m_config.uRenderType ) {
            strStream << "Triangles";
        } else if ( GL_TRIANGLE_STRIP == m_config.uRenderType ) {
            strStream << "Tri Strips";
        } else if ( GL_TRIANGLE_FAN == m_config.uRenderType ) {
            strStream << "Tri Fan";
        } else {
            strStream << "Unknown";
        }
        strStream << " | " << std::right
                  << std::setw( 6 ) << m_config.iPriority << " | "
                  << std::setw( 7 ) << m_uNumUsedVertices << " | "
                  << std::setw( 5 );
        if ( INVALID_UNSIGNED != m_config.uTextureId ) {
            strStream << m_config.uTextureId;
        } else {
            strStream << "None";
        }
        strStream << " |";
        // Standard C++11 range-for replaces the Microsoft-only `for each ( ... in ... )`.
        for ( const std::string& strId : m_vIds ) {
            strStream << " " << strId;
        }
        m_vIds.clear();
        Logger::log( strStream );
    }
    // Reset Buffer
    m_uNumUsedVertices = 0;
    m_config.iPriority = 0;
} // render
} // namespace vmk
BatchManager.h
// Version: 1.0
// Copyright (c) 2012 by Marek A. Krzeminski, MASc
// http://www.MarekKnows.com
#ifndef BATCH_MANAGER_H
#define BATCH_MANAGER_H
#include "Singleton.h"
#include "CommonStructs.h"
namespace vmk {
class Batch;
// Owns a fixed pool of Batch objects and routes each submitted vertex list
// to a batch with a matching BatchConfig, flushing batches when needed.
// NOTE(review): `sealed` is a Microsoft-specific keyword; standard C++11 spells it `final`.
class BatchManager sealed : public Singleton {
private:
// The pool of batches, created once in the constructor.
std::vector<std::shared_ptr<Batch>> m_vBatches;
// Number of batches in the pool (used as the loop bound over m_vBatches).
unsigned m_uNumBatches;
// Capacity, in vertices, given to each batch.
unsigned m_maxNumVerticesPerBatch;
public:
// Throws ExceptionHandler if uNumBatches < 10 or numVerticesPerBatch < 1000.
BatchManager( unsigned uNumBatches, unsigned numVerticesPerBatch );
virtual ~BatchManager();
// Returns the constructed instance; throws ExceptionHandler if none exists yet.
static BatchManager* const get();
// Queues vVertices for drawing under config; strId is a label used for debug logging.
void render( const std::vector<GuiVertex>& vVertices, const BatchConfig& config, const std::string& strId );
// Renders every non-empty batch.
void emptyAll();
protected:
private:
// Declared but never defined, which makes the class non-copyable.
BatchManager( const BatchManager& c ); // Not Implemented
BatchManager& operator=( const BatchManager& c); // Not Implemented
// Renders either all batches or pBatchToEmpty plus every lower-priority batch.
void emptyBatch( bool emptyAll, Batch* pBatchToEmpty );
//void renderBatch( const std::vector<GuiVertex>& vVertices, const BatchConfig& config );
}; // BatchManager
} // namespace vmk
#endif // BATCH_MANAGER_H
BatchManager.cpp
// Version: 1.0
// Copyright (c) 2012 by Marek A. Krzeminski, MASc
// http://www.MarekKnows.com
#include "stdafx.h"
#include "BatchManager.h"
#include "Batch.h"
#include "Logger.h"
#include "Settings.h"
namespace vmk {
// File-local pointers: both are set by the BatchManager constructor, and the
// destructor resets s_pBatchManager to nullptr.
static BatchManager* s_pBatchManager = nullptr;
static Settings* s_pSettings = nullptr;
// ----------------------------------------------------------------------------
// BatchManager()
// Builds the batch pool and registers this object as the instance returned
// by BatchManager::get().
// Throws ExceptionHandler when uNumBatches < 10 or numVerticesPerBatch < 1000.
BatchManager::BatchManager( unsigned uNumBatches, unsigned numVerticesPerBatch )
    : Singleton( TYPE_BATCH_MANAGER ),
      m_uNumBatches( uNumBatches ),
      m_maxNumVerticesPerBatch( numVerticesPerBatch )
{
    // Validate the pool size.
    if ( uNumBatches < 10 )
    {
        std::ostringstream message;
        message << __FUNCTION__ << " uNumBatches{" << uNumBatches << "} is too small. Choose a number >= 10 ";
        throw ExceptionHandler( message );
    }

    // Validate the per-batch capacity. A batch of roughly 1-4MB is a good
    // size; the vertex count that gives follows from the bytes per vertex.
    if ( numVerticesPerBatch < 1000 )
    {
        std::ostringstream message;
        message << __FUNCTION__ << " numVerticesPerBatch{" << numVerticesPerBatch << "} is too small. Choose A Number >= 1000 ";
        throw ExceptionHandler( message );
    }

    // Populate the pool; each batch's id is its index in the pool.
    m_vBatches.reserve( uNumBatches );
    for ( unsigned uBatchId = 0; uBatchId != uNumBatches; ++uBatchId )
    {
        std::shared_ptr<Batch> pNewBatch( new Batch( uBatchId, numVerticesPerBatch ) );
        m_vBatches.push_back( pNewBatch );
    }

    s_pSettings = Settings::get();
    s_pBatchManager = this;
} // BatchManager
// ----------------------------------------------------------------------------
// ~BatchManager()
// Unregisters the instance pointer and then drops the batch pool.
BatchManager::~BatchManager()
{
    s_pBatchManager = nullptr;
    m_vBatches.clear();
} // ~BatchManager
// ----------------------------------------------------------------------------
// get()
// Accessor for the instance registered by the constructor.
// Throws ExceptionHandler when no BatchManager is currently constructed.
BatchManager* const BatchManager::get()
{
    if ( s_pBatchManager != nullptr )
    {
        return s_pBatchManager;
    }
    throw ExceptionHandler( __FUNCTION__ + std::string( " failed, BatchManager has not been constructed yet" ) );
} // get
// ----------------------------------------------------------------------------
// render()
// Queues vVertices for drawing under the given config.
//
// Selection order:
//   1. a batch whose config already matches (it is flushed first if full),
//   2. otherwise the first empty batch,
//   3. otherwise the fullest batch, after flushing it.
// strId is only recorded when debug render logging is enabled.
// Batch::add() may throw ExceptionHandler, e.g. when vVertices is empty or
// larger than a whole batch.
void BatchManager::render( const std::vector<GuiVertex>& vVertices, const BatchConfig& config, const std::string& strId ) {
    Batch* pEmptyBatch = nullptr;
    Batch* pFullestBatch = m_vBatches[0].get();
    // Determine Which Batch To Put The Vertices Into
    for ( unsigned u = 0; u < m_uNumBatches; ++u ) {
        Batch* pBatch = m_vBatches[u].get();
        if ( pBatch->isBatchConfig( config ) ) {
            bool wasFlushed = false;
            if ( !pBatch->isEnoughRoom( vVertices.size() ) ) {
                // First Need To Empty This Batch Before Adding Anything To It
                emptyBatch( false, pBatch );
                wasFlushed = true;
                if ( s_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
                    Logger::log( "Forced batch to empty to make room for vertices" );
                }
            }
            if ( s_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
                pBatch->addId( strId );
            }
            if ( wasFlushed ) {
                // Batch::render() resets the batch's priority to 0 when it
                // flushes, so the config has to be re-applied; otherwise the
                // refilled batch would carry the wrong priority and would no
                // longer match later submissions using this config.
                pBatch->add( vVertices, config );
            } else {
                pBatch->add( vVertices );
            }
            return;
        }
        // Store Pointer To First Empty Batch
        if ( nullptr == pEmptyBatch && pBatch->isEmpty() ) {
            pEmptyBatch = pBatch;
        }
        // Store Pointer To Fullest Batch
        pFullestBatch = pBatch->getFullest( pFullestBatch );
    }
    // If We Get Here Then We Didn't Find An Appropriate Batch To Put The Vertices Into
    // If We Have An Empty Batch, Put Vertices There
    if ( nullptr != pEmptyBatch ) {
        if ( s_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
            pEmptyBatch->addId( strId );
        }
        pEmptyBatch->add( vVertices, config );
        return;
    }
    // No Empty Batches Were Found Therefore We Must Empty One First And Then We Can Use It
    emptyBatch( false, pFullestBatch );
    if ( s_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
        Logger::log( "Forced fullest batch to empty to make room for vertices" );
        pFullestBatch->addId( strId );
    }
    pFullestBatch->add( vVertices, config );
} // render
// ----------------------------------------------------------------------------
// emptyAll()
void BatchManager::emptyAll() {
emptyBatch( true, m_vBatches[0].get() );
if ( s_pSettings->isDebugLoggingEnabled( Settings::DEBUG_RENDER ) ) {
Logger::log( "Forced all batches to empty" );
}
} // emptyAll
// ----------------------------------------------------------------------------
// CompareBatch
struct CompareBatch : public std::binary_function<Batch*, Batch*, bool> {
bool operator()( const Batch* pBatchA, const Batch* pBatchB ) const {
return ( pBatchA->getPriority() > pBatchB->getPriority() );
} // operator()
}; // CompareFunctor
// ----------------------------------------------------------------------------
// emptyBatch()
// Empties The Batches According To Priority. If emptyAll() Is False Then
// Only Empty The Batches That Are Lower Priority Than The One Specified
// AND Also Empty The One That Is Passed In
void BatchManager::emptyBatch( bool emptyAll, Batch* pBatchToEmpty ) {
// Sort Bathes By Priority
std::priority_queue<Batch*, std::vector<Batch*>, CompareBatch> queue;
for ( unsigned u = 0; u < m_uNumBatches; ++u ) {
// Add All Non-Empty Batches To Queue Which Will Be Sorted By Order
// From Lowest To Highest Priority
if ( !m_vBatches[u]->isEmpty() ) {
if ( emptyAll ) {
queue.push( m_vBatches[u].get() );
} else if ( m_vBatches[u]->getPriority() < pBatchToEmpty->getPriority() ) {
// Only Add Batches That Are Lower In Priority
queue.push( m_vBatches[u].get() );
}
}
}
// Render All Desired Batches
while ( !queue.empty() ) {
Batch* pBatch = queue.top();
pBatch->render();
queue.pop();
}
if ( !emptyAll ) {
// When Not Emptying All The Batches, We Still Want To Empty
// The Batch That Is Passed In, In Addition To All Batches
// That Have Lower Priority Than It
pBatchToEmpty->render();
}
} // emptyBatch
} // namespace vmk
Note that these classes will not compile on their own, because they depend on other classes (Settings, Properties, ShaderManager, Logger), and those in turn depend on further classes. This code comes from a large, working OpenGL graphics rendering and game engine that uses OpenGL shaders. It is working source code and, ideally, bug free.
This may serve as a guide to how one would design a batch process, and may give insight into the things to consider, for example: the types of vertices being rendered { Lines, Triangles, TriangleStrip, TriangleFan etc. }, the priority that decides where to draw an object depending on whether it has transparencies or not, and the handling of degenerate triangles between the vertices when creating a batch object.
The way that this is designed is that only matching batch types will fit in the same bucket, and the bucket will try to fill itself, if it is too full to hold the vertices it will then look for another bucket to see if one is available, if no buckets are available it will then search to see which is the fullest and it will empty them from the priority queue to send the vertices to the video card to be rendered.
This is tied into a ShaderManager that manages how OpenGL defines and sets up shader programs and linking them to a program, it is also tied in to an AssetStorage class which is not found here but found in the ShaderManager. This system handles a complete custom GUI, Sprites, Fonts, Textures etc.
If you would like to learn more I would highly suggest visiting www.MarekKnows.com and checking out his Video Tutorial Series on OpenGL; for this specific application you would need to follow his Shader Engine series!

It's worth noting that sprite rendering is only really expensive from the standpoint of the context switches in between each sprite rendered. Rendering a quad for each sprite tends to be a trivial expense in comparison.
Instancing here of the geometry data is likely to hinder more than help, since the cost of using a separate transformation matrix per quad rendered tends to outweigh the expense of just uploading a fresh set of vertex attributes per quad. Instancing works best when you have at least moderately complex geometry, like hundreds to thousands of vertices.
Typically if speed is your primary goal, the top priority here is to coalesce your texture data into "sprite sheet"-style texture atlases. The first goal is to have as few texture context switches as possible, and typically far beyond a separate texture image per sprite/frame. This would also make instancing further impractical because each quad or pair of triangles you render would then tend to vary wildly in terms of their texture coordinates.
If you actually reach this point where you have as few texture context switches as possible and want more speed for a bunch of dynamic sprites, then the next practical step (but with diminishing returns) might be to use streaming VBOs. You can fill a streaming VBO with the vertex attributes required to render the tris/quads for the current frame (with different vertex positions and texture coordinates) and then draw the VBO. For the best performance, it might help to chunk the VBOs and not fill them with all the geometry data of your entire scene per frame with a strategy where you fill and draw, fill and draw, fill and draw multiple times per frame.
Nevertheless, since you asked about instancing (which implies to me that you're using a separate image per sprite), your first and biggest gain will probably come from using texture atlases and reducing the texture context switches even further. The geometry-side optimization is a totally separate process and you might do fine for quite a while even using immediate mode here. It would be towards a finishing touch for optimization where you start optimizing that towards streaming VBOs.
This is all with the assumption of dynamic sprites that either move around on the screen or change images. For static tile-style images that never change, you can store their vertex attributes into a static VBO and potentially benefit from instancing (but there we're instancing a boatload of tiles per static VBO, and therefore each static VBO might have hundreds to thousands of vertices each).

Related

Rendering Multiline Text with NVPath Extension and Pango

I'm using Pango to layout my text and NV Path to render glyphs.
Having difficulty in finding correct methods for getting per glyph positions. As you can see at the moment I'm calculating this values according to line and glyph indexes.
But Pango has better methods for this, such as per-glyph, per-line, and extent queries. My problem is that these methods have no documentation and I wasn't able to find any samples.
How can i get correct glyph positions from Pango for this type of application?
// Scratch buffers reused for every run: glyph ids and (x, y) translation pairs.
std::vector<uint32_t> glyphs;
std::vector<GLfloat> positions;
// Walk the layout line by line, and each line run by run.
int lineCount = pango_layout_get_line_count( pangoLayout );
for ( int l = 0; l < lineCount; ++l )
{
PangoLayoutLine* line = pango_layout_get_line_readonly( pangoLayout, l );
GSList* runs = line->runs;
// Horizontal pen position; restarts at 0 for each line and carries across the line's runs.
float xOffset = 0.0f;
while( runs )
{
PangoLayoutRun* run = static_cast<PangoLayoutRun*>( runs->data );
// Size the buffers for this run; every element is overwritten in the loop below.
glyphs.resize( run->glyphs->num_glyphs, 0 );
positions.resize( run->glyphs->num_glyphs * 2, 0 );
for( int g = 0; g < run->glyphs->num_glyphs; ++g )
{
glyphs[g] = run->glyphs->glyphs[g].glyph;
// Need Correct Values Here
// Placeholder placement: x is the accumulated glyph widths, y is the line index,
// both scaled by NVPATH_DEFUALT_EMSCALE.
positions[ g * 2 + 0 ] = xOffset * NVPATH_DEFUALT_EMSCALE;
positions[ g * 2 + 1 ] = (float)l * NVPATH_DEFUALT_EMSCALE;
xOffset += PANGO_PIXELS( run->glyphs->glyphs[g].geometry.width ) / getFontSize();
}
// Look up the renderer-side font by the family name of the run's Pango font.
// NOTE(review): pango_font_describe() presumably returns a newly allocated description
// that is never freed here — confirm against the Pango docs and free it if so.
const Font::RefT font = getFont( pango_font_description_get_family( pango_font_describe( run->item->analysis.font ) ) );
// Pass 1: stencil the glyph outlines.
// NOTE(review): &glyphs[0] / &positions[0] assume the run has at least one glyph — confirm.
glEnable( GL_STENCIL_TEST );
glStencilFillPathInstancedNV( run->glyphs->num_glyphs,
GL_UNSIGNED_INT,
&glyphs[0],
font->nvPath,
GL_PATH_FILL_MODE_NV,
0xFF,
GL_TRANSLATE_2D_NV,
&positions[0]
);
// Pass 2: cover the stencilled area in black, zeroing the stencil where it passes.
glStencilFunc( GL_NOTEQUAL, 0, 0xFF );
glStencilOp( GL_KEEP, GL_KEEP, GL_ZERO );
glColor3f( 0.0, 0.0, 0.0 );
glCoverFillPathInstancedNV( run->glyphs->num_glyphs,
GL_UNSIGNED_INT,
&glyphs[0],
font->nvPath,
GL_BOUNDING_BOX_OF_BOUNDING_BOXES_NV,
GL_TRANSLATE_2D_NV,
&positions[0]
);
glDisable( GL_STENCIL_TEST );
runs = runs->next;
}
}

Cocos2d-x Polymorphism

I am currently working on subdividing my cocos2dx-cpp game into a more modular system. I want to have one layer to receive all Touches and direct those touches to the affected CCSprite-derived objects.
The derived objects are stored in a CCArray in an EntityManager (which helps me create and manage the entities).
The problem I am facing is that I can't seem to access the correct virtual method for my derived CCSprites.
Here is the code from my Touch layer (called TouchManager):
// Dispatches each new touch to every visible entity whose bounding box
// contains the touch location: the touch is stored on the entity via
// setTouch() and the entity's onTouch() handler is invoked.
void TouchManager::ccTouchesBegan( cocos2d::CCSet* pTouches , cocos2d::CCEvent* pEvents )
{
    auto entities = EntityManager::sharedManager()->getVisibleEntities();

    for ( cocos2d::CCSetIterator it = pTouches->begin() ; it != pTouches->end() ; ++it )
    {
        cocos2d::CCTouch* touch = static_cast<cocos2d::CCTouch*>( *it );
        if ( !touch )
        {
            continue; // skip null set entries
        }

        cocos2d::CCPoint tap = touch->getLocation();
        for ( unsigned int index = 0 ; index < entities->size() ; ++index )
        {
            auto entity = entities->at( index );
            if ( !entity->boundingBox().containsPoint( tap ) )
            {
                continue; // touch landed outside this entity
            }
            entity->setTouch( touch );
            entity->onTouch( tap );
        }
    }
}
I want to have the TouchManager detect the entity that has been touched, and send the Touch to it. But there is my problem: it detects the touch but doesn't send it further. Either I have a crash or nothing at all.
I have created a Touchable interface class:
#include "cocos2d.h"
class Touchable : public cocos2d::CCSprite
{
cocos2d::CCTouch* m_pTouch;
public:
virtual cocos2d::CCTouch* getTouch();
virtual void setTouch( cocos2d::CCTouch* touch );
virtual void onTouch( cocos2d::CCPoint location ) = 0 ;
virtual void onMoved( cocos2d::CCPoint location ) = 0 ;
virtual void onEnded( cocos2d::CCPoint location ) = 0 ;
};
as well as a TouchableSprite base class:
#include "cocos2d.h"
#include "Touchable.h"
// Concrete base class for touchable sprites: implements the Touchable
// handlers so that derived sprites only override the ones they need.
class TouchableSprite : public Touchable
{
// Touch storage and its accessors now live in Touchable.
//cocos2d::CCTouch* m_pTouch;
public:
//virtual cocos2d::CCTouch* getTouch();
//virtual void setTouch( cocos2d::CCTouch* touch );
// Factory: returns an autoreleased sprite initialised from fileName, or NULL on failure.
static TouchableSprite* createSpriteWithFile( const char* fileName );
// Moves the sprite to the given position (defaults to the origin).
void resetPosition( float positionX = 0.0f , float positionY = 0.0f );
// Handler implementations (empty bodies in TouchableSprite.cpp).
virtual void onTouch( cocos2d::CCPoint location ) ;
virtual void onMoved( cocos2d::CCPoint location ) ;
virtual void onEnded( cocos2d::CCPoint location ) ;
TouchableSprite(void);
~TouchableSprite(void);
};
with simple implementation (TouchableSprite.cpp):
#include "TouchableSprite.h"
// Nothing to set up beyond what the base classes do.
TouchableSprite::TouchableSprite() {}
// No resources of its own to release.
TouchableSprite::~TouchableSprite() {}
// Factory for an autoreleased TouchableSprite initialised from an image
// file. Returns NULL (after deleting the sprite) when initialisation fails.
TouchableSprite* TouchableSprite::createSpriteWithFile( const char* fileName )
{
    TouchableSprite* sprite = new TouchableSprite();
    if ( !sprite || !sprite->initWithFile( fileName ) )
    {
        // Initialisation failed: dispose of the half-built sprite.
        CC_SAFE_DELETE( sprite );
        return NULL;
    }
    sprite->autorelease();
    return sprite;
}
void TouchableSprite::resetPosition( float positionX , float positionY )
{
this->setPosition( ccp( positionX , positionY ) );
}
// Default handler: intentionally does nothing; derived sprites override it.
void TouchableSprite::onTouch( cocos2d::CCPoint location ) {}
// Default handler: intentionally does nothing; derived sprites override it.
void TouchableSprite::onMoved( cocos2d::CCPoint location ) {}
// Default handler: intentionally does nothing; derived sprites override it.
void TouchableSprite::onEnded( cocos2d::CCPoint location ) {}
And finally, here's my derived class (in this case, ControlStickSprite):
#include "cocos2d.h"
#include "RenderSystem.h"
#include "EntityManager.h"
#include "TouchableSprite.h"
// On-screen control stick: a TouchableSprite that overrides the touch handlers.
class ControlStickSprite : public TouchableSprite
{
// Set by create() to the entity it obtained from EntityManager.
ControlStickSprite* m_sprite;
public:
// NOTE(review): declared here as returning cocos2d::CCNode*, while the definition shown
// later in the post returns ControlStickSprite* — the two need to agree.
cocos2d::CCNode* create( cocos2d::CCNode* parent );
// Overrides of the virtual handlers declared in TouchableSprite.
void onTouch( cocos2d::CCPoint location ) ;
void onMoved( cocos2d::CCPoint location ) ;
void onEnded( cocos2d::CCPoint location ) ;
ControlStickSprite(void);
~ControlStickSprite(void);
};
with simple implementation for testing (skipping the Create part because it works):
// Test behaviour: shrink the stick to half scale when it is touched.
void ControlStickSprite::onTouch( cocos2d::CCPoint location )
{
    setScale( 0.5f );
}
// Test behaviour: the stick follows the touch location.
void ControlStickSprite::onMoved( cocos2d::CCPoint location )
{
    setPosition( location );
}
// Nothing happens when the touch ends.
void ControlStickSprite::onEnded( cocos2d::CCPoint location ) {}
Please help me get this working! I'm not too familiar with the usage of virtual methods so maybe I missed something there. I'm also relatively new to C++ and cocos2dx programming.
Thanks in advance!
EDIT: Thanks to #musikov for fixing the first part! I updated the above code to reflect the changes. I replaced the CCArray with std::vector< TouchableSprite* > to eliminate the need for casting from CCObject*.
Now, I am facing the problem that when touched, ControlStickSprite::onTouch() is never chosen; it's always TouchableSprite::onTouch().
Added ControlStickSprite::create and EntityManager::createEntity methods:
My ControlStickSprite::create() method is like this:
// Creates the control-stick entity through EntityManager, attaches it to
// parent, stores it in m_sprite and returns it.
ControlStickSprite* ControlStickSprite::create( cocos2d::CCNode* parent )
{
// auto parent = this->getParent();
// Fixed creation parameters for the stick.
auto entityType = "control-stick";
auto scale = 6.0f;
auto rotation = 0.0f;
// Positioned at 90% of the screen width and 25% of the screen height.
auto positionX = RenderSystem::sharedRenderSystem()->getScreenWidth() * 0.9f ;
auto positionY = RenderSystem::sharedRenderSystem()->getScreenHeight() * 0.25f ;
auto sprite = EntityManager::sharedManager()->createEntity(
parent ,
entityType ,
scale ,
rotation ,
positionX ,
positionY
);
// NOTE(review): createEntity() builds the object with TouchableSprite::createSpriteWithFile(),
// i.e. `new TouchableSprite()`. The object is therefore not a ControlStickSprite, and this
// cast does not make it one — virtual calls on it resolve to TouchableSprite's handlers.
m_sprite = ( ControlStickSprite* )sprite;
return m_sprite;
}
which makes use of my EntityManager:
// Creates a TouchableSprite from "<entityType>.png", applies the given
// rotation, scale and position, adds it to parent and registers it as a
// visible entity.
// Returns the new entity, or NULL when the sprite could not be created
// (createSpriteWithFile() returns NULL on failure, which was previously
// dereferenced unconditionally).
cocos2d::CCNode* EntityManager::createEntity( cocos2d::CCNode* parent , const char* entityType , float scale , float rotation , float positionX , float positionY )
{
    std::string extension = ".png";
    std::string fileName = entityType + extension;
    auto entity = TouchableSprite::createSpriteWithFile( fileName.c_str() );
    if ( !entity )
    {
        // Sprite creation failed; nothing to configure or register.
        return NULL;
    }
    entity->setRotation( rotation );
    entity->setScale( scale );
    entity->resetPosition( positionX , positionY );
    parent->addChild( entity );
    // add to VisibleEntities vector
    this->addEntity( entity , true );
    return entity;
}
The only thing I can think of is that the createEntity() method creates a TouchableSprite* but returns a CCNode*, which I then cast to a ControlStickSprite*. Am I doing this wrong again? :)
Thanks for all your help!
Your create method in TouchableSprite is implemented incorrectly, and the one in ControlStickSprite may be too.
Your create method creates a Sprite instance and casts it to the TouchableSprite class. That is completely wrong :)
That's why your program crashed on the setTouch method call - you're calling that method on a Sprite instance.
You need to change your create method:
// Factory: allocates a TouchableSprite, initialises it from `fileName` and hands
// it to autorelease. Returns NULL (after deleting the instance) when
// initialisation fails.
TouchableSprite* TouchableSprite::createSpriteWithFile( const char* fileName )
{
    TouchableSprite* instance = new TouchableSprite();

    const bool ready = instance && instance->initWithFile( fileName );
    if ( !ready )
    {
        // should not normally happen
        CC_SAFE_DELETE( instance );
        return NULL;
    }

    instance->autorelease();
    return instance;
}
Added controlstick implementation
ControlStickSprite.h
#include "cocos2d.h"
#include "RenderSystem.h"
#include "EntityManager.h"
#include "TouchableSprite.h"
// A TouchableSprite specialisation for the on-screen control stick.
// Declares its own factory, init() and the three touch callbacks.
class ControlStickSprite : public TouchableSprite
{
public:
    ControlStickSprite();
    ~ControlStickSprite();

    // Factory and initialiser (defined in ControlStickSprite.cpp).
    static ControlStickSprite* create();
    bool init();

    // Touch callbacks; each receives the touch location.
    void onTouch( cocos2d::CCPoint location );
    void onMoved( cocos2d::CCPoint location );
    void onEnded( cocos2d::CCPoint location );
};
ControlStickSprite.cpp
bool ControlStickSprite::init()
{
auto entityType = "control-stick";
std::string extension = ".png";
std::string fileName = entityType + extension;
if (initWithFile( fileName.c_str() )) {
// auto parent = this->getParent();
auto scale = 6.0f;
auto rotation = 0.0f;
auto positionX = RenderSystem::sharedRenderSystem()->getScreenWidth() * 0.9f ;
auto positionY = RenderSystem::sharedRenderSystem()->getScreenHeight() * 0.25f ;
sprite->setRotation( rotation );
sprite->setScale( scale );
sprite->resetPosition( positionX , positionY );
return true;
}
return false;
}
// Factory: allocates a ControlStickSprite, runs init() and hands the instance to
// autorelease. Returns NULL (after deleting the instance) when init() fails.
ControlStickSprite* ControlStickSprite::create()
{
    ControlStickSprite* instance = new ControlStickSprite();

    const bool ready = instance && instance->init();
    if ( !ready )
    {
        // should not normally happen
        CC_SAFE_DELETE( instance );
        return NULL;
    }

    instance->autorelease();
    return instance;
}
EntityManager:
// Attaches `entity` to `parent`, then registers it through the
// (entity, bool) overload of addEntity.
void EntityManager::addEntity( cocos2d::CCNode* parent , TouchableSprite* entity )
{
    parent->addChild( entity );

    // add to VisibleEntities vector
    addEntity( entity , true );
}
Remember to call EntityManager::addEntity(parent, entity) after ControlStickSprite::create(), if you decide to use this solution

DXGI_ERROR_DEVICE_HUNG resulting from C++AMP method

I am trying to implement a function which calculates the weightings and abscissae for the Gauss-Laguerre numerical integration method using C++AMP to parallelize the process and when running it I am getting a DXGI_ERROR_DEVICE_HUNG error.
This is my helper method for computing the logarithm of the gamma function on the GPU:
// Logarithm of the gamma function for use inside restrict(amp) code.
// Evaluates a 14-term series plus a leading log term, using
// concurrency::fast_math::log for both logarithms.
//
// tArg  argument of ln(Gamma(.)); it is used as a divisor, so it must be non-zero.
//
// NOTE(review): all constants carry an `f` suffix, so they are float literals
// converted to T.
// NOTE(review): the last coefficient reads 0.3869...E-5 here; the table this
// appears to be taken from (Numerical Recipes `gammln`) lists
// 0.368991826595316234e-5 -- TODO confirm whether this is a digit transposition.
template <typename T>
T gammaln_fast( T tArg ) restrict( amp )
{
    const T tCoefficients[] = { T( 57.1562356658629235f ), T( -59.5979603554754912f ),
        T( 14.1360979747417471f ), T( -0.491913816097620199f ), T( 0.339946499848118887E-4f ),
        T( 0.465236289270485756E-4f ), T( -0.983744753048795646E-4f ), T( 0.158088703224912494E-3f ),
        T( -0.210264441724104883E-3f ), T( 0.217439618115212643E-3f ), T( -0.164318106536763890E-3f ),
        T( 0.844182239838527433E-4f ), T( -0.261908384015814087E-4f ), T( 0.386991826595316234E-5f ) };
    const std::size_t sCount = sizeof( tCoefficients ) / sizeof( T );

    // Leading term: (x + 0.5) * log(x + 5.2421875) - (x + 5.2421875)
    const T tShifted = tArg + T( 5.2421875f );
    const T tLead = (tArg + T( 0.5f )) * concurrency::fast_math::log( tShifted ) - tShifted;

    // Series: c0 + sum_k coeff[k] / (x + k + 1)
    T tSeries = T( 0.999999999999997092f );
    T tDenominator = tArg;
    std::size_t s = 0;
    while( s < sCount )
    {
        ++tDenominator;
        tSeries += tCoefficients[s] / tDenominator;
        ++s;
    }

    return tLead + concurrency::fast_math::log( T( 2.5066282746310005f ) * tSeries / tArg );
}
And here is my function which computes the weights and abscissae:
// Computes abscissae and weights for Gauss-Laguerre quadrature with C++ AMP.
//
// tExponent   exponent parameter of the weight function (enters the recurrence
//             and the gamma-function ratio below)
// sNumPoints  number of quadrature points
// tEps        absolute tolerance on the Newton step; defaults to the machine
//             epsilon of T
//
// Initial guesses for all roots are produced sequentially on the host, then each
// root is refined independently on the accelerator with Newton's method.
// Returns the (abscissae, weights) arrays as an ArrayPair<T>.
//
// The Newton refinement is capped at 10 iterations per root. The original
// condition joined the convergence test and the iteration count with `||`, which
// made 10 a minimum rather than a maximum and left the loop unbounded whenever the
// step never fell below tEps; an unbounded kernel is what the GPU timeout
// detection reports as a hung device.
template <typename T>
ArrayPair<T> CalculateGaussLaguerreWeights_fast( const T tExponent, const std::size_t sNumPoints, T tEps = std::numeric_limits<T>::epsilon() )
{
    static_assert(std::is_floating_point<T>::value, "You can only instantiate this function with a floating point data type");
    static_assert(!std::is_same<T, long double>::value, "You can not instantiate this function with long double type"); // The long double type is not currently supported by C++AMP

    // --- Host side: sequential initial guesses (each depends on the previous two) ---
    T tCurrentGuess, tFatherGuess, tGrandFatherGuess;
    std::vector<T> vecInitialGuesses( sNumPoints );
    for( std::size_t s = 0; s < sNumPoints; ++s )
    {
        if( s == 0 )
        {
            tCurrentGuess = (T( 1.0f ) + tExponent) * (T( 3.0f ) + T( 0.92f ) * tExponent) / (T( 1.0f ) + T( 2.4f ) * sNumPoints + T( 1.8f ) * tExponent);
        }
        else if( s == 1 )
        {
            tFatherGuess = tCurrentGuess;
            tCurrentGuess += (T( 15.0f ) + T( 6.25f ) * tExponent) / (T( 1.0f ) + T( 0.9f ) * tExponent + T( 2.5f ) * sNumPoints);
        }
        else
        {
            tGrandFatherGuess = tFatherGuess;
            tFatherGuess = tCurrentGuess;
            std::size_t sDec = s - 1U;
            tCurrentGuess += ((T( 1.0f ) + T( 2.55f ) * sDec) / (T( 1.9f ) * sDec) + T( 1.26f ) * sDec * tExponent
                / (T( 1.0f ) + T( 3.5f ) * sDec)) * (tCurrentGuess - tGrandFatherGuess) / (T( 1.0f ) + T( 0.3f ) * tExponent);
        }
        vecInitialGuesses[s] = tCurrentGuess;
    }

    // --- Accelerator side: one Newton refinement per root ---
    concurrency::array<T> arrWeights( sNumPoints ), arrAbsciasses( sNumPoints, std::begin(vecInitialGuesses) );
    try {
        concurrency::parallel_for_each( arrAbsciasses.extent, [=, &arrAbsciasses, &arrWeights]( concurrency::index<1> index ) restrict( amp ) {
            T tVal = arrAbsciasses[index], tIntermediate;
            T tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f ), tPolynomial3, tDerivative;
            std::size_t sIterationNum = 0;
            do {
                // Three-term recurrence evaluating the polynomial at tVal.
                tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f );
                for( std::size_t s = 0; s < sNumPoints; ++s )
                {
                    tPolynomial3 = tPolynomial2;
                    tPolynomial2 = tPolynomial1;
                    tPolynomial1 = ((2 * s + 1 + tExponent - tVal) * tPolynomial2 - (s + tExponent) * tPolynomial3) / (s + 1);
                }
                // Derivative from the last two polynomial values, then one Newton step.
                tDerivative = (sNumPoints * tPolynomial1 - (sNumPoints + tExponent) * tPolynomial2) / tVal;
                tIntermediate = tVal;
                tVal = tIntermediate - tPolynomial1 / tDerivative;
                ++sIterationNum;
                // Continue only while NOT converged AND below the iteration cap.
            } while( concurrency::fast_math::fabs( tVal - tIntermediate ) > tEps && sIterationNum < 10 );
            arrAbsciasses[index] = tVal;
            arrWeights[index] = -concurrency::fast_math::exp( gammaln_fast( tExponent + sNumPoints ) - gammaln_fast( T( sNumPoints ) ) ) / (tDerivative * sNumPoints * tPolynomial2);
        } );
    }
    catch( concurrency::runtime_exception& e )
    {
        std::cerr << "Runtime error, code: " << e.get_error_code() << "; message: " << e.what() << std::endl;
    }
    return std::make_pair( std::move( arrAbsciasses ), std::move( arrWeights ) );
}
And here is the full trace from the debug console:
D3D11: Removing Device.
D3D11 ERROR: ID3D11Device::RemoveDevice: Device removal has been triggered for the following reason (DXGI_ERROR_DEVICE_HUNG: The Device took an unreasonable amount of time to execute its commands, or the hardware crashed/hung. As a result, the TDR (Timeout Detection and Recovery) mechanism has been triggered. The current Device Context was executing commands when the hang occurred. The application may want to respawn and fallback to less aggressive use of the display hardware). [ EXECUTION ERROR #378: DEVICE_REMOVAL_PROCESS_AT_FAULT]
D3D11 ERROR: ID3D11DeviceContext::Map: Returning DXGI_ERROR_DEVICE_REMOVED, when a Resource was trying to be mapped with READ or READWRITE. [ RESOURCE_MANIPULATION ERROR #2097214: RESOURCE_MAP_DEVICEREMOVED_RETURN]
My apologies for not being able to produce a small reproducible example; I hope that this is still an acceptable question, as I am unable to solve this by myself.
When using DirectCompute, the main challenge is to write computations that do not run afoul of the Direct3D automatic 'GPU hang' detection timeout. By default, the system assumes if a shader is taking more than a few seconds, the GPU is actually hung. This heuristic works for visual shaders, but you can easily create a DirectCompute shader that takes a long time to complete.
The solution is to disable the timeout detection. You can do this by creating the Direct3D 11 device with the D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT flag; see the "Disabling TDR on Windows 8 for your C++ AMP algorithms" blog post. The main thing to remember is that D3D11_CREATE_DEVICE_DISABLE_GPU_TIMEOUT requires the DirectX 11.1 or later runtime, which is included with Windows 8.x and can be installed on Windows 7 Service Pack 1 with KB2670838. See "DirectX 11.1 and Windows 7", "DirectX 11.1 and Windows 7 Update", and MSDN for some caveats of using KB2670838.

Function is ambiguous in C++

I'm trying to use a library I found for triangulations, and I'm getting a strange error. When I compile everything, I get the following errors:
'_trytoadd' is ambiguous ' Candidates are: void _trytoadd(Se<SeDcdtVertex,SeDcdtEdge,SeDcdtFace>
*, Se<SeDcdtVertex,SeDcdtEdge,SeDcdtFace> *, int, const GsVec2 &, const GsVec2 &, const GsVec2
&) '
I'm getting this error for the _canpass, _trytoadd and _ptreeaddent functions, but they are all defined and called properly in this .cpp file. Any idea what's wrong? Here's the code for the class in question:
/*=======================================================================
Copyright 2010 Marcelo Kallmann. All Rights Reserved.
This software is distributed for noncommercial use only, without
any warranties, and provided that all copies contain the full copyright
notice licence.txt located at the base folder of the distribution.
=======================================================================*/
# include <math.h>
# include <stdlib.h>
# include "gs_geo2.h"
# include "se_lct.h"
# include "se_triangulator_internal.h"
# define GS_TRACE_ONLY_LINES
//# define GS_USE_TRACE1 // main search method
//# define GS_USE_TRACE2 // search expansion
//# define GS_USE_TRACE3
# include "gs_trace.h"
//================================================================================
//========================== search path tree ====================================
//================================================================================
// nen/nex are the entrance/exit edges of the node being expanded, which is already in the search tree
// en/ex are the entrance/exit edge of the current traversal being evaluated for expansion
// (p1,p2) are en coordinates
// Decides whether the traversal entering through en and leaving through ex may be
// added to the search tree. Returns false as soon as one test fails:
//  - ex is a constrained edge;
//  - the face of en is already marked;
//  - the clearance test fails: either the stored clearance cl() is below d2
//    (when _pre_clearance is set), or the squared length of the exit edge is
//    below d2 or _sector_clear() rejects the corner;
//  - the node being expanded lies in the initial face _fi, the traversal switches
//    side (top<->bot), and _local_transition_free() rejects it.
// d2 is compared against squared distances (dist2) and against cl() values.
// The parameter r is not referenced in this body.
bool SeLct::_canpass ( SeDcdtSymEdge* nen, SeDcdtSymEdge* nex, SeDcdtSymEdge* en, SeDcdtSymEdge* ex,
const GsPnt2& p1, const GsPnt2& p2, const GsPnt2& p3, float r, float d2 )
{
// check if can traverse en/ex traversal:
if ( _man->is_constrained(ex->edg()) ) return false;
// test if next triangle being tested has been already visited:
if ( _mesh->marked(en->fac()) ) return false;
// "bot" case: the exit edge is the edge following the entrance edge
if ( en->nxt()==ex ) // bot
{
if ( _pre_clearance )
{ float cl = ex->edg()->cl(ex);
if ( cl<d2 ) return false;
}
else
{ if ( dist2(p2,p3)<d2 ) return false;
if ( !_sector_clear(ex->nxt(),d2,p2,p3,p1) ) return false;
}
// only checked when the expanded node is still in the initial face _fi
if ( nex->fac()==_fi && nex->nxt()==nen ) // top->bot departure transition
{ if ( !_local_transition_free(ex,en,d2,_xi,_yi) ) return false;
}
}
// "top" case: the exit edge is the other remaining edge of the triangle
else // top
{
if ( _pre_clearance )
{ float cl = en->edg()->cl(en);
if ( cl<d2 ) return false;
}
else
{ if ( dist2(p3,p1)<d2 ) return false;
if ( !_sector_clear(en->nxt(),d2,p1,p2,p3) ) return false;
}
// only checked when the expanded node is still in the initial face _fi
if ( nex->fac()==_fi && nen->nxt()==nex ) // bot->top departure transition
{ if ( !_local_transition_free(ex,en,d2,_xi,_yi) ) return false;
}
}
return true;
}
// en is the entrance edge, ex the exit edge, (p1,p2) are en coordinates
// Tries to extend search-tree node mi with the traversal en->ex.
// mi indexes _ptree->nodes; p1,p2,p3 are the vertices of the triangle being
// entered. Nothing is added when _canpass() rejects the traversal. Otherwise a
// cost point (x,y) is computed on the exit edge and a child is added whose first
// cost argument is the parent's ncost plus the distance to (x,y), and whose second
// is the distance from (x,y) to the goal (_xg,_yg).
void SeLct::_trytoadd ( SeDcdtSymEdge* en, SeDcdtSymEdge* ex, int mi, const GsPnt2& p1, const GsPnt2& p2, const GsPnt2& p3 )
{
// verify if it is passable:
PathNode& n = _ptree->nodes[mi];
if ( !_canpass ( (SeDcdtSymEdge*)n.en, (SeDcdtSymEdge*)n.ex, en, ex, p1, p2, p3, _ptree->radius, _ptree->diam2 ) ) return;
// ok it is passable, compute cost:
double x, y;
// the exit edge is (p2,p3) for a bot traversal and (p3,p1) for a top traversal
if ( en->nxt()==ex )
_getcostpoint ( &n, n.x, n.y, p2.x, p2.y, p3.x, p3.y, x, y, _ptree->radius ); // bot
else
_getcostpoint ( &n, n.x, n.y, p3.x, p3.y, p1.x, p1.y, x, y, _ptree->radius ); // top
// insert:
// PTDIST: Euclidean distance (square root of gs_dist2) narrowed to float
# define PTDIST(a,b,c,d) float(sqrt(gs_dist2(a,b,c,d)))
_ptree->add_child ( mi, en, ex, n.ncost+PTDIST(n.x,n.y,x,y), PTDIST(x,y,_xg,_yg), x,y ); // A* heuristic
# undef PTDIST
}
// Negative result codes of _expand_lowest_cost_leaf(); a non-negative result is
// the index of the search-tree node that reached the goal.
# define ExpansionNotFinished -1
# define ExpansionBlocked -2
// Performs one expansion step of the search: takes the lowest-cost leaf, checks
// whether the triangle behind its exit edge contains the goal, and otherwise tries
// to add the two possible traversals of that triangle to the tree.
// Returns:
//   ExpansionBlocked      no leaf is left, or the goal location/arrival is invalid
//                         from every entry;
//   the leaf index        the goal triangle was reached with a valid arrival;
//   ExpansionNotFinished  the search should continue.
// Also keeps _maxfronts at the largest leaf count seen, and calls _searchcb (if
// set) after an expansion.
int SeLct::_expand_lowest_cost_leaf ()
{
int min_i;
if ( _ptree->leafs.size()>_maxfronts ) _maxfronts=_ptree->leafs.size();
min_i = _ptree->lowest_cost_leaf ();
GS_TRACE2 ( "Expanding leaf: "<<min_i );
if ( min_i<0 ) return ExpansionBlocked; // no more leafs: path could not be found!
// attention: array references may be invalidated due array reallocation during insertion
// s, sn, sp: the three edges of the triangle on the other side of the leaf's exit edge
SeDcdtSymEdge* s = (SeDcdtSymEdge*) _ptree->nodes[min_i].ex->sym();
SeDcdtSymEdge* sn = s->nxt();
SeDcdtSymEdge* sp = sn->nxt();
const GsPnt2& p1 = s->vtx()->p; // note: s is in the triangle to expand
const GsPnt2& p2 = sn->vtx()->p;
const GsPnt2& p3 = sp->vtx()->p;
float d2 = _ptree->diam2;
// test if next triangle contains goal point:
if ( gs_in_triangle(p1.x,p1.y,p2.x,p2.y,p3.x,p3.y,_xg,_yg) ) // reached goal triangle !
{ GS_TRACE1 ( "Goal triangle reached..." );
double r = _ptree->radius;
if ( !pt2circfree(this,s,_xg,_yg,r) ) // we do not know if the goal location is valid, so test it now
{ GS_TRACE1 ( "Goal location is invalid." );
return ExpansionBlocked;
}
GS_TRACE1 ( "Goal location valid." );
GS_TRACE1 ( "Analyzing arrival..." );
SeDcdtSymEdge* nen = (SeDcdtSymEdge*)_ptree->nodes[min_i].en;
SeDcdtSymEdge* nex = (SeDcdtSymEdge*)_ptree->nodes[min_i].ex;
// the arrival analysis result is read back from _ent[3]
_analyze_arrival ( s, 3, r, d2, nen, nex );
if ( _ent[3].type==EntBlocked )
{ // both other edges are unusable (constrained, or squared length below d2)
if ( (sn->edg()->is_constrained()||dist2(p2,p3)<d2) &&
(sp->edg()->is_constrained()||dist2(p3,p1)<d2) )
{ GS_TRACE1 ( "Arrival blocked from all possible entries." );
return ExpansionBlocked;
}
else
{ GS_TRACE1 ( "Arrival is blocked, but search can continue..." );
// at this point the arrival is not valid but the search will continue, and
// note that the arrival triangle may still be used as passage so we let
// the expansion tests in _trytoadd() proceed.
}
}
else
{ GS_TRACE1 ( "Arrival tests passed." );
GS_TRACE1 ( "Arrival is valid " << (_ent[3].type==EntTrivial?"and trivial.":"but non trivial.") );
return min_i; // FOUND!
}
}
// remember the node count so we can tell whether any child was added
int nsize = _ptree->nodes.size();
_trytoadd ( s, sn, min_i, p1, p2, p3 ); // bot
_trytoadd ( s, sp, min_i, p1, p2, p3 ); // top
if ( _ptree->nodes.size()>nsize ) _mesh->mark ( s->fac() ); // only mark traversed faces
if (_searchcb) _searchcb(_sudata);
return ExpansionNotFinished; // continue the expansion
}
// Adds a root entry (parent index -1) to the search tree for entrance edge s.
// top   selects which edge is stored as the node's entrance: s->nxt() when true,
//       s->nxn() when false; s itself is stored as the exit.
// edge  when true the cost point is the initial point (_xi,_yi) itself; otherwise
//       it is computed by _getcostpoint() on edge s.
void SeLct::_ptreeaddent ( SeDcdtSymEdge* s, bool top, bool edge )
{
const GsPnt2& p1 = s->vtx()->p;
const GsPnt2& p2 = s->nvtx()->p;
double x, y;
if ( edge )
{ x=_xi; y=_yi; }
else
{ _getcostpoint ( 0, _xi, _yi, p1.x, p1.y, p2.x, p2.y, x, y, _ptree->radius ); } // 0: no parent node is passed
// insert:
// PTDIST: Euclidean distance (square root of gs_dist2) cast to float
# define PTDIST(a,b,c,d) (float)sqrt(gs_dist2(a,b,c,d))
_ptree->add_child ( -1, top? s->nxt():s->nxn(), s, PTDIST(_xi,_yi,x,y), PTDIST(x,y,_xg,_yg), x, y );
# undef PTDIST
}
//================================================================================
//============================== search path =====================================
//================================================================================
/* - This is the A* algorithm that takes O(nf), f is the faces in the "expansion frontier". */
// Searches for a channel (sequence of crossed edges) from (x1,y1) to (x2,y2) for a
// disc of the given radius, starting the point location at iniface.
// Returns true when a path exists; _path_result is then TrivialPath, LocalPath or
// GlobalPath, and for a GlobalPath _channel holds the exit edges from start to
// goal. Returns false with _path_result==NoPath when a point cannot be located or
// is not free, or when the points are not connectable. When iniface is null it
// returns false right after _clear_path().
bool SeLct::_search_channel ( double x1, double y1, double x2, double y2, float radius, const SeFace* iniface )
{
GS_TRACE1 ( "Starting Search Path..." );
// the search tree is allocated lazily on the first query
if ( !_ptree ) _ptree = new PathTree;
_clear_path(); // clear data from previous query and set _path_result to NoPath
_channel.size(0);
_xi=x1; _yi=y1; _xg=x2; _yg=y2;
if ( !iniface ) return false;
// Even if p1 is on an edge, locate_point will return in s a face that
// can be considered to contain p1 (p1 would be invalid if in a vertex)
SeBase *s;
LocateResult res=locate_point ( iniface, x1, y1, s );
if ( res==NotFound )
{ GS_TRACE1 ( "Could not locate first point!" );
_path_result=NoPath;
return false;
}
_fi = s->fac(); // save initial face
// the start point must be free for the given radius
if ( !pt1circfree(this,s,x1,y1,radius) ) { _path_result=NoPath; return false; }
// Check if we are to solve trivial or local paths, testing if both points are in the same triangle:
if ( _man->in_triangle(s->vtx(),s->nxt()->vtx(),s->nxn()->vtx(),x2,y2) )
{ GS_TRACE1 ( "Both points are in the same triangle..." );
if ( radius==0 )
{ GS_TRACE1 ( "Trivial path returned." );
_path_result=TrivialPath; return true; // this is it
}
if ( !pt2circfree(this,s,x2,y2,radius) )
{ GS_TRACE1 ( "Goal point in same triangle invalid. No path returned." );
_path_result=NoPath; return false;
}
_path_result = _analyze_local_path ( s, radius );
if ( _path_result==TrivialPath )
{ GS_TRACE1 ( "Capsule free. Trivial path returned." );
return true; // path exists
}
else if ( _path_result==LocalPath )
{ GS_TRACE1 ( "Deformable capsule is passable. Local path returned." );
return true; // path exists
}
// at this point the result may be a GlobalPath or a NoPath
GS_TRACE1 ( "Deformable capsule is not passable." );
// we then let the normal entrance analysis and search to proceed.
// the entrance that blocked the capsule will also be blocked but
// there may be a global path to get there so we just do not
// mark the initial face as visited, allowing it to be found by the global search.
}
GS_TRACE1 ( "Searching for a global path..." );
GS_TRACE1 ( "Analyzing entrances..." );
// results for the three entrances are read back from _ent[0.._ent[2]
_analyze_entrances ( s, _xi, _yi, radius );
GS_TRACE1 ( "Entrance 0: "<<(_ent[0].type==EntBlocked?"blocked":_ent[0].type==EntTrivial?"trivial":"not trivial") );
GS_TRACE1 ( "Entrance 1: "<<(_ent[1].type==EntBlocked?"blocked":_ent[1].type==EntTrivial?"trivial":"not trivial") );
GS_TRACE1 ( "Entrance 2: "<<(_ent[2].type==EntBlocked?"blocked":_ent[2].type==EntTrivial?"trivial":"not trivial") );
GS_TRACE1 ( "Initializing A* search..." );
_mesh->begin_marking ();
_ptree->init ( radius );
// seed the tree with every non-blocked entrance; only entrance 0 may take the
// "point lies on an edge" variant
if ( _ent[0].type!=EntBlocked ) _ptreeaddent ( _ent[0].s, _ent[0].top, res==EdgeFound? true:false );
if ( _ent[1].type!=EntBlocked ) _ptreeaddent ( _ent[1].s, _ent[1].top, false );
if ( _ent[2].type!=EntBlocked ) _ptreeaddent ( _ent[2].s, _ent[2].top, false );
if (_searchcb) _searchcb(_sudata);
GS_TRACE1 ( "Expanding leafs..." );
// expand until the goal is reached (found>=0) or the search is blocked
int found = ExpansionNotFinished;
while ( found==ExpansionNotFinished )
found = _expand_lowest_cost_leaf();
_mesh->end_marking ();
if ( found==ExpansionBlocked )
{ GS_TRACE1 ( "Points are not connectable!" );
_path_result = NoPath;
return false;
}
_finalsearchnode = found;
int n = found; // the starting leaf
s = _ptree->nodes[n].ex->sym(); // note: s is not read again in this function
// walk the parent links back to a root, collecting exit edges, then reverse
do { _channel.push() = _ptree->nodes[n].ex;
n = _ptree->nodes[n].parent;
} while ( n!=-1 );
_channel.revert();
GS_TRACE1 ( "Path crosses "<<_channel.size()<<" edges." );
_path_result = GlobalPath;
return true;
}
// Fills e with the entrance and exit edge of every search-tree node, in node
// order (two entries per node). e is emptied first; it stays empty when no search
// tree has been allocated yet (_ptree is only created by _search_channel).
void SeLct::get_search_nodes ( GsArray<SeBase*>& e )
 {
   e.size ( 0 );
   if ( !_ptree ) return; // same guard as get_search_metric(): avoid a null dereference
   for ( int i=0; i<_ptree->nodes.size(); i++ )
    { e.push() = _ptree->nodes[i].en;
      e.push() = _ptree->nodes[i].ex;
    }
 }
// Number of nodes currently in the search tree; 0 when no tree has been allocated.
int SeLct::get_search_nodes () const
 {
   if ( !_ptree ) return 0;
   return _ptree->nodes.size();
 }
// Fills pnts with one point pair per search-tree node: the parent's cost point (or
// the initial point (_xi,_yi) for nodes without a parent) followed by the node's
// own cost point. pnts is emptied first and stays empty when there is no tree.
void SeLct::get_search_metric ( GsArray<GsPnt2>& pnts )
 {
   pnts.size ( 0 );
   if ( !_ptree ) return;

   int k = 0;
   while ( k<_ptree->nodes.size() )
    {
      PathNode& node = _ptree->nodes[k];

      // first point of the pair: where the node came from
      if ( !(node.parent<0) )
       { PathNode& from = _ptree->nodes[node.parent];
         pnts.push().set ( from.x, from.y );
       }
      else
       { pnts.push().set ( _xi, _yi );
       }

      // second point of the pair: the node itself
      pnts.push().set ( node.x, node.y );
      k++;
    }
 }
// Fills e with the exit edge of every current leaf of the search tree (the
// expansion front). e is emptied first; it stays empty when no search tree has
// been allocated yet (_ptree is only created by _search_channel).
void SeLct::get_search_front ( GsArray<SeBase*>& e )
 {
   e.size ( 0 );
   if ( !_ptree ) return; // same guard as get_search_metric(): avoid a null dereference
   for ( int i=0; i<_ptree->leafs.size(); i++ )
    { e.push() = _ptree->nodes[ _ptree->leafs.elem(i) ].ex;
    }
 }
//============================ End of File =================================

Ramer-Douglas-Peucker path simplification algorithm

I implemented a path simplification algorithm after reading the article here:
http://losingfight.com/blog/2011/05/30/how-to-implement-a-vector-brush/
It's worked for me pretty well for generating optimized level geometry for my game. But, I'm using it now to clean up a* pathfinding paths and it's got a weird edge case that fails miserably.
Here's a screenshot of it working - optimizing the path from red circle to the blue circle. The faint green line is the a* output, and the lighter whiteish line is the optimized path.
And here's a screenshot of it failing:
Here's my code. I adapted the ObjC code from the article to c++
Note: vec2fvec is a std::vector< vec2<float> >, and 'real' is just a typedef'd float.
void rdpSimplify( const vec2fvec &in, vec2fvec &out, real threshold )
{
if ( in.size() <= 2 )
{
out = in;
return;
}
//
// Find the vertex farthest from the line defined by the start and and of the path
//
real maxDist = 0;
size_t maxDistIndex = 0;
LineSegment line( in.front(), in.back() );
for ( vec2fvec::const_iterator it(in.begin()),end(in.end()); it != end; ++it )
{
real dist = line.distance( *it );
if ( dist > maxDist )
{
maxDist = dist;
maxDistIndex = it - in.begin();
}
}
//
// If the farhtest vertex is greater than our threshold, we need to
// partition and optimize left and right separately
//
if ( maxDist > threshold )
{
//
// Partition 'in' into left and right subvectors, and optimize them
//
vec2fvec left( maxDistIndex+1 ),
right( in.size() - maxDistIndex ),
leftSimplified,
rightSimplified;
std::copy( in.begin(), in.begin() + maxDistIndex + 1, left.begin() );
std::copy( in.begin() + maxDistIndex, in.end(), right.begin() );
rdpSimplify(left, leftSimplified, threshold );
rdpSimplify(right, rightSimplified, threshold );
//
// Stitch optimized left and right into 'out'
//
out.resize( leftSimplified.size() + rightSimplified.size() - 1 );
std::copy( leftSimplified.begin(), leftSimplified.end(), out.begin());
std::copy( rightSimplified.begin() + 1, rightSimplified.end(), out.begin() + leftSimplified.size() );
}
else
{
out.push_back( line.a );
out.push_back( line.b );
}
}
I'm really at a loss as to what's going wrong. My spidey sense says it's in the std::copy calls... I must be copying garbage in some circumstances.
EDIT:
I've rewritten the algorithm dropping any use of iterators and std::copy, and the like. It still fails in the exact same way.
// Ramer-Douglas-Peucker simplification of the polyline `in`, written with plain
// index loops only.
//
// in         input path
// out        receives the simplified path; previous content is discarded on every
//            code path (the original appended in the "within threshold" case,
//            which left stale points in a reused output vector)
// threshold  maximum distance a dropped vertex may have from the simplified segment
//
// Paths with two or fewer points are copied unchanged.
void rdpSimplify( const vec2fvec &in, vec2fvec &out, real threshold )
{
    if ( in.size() <= 2 )
    {
        out = in;
        return;
    }

    //
    // Find the interior vertex farthest from the segment defined by the start and
    // end of the path. Endpoints are skipped: they lie on the segment, and
    // excluding them guarantees both partitions are strictly shorter than 'in',
    // so the recursion cannot loop on the same input.
    //
    real maxDist = 0;
    size_t maxDistIndex = 0;
    LineSegment line( in.front(), in.back() );
    for ( size_t i = 1, N = in.size() - 1; i < N; i++ )
    {
        real dist = line.distance( in[i] );
        if ( dist > maxDist )
        {
            maxDist = dist;
            maxDistIndex = i;
        }
    }

    //
    // If the farthest vertex is farther than our threshold, we need to
    // partition and optimize left and right separately
    //
    if ( maxDist > threshold )
    {
        //
        // Partition 'in' into left and right subvectors (sharing the farthest
        // vertex), and optimize them
        //
        vec2fvec left, right, leftSimplified, rightSimplified;
        for ( size_t i = 0; i < maxDistIndex + 1; i++ ) left.push_back( in[i] );
        for ( size_t i = maxDistIndex; i < in.size(); i++ ) right.push_back( in[i] );
        rdpSimplify(left, leftSimplified, threshold );
        rdpSimplify(right, rightSimplified, threshold );

        //
        // Stitch optimized left and right into 'out'; index 0 of the right half
        // repeats the last point of the left half and is skipped
        //
        out.clear();
        for ( size_t i = 0, N = leftSimplified.size(); i < N; i++ ) out.push_back(leftSimplified[i]);
        for ( size_t i = 1, N = rightSimplified.size(); i < N; i++ ) out.push_back( rightSimplified[i] );
    }
    else
    {
        // Every vertex is within the threshold: the result is just the two
        // endpoints. Clear first so a non-empty 'out' is not appended to.
        out.clear();
        out.push_back( line.a );
        out.push_back( line.b );
    }
}
I can't find any faults in your code.
Some things to try:
Add some debug print statements to check what maxDist is in the failing case. It should be really low, but if it comes out high then you know there's a problem with your line segment distance code.
Check that the path you are seeing actually matches the path that your algorithm returns. If not then perhaps there is something wrong with your path rendering? Maybe a bug when the path only has two points?
Check that your input path is what you expect it to be by printing out all its coordinates at the start of the algorithm.
It shouldn't take too long to find the cause of the problem if you just investigate a little. After a few minutes, staring at code is a very poor way to debug.