Casting a struct - c++

I am currently learning about memory pool implementation.
The thing is i don't quite understand something in the way the type casting is done down below :
b3BlockPool::b3BlockPool(u32 blockSize)
{
m_blockSize = blockSize;
m_chunkSize = b3_blockCount * m_blockSize;
m_chunks = NULL;
m_chunkCount = 0;
// Pre-allocate some chunks
b3Chunk* chunk = (b3Chunk*)b3Alloc(sizeof(b3Chunk) + m_chunkSize);
++m_chunkCount;
chunk->freeBlocks = (b3Block*)((unsigned char*)chunk + sizeof(b3Chunk)); //here
}
At the last line, the chunk pointer is first casted into unsigned char*,
why is that?
Why cant we just do that instead ?
chunk->freeBlocks = (b3Block*)(chunk + sizeof(b3Chunk));
here is the .h file for more info :
// Number of blocks per chunk.
const u32 b3_blockCount = 32;
// A pool of memory blocks.
class b3BlockPool
{
public:
b3BlockPool(u32 blockSize);
~b3BlockPool();
void* Allocate();
void Free(void* p);
private:
struct b3Block
{
b3Block* next;
};
struct b3Chunk
{
b3Block* freeBlocks;
b3Chunk* next;
};
u32 m_blockSize;
u32 m_chunkSize;
b3Chunk* m_chunks;
u32 m_chunkCount;
};

Related

DX12) Part of the Constants buffer is cut off

This is my Constant buffer for Object Drawing.
actually, gWorld and gOldWorld have correct values, but gCubemapOn, gMotionBlurOn, gRimLightOn values are going wrong.
gCubemapOn must be TRUE and it looks like, but actually that value is 65537.
and gRimLightOn must be TRUE, but as you see, actual value is FALSE.
The code below is how I use Constant Buffer.
template<typename Cnst>
class ConstantBuffer
{
public:
ConstantBuffer(ID3D12Device* device, UINT count, bool isConstant=true)
{
if (isConstant)
mByteSize = (sizeof(Cnst) + 255) & ~255;
else
mByteSize = sizeof(Cnst);
ThrowIfFailed(device->CreateCommittedResource(
&Extension::HeapProperties(D3D12_HEAP_TYPE_UPLOAD),
D3D12_HEAP_FLAG_NONE,
&Extension::BufferResourceDesc(D3D12_RESOURCE_DIMENSION_BUFFER, mByteSize * count),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr, IID_PPV_ARGS(&mUploadBuffer)));
ThrowIfFailed(mUploadBuffer->Map(0, nullptr, (void**)(&mData)));
}
ConstantBuffer(const ConstantBuffer& rhs) = delete;
ConstantBuffer& operator=(const ConstantBuffer& rhs) = delete;
virtual ~ConstantBuffer()
{
if (mUploadBuffer)
mUploadBuffer->Unmap(0, nullptr);
mData = nullptr;
}
D3D12_GPU_VIRTUAL_ADDRESS GetGPUVirtualAddress(int idx) const
{
return mUploadBuffer->GetGPUVirtualAddress() + idx * mByteSize;
}
void CopyData(int index, const Cnst& data)
{
memcpy(&mData[index * mByteSize], &data, sizeof(Cnst));
}
UINT GetByteSize() const
{
return mByteSize;
}
private:
ComPtr<ID3D12Resource> mUploadBuffer = nullptr;
BYTE* mData = nullptr;
UINT mByteSize = 0;
};
and this is how I Update Constant Buffer
void Pipeline::UpdateConstants()
{
UINT matOffset = 0;
for (int i = 0; i < mRenderObjects.size(); i++)
{
mObjectCB->CopyData(i, mRenderObjects[i]->GetObjectConstants());
mRenderObjects[i]->UpdateMatConstants(mMaterialCB.get(), matOffset);
matOffset += mRenderObjects[i]->GetMeshCount();
}
}
ObjectConstants GameObject::GetObjectConstants()
{
ObjectConstants objCnst = {};
if (mReflected)
{
objCnst.World = Matrix4x4::Transpose(Matrix4x4::Multiply(mWorld, mReflectMatrix));
objCnst.oldWorld = Matrix4x4::Transpose(Matrix4x4::Multiply(mOldWorld, mReflectMatrix));
}
else
{
objCnst.World = Matrix4x4::Transpose(mWorld);
objCnst.oldWorld = Matrix4x4::Transpose(mOldWorld);
}
objCnst.cubemapOn = mCubemapOn;
objCnst.motionBlurOn = mMotionBlurOn;
objCnst.rimLightOn = mRimLightOn;
return objCnst;
}
struct ObjectConstants
{
XMFLOAT4X4 World;
XMFLOAT4X4 oldWorld;
bool cubemapOn;
bool motionBlurOn;
bool rimLightOn;
};
I believe this is the same problem as seen here. HLSL bool is 4 bytes and C++ bool is 1 byte. If you declare your CPU struct as
struct ObjectConstants
{
XMFLOAT4X4 World;
XMFLOAT4X4 oldWorld;
int32_t cubemapOn;
int32_t motionBlurOn;
int32_t rimLightOn;
};
it should work.

Efficient multi-row vector

I need an efficient implementation of a vector with multiple rows, each having the same number of columns, which is not too ugly in C++. Currently I have the following:
class BaseVector {
protected: // variables
int64_t _capacity;
int64_t _nColumns;
protected:
template<typename taItem> void Allocate(taItem * &p, const int64_t nItems) {
p = static_cast<taItem*>(MemPool::Instance().Acquire(sizeof(taItem)*nItems));
if (p == nullptr) {
__debugbreak();
}
}
template<typename taItem> void Reallocate(taItem * &p, const int64_t newCap) {
taItem *np;
Allocate(np, newCap);
Utils::AlignedNocachingCopy(np, p, _nColumns * sizeof(taItem));
MemPool::Instance().Release(p, _capacity * sizeof(taItem));
p = np;
}
// Etc for Release() operation
public:
explicit BaseVector(const int64_t initCap) : _capacity(initCap), _nColumns(0) { }
void Clear() { _nColumns = 0; }
int64_t Size() const { return _nColumns; }
};
class DerivedVector : public BaseVector {
__m256d *_pRowA;
__m256i *_pRowB;
uint64_t *_pRowC;
uint8_t *_pRowD;
// Etc. for other rows
public:
DerivedVector(const int64_t nColumns) : BaseVector(nColumns) {
Allocate(_pRowA, nColumns);
Allocate(_pRowB, nColumns);
Allocate(_pRowC, nColumns);
Allocate(_pRowD, nColumns);
// Etc. for the other rows
}
void IncSize() {
if(_nColumns >= _capacity) {
const int64_t newCap = _capacity + (_capacity >> 1) + 1;
Reallocate(_pRowA, newCap);
Reallocate(_pRowB, newCap);
Reallocate(_pRowC, newCap);
Reallocate(_pRowD, newCap);
// Etc. for other rows
_capacity = newCap;
}
_nColumns++;
}
~DerivedVector() {
// Call here the Release() operation for all rows
}
};
The problem with this approach is that there can be 30 rows, so I have to type manually (and repeat myself) 30 times Allocate, 30 times Reallocate, 30 times Release, etc.
So is there a way in C++ to keep this code DRY and fast? I am ok with macros, but not heavy polymorphism in each access to a cell in the vector because this would kill performance.

C++ object containing an array of char using unique_ptr

I am looking on a way to use unique_ptr to allocate a structure that contains an array of char with a number of bytes that set dynamically to support different types of message.
Assuming:
struct MyMessage
{
uint32_t id;
uint32_t data_size;
char data[4];
};
How can I convert send_message() below to use a smart pointer?
void send_message(void* data, const size_t data_size)
{
const auto message_size = sizeof(MyMessage) - 4 + data_size;
const auto msg = reinterpret_cast<MyMessage*>(new char[message_size]);
msg->id = 3;
msg->data_size = data_size;
memcpy(msg->data, data, data_size);
// Sending the message
// ...
delete[] msg;
}
My attempt to use smart point using the code below does not compile:
const auto message_size = sizeof(MyMessage) - 4 + data_size;
const auto msg = std::unique_ptr<MyMessage*>(new char[message_size]);
Below a complete working example:
#include <iostream>
#include <iterator>
#include <memory>
using namespace std;
struct MyMessage
{
uint32_t id;
uint32_t data_size;
char data[4];
};
void send_message(void* data, const size_t data_size)
{
const auto message_size = sizeof(MyMessage) - 4 + data_size;
const auto msg = reinterpret_cast<MyMessage*>(new char[message_size]);
if (msg == nullptr)
{
throw std::domain_error("Not enough memory to allocate space for the message to sent");
}
msg->id = 3;
msg->data_size = data_size;
memcpy(msg->data, data, data_size);
// Sending the message
// ...
delete[] msg;
}
struct MyData
{
int page_id;
char point_name[8];
};
void main()
{
try
{
MyData data{};
data.page_id = 7;
strcpy_s(data.point_name, sizeof(data.point_name), "ab332");
send_message(&data, sizeof(data));
}
catch (std::exception& e)
{
std::cout << "Error: " << e.what() << std::endl;
}
}
The data type that you pass to delete[] needs to match what new[] returns. In your example, you are new[]ing a char[] array, but are then delete[]ing a MyMessage object instead. That will not work.
The simple fix would be to change this line:
delete[] msg;
To this instead:
delete[] reinterpret_cast<char*>(msg);
However, You should use a smart pointer to manage the memory deletion for you. But, the pointer that you give to std::unique_ptr needs to match the template parameter that you specify. In your example, you are declaring a std::unique_ptr whose template parameter is MyMessage*, so the constructor is expecting a MyMessage**, but you are passing it a char* instead.
Try this instead:
// if this struct is being sent externally, consider
// setting its alignment to 1 byte, and setting the
// size of the data[] member to 1 instead of 4...
struct MyMessage
{
uint32_t id;
uint32_t data_size;
char data[4];
};
void send_message(void* data, const size_t data_size)
{
const auto message_size = offsetof(MyMessage, data) + data_size;
std::unique_ptr<char[]> buffer = std::make_unique<char[]>(message_size);
MyMessage *msg = reinterpret_cast<MyMessage*>(buffer.get());
msg->id = 3;
msg->data_size = data_size;
std::memcpy(msg->data, data, data_size);
// Sending the message
// ...
}
Or this:
using MyMessage_ptr = std::unique_ptr<MyMessage, void(*)(MyMessage*)>;
void send_message(void* data, const size_t data_size)
{
const auto message_size = offsetof(MyMessage, data) + data_size;
MyMessage_ptr msg(
reinterpret_cast<MyMessage*>(new char[message_size]),
[](MyMessage *m){ delete[] reinterpret_cast<char*>(m); }
);
msg->id = 3;
msg->data_size = data_size;
std::memcpy(msg->data, data, data_size);
// Sending the message
// ...
}
This should work, but it is still not clear if accessing msg->data out of bounds is legal (but at least it is not worst than in your original code):
const auto message_size = sizeof(MyMessage) - ( data_size < 4 ? 0 : data_size - 4 );
auto rawmsg = std::make_unique<char[]>( message_size );
auto msg = new (rawmsg.get()) MyMessage;

what does the *(void**) means

I have seen a class like this on the internet,
the head file
#ifndef _COMMON_ARRAY_OBJECT_POOL_H_
#define _COMMON_ARRAY_OBJECT_POOL_H_
#include <stdint.h>
namespace easynet
{
class ArrayObjectPool
{
public:
/** construct
* #param elem_size : element size;
* #param elem_num : element number
*/
ArrayObjectPool(uint32_t elem_size, uint32_t elem_num);
~ArrayObjectPool();
uint32_t ElemSize(){return m_ElemSize;}
uint32_t Capacity(){return m_ElemNum;}
bool IsEmpty(){return m_FreeHead==NULL;}
void* Get();
bool Recycle(void *elem);
private:
void *m_Elements;
void *m_End;
void *m_FreeHead;
uint32_t m_ElemSize;
uint32_t m_ElemNum;
};
}
#endif //_COMMON_ARRAY_OBJECT_POOL_H_
the cpp file
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include "ArrayObjectPool.h"
namespace easynet
{
ArrayObjectPool::ArrayObjectPool(uint32_t elem_size, uint32_t elem_num)
{
m_ElemNum = elem_num;
if(elem_size < sizeof(void*))
m_ElemSize = sizeof(void*);
else
m_ElemSize = elem_size;
m_Elements = malloc(m_ElemSize*m_ElemNum);
m_End = (void*)((char*)m_Elements+m_ElemSize*m_ElemNum);
assert(m_Elements != NULL);
//construct list
int i;
void *node = m_Elements;
for(i=0; i<m_ElemNum-1; ++i)
{
*(void**)node = (void*)((char*)node+m_ElemSize);
node = *(void**)node;
}
*(void**)node = NULL;
m_FreeHead = m_Elements; //list head
}
ArrayObjectPool::~ArrayObjectPool()
{
free(m_Elements);
}
void* ArrayObjectPool::Get()
{
if(m_FreeHead == NULL)
return NULL;
void *temp = m_FreeHead;
m_FreeHead = *(void**)m_FreeHead;
return temp;
}
bool ArrayObjectPool::Recycle(void *elem)
{
if(elem<m_Elements || elem>=m_End)
return false;
*(void**)elem = m_FreeHead;
m_FreeHead = elem;
return true;
}
}
The question is I can't understand what does this means:
int i;
void *node = m_Elements;
for(i=0; i<m_ElemNum-1; ++i)
{
*(void**)node = (void*)((char*)node+m_ElemSize);
node = *(void**)node;
}
and what the *(void**) means? thanks!
It's treating the memory as if it were a union between the user's data type, and void*. When the blocks are in the free block list, the void* is used.
You can think of it as:
union ObjectInObjectPool
{
void* ptr_next_free_block;
UserType content;
};
and then that loop is basically doing:
ObjectInObjectPool* node = m_Elements;
for(i=0; i<m_ElemNum-1; ++i) {
node->ptr_next_free_block = node + 1;
node = node->ptr_next_free_block;
}
except that the programmer did by hand all the pointer arithmetic that the compiler's type checker usually does.
A void* is a pointer value that points to untyped memory. When you do *(void**)node = ..., what it is really doing is *node = .... However, with the latter, you are trying to assign something to a void which doesn't make sense with C++'s type system; you have to do as in the former and cast it to a void** so that *node will be a void*, not a void, and you can assign to it.
node = *(void**)node is just node = *node but forcing the type system to work. It just does "assign to node the value of the memory at *node interpreted as a void*".

C++: Strict aliasing vs union abuse

Apologies in advance for what may be a silly first post on well-trodden ground. While there is plenty of material on the subject, very little of it is definitive and/or intelligible to me.
I have an AlignedArray template class to dynamically allocate memory on the heap with arbitrary alignment (I need 32-byte alignment for AVX assembly routines). This requires some ugly pointer manipulation.
Agner Fog provides a sample class in cppexamples.zip that abuses a union to do so (http://www.agner.org/optimize/optimization_manuals.zip). However, I know that writing to one member of a union and then reading from another results in UB.
AFAICT it is safe to alias any pointer type to a char *, but only in one direction. This is where my understanding gets fuzzy. Here's an abridged version of my AlignedArray
class (essentially a rewrite of Agner's, to help my understanding):
template <typename T, size_t alignment = 32>
class AlignedArray
{
size_t m_size;
char * m_unaligned;
T * m_aligned;
public:
AlignedArray (size_t const size)
: m_size(0)
, m_unaligned(0)
, m_aligned(0)
{
this->size(size);
}
~AlignedArray ()
{
this->size(0);
}
T const & operator [] (size_t const i) const { return m_aligned[i]; }
T & operator [] (size_t const i) { return m_aligned[i]; }
size_t const size () { return m_size; }
void size (size_t const size)
{
if (size > 0)
{
if (size != m_size)
{
char * unaligned = 0;
unaligned = new char [size * sizeof(T) + alignment - 1];
if (unaligned)
{
// Agner:
/*
union {
char * c;
T * t;
size_t s;
} aligned;
aligned.c = unaligned + alignment - 1;
aligned.s &= ~(alignment - 1);
*/
// Me:
T * aligned = reinterpret_cast<T *>((reinterpret_cast<size_t>(unaligned) + alignment - 1) & ~(alignment - 1));
if (m_unaligned)
{
// Agner:
//memcpy(aligned.c, m_aligned, std::min(size, m_size));
// Me:
memcpy(aligned, m_aligned, std::min(size, m_size));
delete [] m_unaligned;
}
m_size = size;
m_unaligned = unaligned;
// Agner:
//m_aligned = aligned.t;
// Me:
m_aligned = aligned;
}
return;
}
return;
}
if (m_unaligned)
{
delete [] m_unaligned;
m_size = 0;
m_unaligned = 0;
m_aligned = 0;
}
}
};
So which method is safe(r)?
I have code that implements the (replacement) new and delete operators, suitable for SIMD (i.e., SSE / AVX). It uses the following functions that you might find useful:
static inline void *G0__SIMD_malloc (size_t size)
{
constexpr size_t align = G0_SIMD_ALIGN;
void *ptr, *uptr;
static_assert(G0_SIMD_ALIGN >= sizeof(void *),
"insufficient alignment for pointer storage");
static_assert((G0_SIMD_ALIGN & (G0_SIMD_ALIGN - 1)) == 0,
"G0_SIMD_ALIGN value must be a power of (2)");
size += align; // raw pointer storage with alignment padding.
if ((uptr = malloc(size)) == nullptr)
return nullptr;
// size_t addr = reinterpret_cast<size_t>(uptr);
uintptr_t addr = reinterpret_cast<uintptr_t>(uptr);
ptr = reinterpret_cast<void *>
((addr + align) & ~(align - 1));
*(reinterpret_cast<void **>(ptr) - 1) = uptr; // (raw ptr)
return ptr;
}
static inline void G0__SIMD_free (void *ptr)
{
if (ptr != nullptr)
free(*(reinterpret_cast<void **>(ptr) - 1)); // (raw ptr)
}
This should be easy to adapt. Obviously you would replace malloc and free, since you're using the global new and delete for raw (char) storage. It assumes that size_t is sufficiently wide for address arithmetic - true in practice, but uintptr_t from <cstdint> would be more correct.
To answer your question, both of those methods are just as safe. The only two operations that are really stinky there are the cast to size_t and new char[stuff]. You should at least be using uintptr_t from <cstdint> for the first. The second operation creates your only pointer aliasing issue as technically the char constructor is run on each char element and that constitutes accessing the data through the char pointer. You should use malloc instead.
The other supposed 'pointer aliasing' isn't an issue. And that's because other than the new operation you aren't accessing any data through the aliased pointers. You are only accessing data through the T * you get after alignment.
Of course, you have to remember to construct all of your array elements. This is true even in your version. Who knows what kind of T people will put there. And, of course, if you do that, you'll have to remember to call their destructors, and have to remember to handle exceptions when you copy them (memcpy doesn't cut it).
If you have a particular C++11 feature, you do not need to do this. C++11 has a function specifically for aligning pointers to arbitrary boundaries. The interface is a little funky, but it should do the job. The call is ::std::align defined in <memory>.Thanks to R. Martinho Fernandes for pointing it out.
Here is a version of your function with the suggested fixed:
#include <cstdint> // For uintptr_t
#include <cstdlib> // For malloc
#include <algorithm>
template <typename T, size_t alignment = 32>
class AlignedArray
{
size_t m_size;
void * m_unaligned;
T * m_aligned;
public:
AlignedArray (size_t const size)
: m_size(0)
, m_unaligned(0)
, m_aligned(0)
{
this->size(size);
}
~AlignedArray ()
{
this->size(0);
}
T const & operator [] (size_t const i) const { return m_aligned[i]; }
T & operator [] (size_t const i) { return m_aligned[i]; }
size_t size() const { return m_size; }
void size (size_t const size)
{
using ::std::uintptr_t;
using ::std::malloc;
if (size > 0)
{
if (size != m_size)
{
void * unaligned = 0;
unaligned = malloc(size * sizeof(T) + alignment - 1);
if (unaligned)
{
T * aligned = reinterpret_cast<T *>((reinterpret_cast<uintptr_t>(unaligned) + alignment - 1) & ~(alignment - 1));
if (m_unaligned)
{
::std::size_t constructed = 0;
const ::std::size_t num_to_copy = ::std::min(size, m_size);
try {
for (constructed = 0; constructed < num_to_copy; ++constructed) {
new(aligned + constructed) T(m_aligned[constructed]);
}
for (; constructed < size; ++constructed) {
new(aligned + constructed) T;
}
} catch (...) {
for (::std::size_t i = 0; i < constructed; ++i) {
aligned[i].T::~T();
}
::std::free(unaligned);
throw;
}
for (size_t i = 0; i < m_size; ++i) {
m_aligned[i].T::~T();
}
free(m_unaligned);
}
m_size = size;
m_unaligned = unaligned;
m_aligned = aligned;
}
}
} else if (m_unaligned) { // and size <= 0
for (::std::size_t i = 0; i < m_size; ++i) {
m_aligned[i].T::~T();
}
::std::free(m_unaligned);
m_size = 0;
m_unaligned = 0;
m_aligned = 0;
}
}
};