I've created the following code for testing shared memory allocators and containers..
The allocator (basic allocator that just keeps a pointer to a memory block + the size:
template<typename T>
struct SharedMemoryAllocator
{
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef T value_type;
void* memory;
std::size_t size;
SharedMemoryAllocator(void* memory, std::size_t size) noexcept : memory(memory), size(size) {};
SharedMemoryAllocator(const SharedMemoryAllocator& other) noexcept : memory(other.memory), size(other.size) {};
template<typename U>
SharedMemoryAllocator(const SharedMemoryAllocator<U>& other) noexcept : memory(other.memory), size(other.size) {};
template<typename U>
SharedMemoryAllocator& operator = (const SharedMemoryAllocator<U>& other) { return *this; }
SharedMemoryAllocator<T>& operator = (const SharedMemoryAllocator& other) { return *this; }
~SharedMemoryAllocator() {}
pointer address(reference value) const {return &value;}
const_pointer address(const_reference value) const {return &value;}
pointer allocate(size_type n, const void* hint = 0) {return static_cast<T*>(memory);}
void deallocate(T* ptr, size_type n) {}
template<typename U, typename... Args>
void construct(U* ptr, Args&&... args) {::new(static_cast<void*>(ptr)) U(std::forward<Args>(args)...);}
void construct(pointer ptr, const T& val) {new(static_cast<T*>(ptr)) T(val);}
template<typename U>
void destroy(U* ptr) {ptr->~U();}
void destroy(pointer ptr) {ptr->~T();}
size_type max_size() const {return size / sizeof(T);}
template<typename U>
struct rebind {typedef SharedMemoryAllocator<U> other;};
};
template <typename T, typename U>
inline bool operator == (const SharedMemoryAllocator<T>& a, const SharedMemoryAllocator<U>& b)
{
return (a == b);
}
template <typename T, typename U>
inline bool operator != (const SharedMemoryAllocator<T>& a, const SharedMemoryAllocator<U>& b)
{
return !(a == b);
}
The container (Just a container that allocates memory using the SharedMemory allocator):
template<typename T, typename allocator = std::allocator<T>>
class CVector
{
private:
T* memory;
std::size_t size, capacity;
allocator alloc;
public:
CVector() : memory(nullptr), size(0), capacity(0), alloc(allocator()) {}
CVector(const allocator &alloc) : memory(nullptr), size(0), capacity(0), alloc(alloc) {}
~CVector()
{
if(memory)
{
for(std::size_t i = 0; i < this->size; ++i)
{
alloc.destroy(memory + i);
}
alloc.deallocate(memory, capacity);
memory = nullptr;
}
}
void reserve(std::size_t size)
{
if(capacity < size)
{
capacity = size;
void* mem = alloc.allocate(capacity);
if(memory && memory != mem)
{
memcpy(static_cast<char*>(mem), memory, size * sizeof(T));
for(std::size_t i = 0; i < this->size; ++i)
{
alloc.destroy(memory + i);
}
alloc.deallocate(memory, capacity);
memory = nullptr;
}
memory = static_cast<T*>(mem);
}
}
void push_back(T&& value)
{
if(capacity == 0)
{
reserve(1);
}
if(size >= capacity)
{
reserve(capacity * 2);
}
alloc.construct(memory + size++, value);
}
T& operator[](std::size_t size)
{
return *(memory + size);
}
const T& operator[](std::size_t size) const
{
return *(memory + size);
}
};
Main:
int main()
{
MemoryMap mem{"Local\\Test", 5000, std::ios::in | std::ios::out};
mem.open();
mem.map();
typedef CVector<int, SharedMemoryAllocator<int>> SHMVec;
SHMVec* vec = ::new(mem.data()) SHMVec(SharedMemoryAllocator<int>(static_cast<char*>(mem.data()) + sizeof(SHMVec), 1024 - sizeof(SHMVec)));
vec->reserve(100);
vec->push_back(100);
vec->push_back(200);
vec->push_back(300);
std::cout<<"Address: "<<mem.data()<<"\n";
std::cin.get();
SHMVec* ptrVec = reinterpret_cast<SHMVec*>(mem.data());
std::cout<<(*ptrVec)[0];
vec->~SHMVec();
}
I read somewhere that std::vector cannot be placed in SharedMemory because it might do some tracking of its own in the current process's address space. So I decided to write my own "vector" which is just a cheap class.
Next, I allocate a shared memory block and I construct the container into that block as shown above in "main".
In the other program, I do (Main):
int main()
{
MemoryMap mem{"Local\\Test", 5000, std::ios::in};
mem.open();
mem.map();
typedef CVector<int, SharedMemoryAllocator<int>> SHMVec;
std::cout<<"Address: "<<mem.data()<<"\n";
SHMVec* ptrVec = reinterpret_cast<SHMVec*>(mem.data());
std::cout<<(*ptrVec)[0];
}
When both programs map the shared memory block at 0x370000 it works. However, if one program allocates the SharedMemoryBlock at 0x370000 and the second at 0x380000, it crashes (the second program crashes trying to access the container created by the first).
Any ideas why this happens? The container is IN the shared memory block. Why does it matter that the address of the blocks have to be the EXACT same?
Here is your problem:
template<typename T, typename allocator = std::allocator<T>>
class CVector
{
private:
T* memory;
^^^^^^^^^^^
Because in your programs the SHMVector object itself is stored in shared memory, you store its data members in shared memory. Thus the pointer to the elements (memory in this case) is stored in shared memory.
If the shared memory segment is loaded at a different address, then memory will point at an invalid address in the memory space of one of the two programs.
Maybe simple solution: don't put the SHMVec object itself in shared memory, only the elements.
boost::interprocess can help you keeping containers in shared memory - see http://www.boost.org/doc/libs/1_38_0/doc/html/interprocess/allocators_containers.html#interprocess.allocators_containers.containers_explained.containers
Related
#include <cstdlib>
#include <memory>
#include <unordered_map>
template <class T>
struct allocator {
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef T value_type;
allocator() = default;
template <class U>
allocator(const allocator<U>&) {}
T* allocate(std::size_t n) const { return (T*)malloc(n); } // debugger breaks here
void deallocate(T* p, std::size_t) const { free(p); }
};
using allocations_map =
std::unordered_map<void*, std::size_t, std::hash<void*>,
std::equal_to<void*>,
allocator<std::pair<void* const, std::size_t>>>;
allocations_map allocations; // heap corruption in the constructor
void* operator new(std::size_t n) {
auto p = malloc(n);
allocations.emplace(p, n);
return p;
}
void operator delete(void* p) noexcept {
allocations.erase(p);
free(p);
}
int main() { std::vector<int> v(5); }
Why do i corrupt the heap in the constructor of allocations_map? The debugger detects the first heap corruption in a malloc call of the custom allocator, called inside the constructor.
Is there a more elegant solution then to write a non-logging custom allocator for allocations_map? The container shall obviously not log its own allocations.
I also tried two singleton approaches, as suggested in the comments, without success:
allocations_map& get_allocations_map()
{
static allocations_map* allocations_ptr = nullptr;
if (allocations_ptr == nullptr)
{
allocations_ptr = (allocations_map*) malloc(sizeof(allocations_map));
allocations_ptr = new(allocations_ptr)allocations_map;
}
return *allocations_ptr;
}
allocations_map& get_allocations_map()
{
static allocations_map allocations;
return allocations;
}
From std::allocator::allocate allocator allocates n "things" not n bytes. You should change:
T* allocate(std::size_t n) const { return (T*)malloc(n); }
to:
T* allocate(std::size_t n) const { return (T*)malloc(sizeof(T) * n); }
Why do i corrupt the heap in the constructor of allocations_map?
Because the constructor of elements stored in that map access allocated memory out-of-bounds.
Is it possible to make std::vector of custom structs allocate aligned memory for further processing with SIMD instructions? If it is possible to do with Allocator, does anyone happen to have such an allocator he could share?
Edit: I removed the inheritance of std::allocator as suggested by GManNickG and made the alignment parameter a compile time thing.
I recently wrote this piece of code. It's not tested as much as I would like it so go on and report errors. :-)
enum class Alignment : size_t
{
Normal = sizeof(void*),
SSE = 16,
AVX = 32,
};
namespace detail {
void* allocate_aligned_memory(size_t align, size_t size);
void deallocate_aligned_memory(void* ptr) noexcept;
}
template <typename T, Alignment Align = Alignment::AVX>
class AlignedAllocator;
template <Alignment Align>
class AlignedAllocator<void, Align>
{
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template <class U> struct rebind { typedef AlignedAllocator<U, Align> other; };
};
template <typename T, Alignment Align>
class AlignedAllocator
{
public:
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef std::true_type propagate_on_container_move_assignment;
template <class U>
struct rebind { typedef AlignedAllocator<U, Align> other; };
public:
AlignedAllocator() noexcept
{}
template <class U>
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
{}
size_type
max_size() const noexcept
{ return (size_type(~0) - size_type(Align)) / sizeof(T); }
pointer
address(reference x) const noexcept
{ return std::addressof(x); }
const_pointer
address(const_reference x) const noexcept
{ return std::addressof(x); }
pointer
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0)
{
const size_type alignment = static_cast<size_type>( Align );
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
if (ptr == nullptr) {
throw std::bad_alloc();
}
return reinterpret_cast<pointer>(ptr);
}
void
deallocate(pointer p, size_type) noexcept
{ return detail::deallocate_aligned_memory(p); }
template <class U, class ...Args>
void
construct(U* p, Args&&... args)
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }
void
destroy(pointer p)
{ p->~T(); }
};
template <typename T, Alignment Align>
class AlignedAllocator<const T, Align>
{
public:
typedef T value_type;
typedef const T* pointer;
typedef const T* const_pointer;
typedef const T& reference;
typedef const T& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef std::true_type propagate_on_container_move_assignment;
template <class U>
struct rebind { typedef AlignedAllocator<U, Align> other; };
public:
AlignedAllocator() noexcept
{}
template <class U>
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
{}
size_type
max_size() const noexcept
{ return (size_type(~0) - size_type(Align)) / sizeof(T); }
const_pointer
address(const_reference x) const noexcept
{ return std::addressof(x); }
pointer
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0)
{
const size_type alignment = static_cast<size_type>( Align );
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
if (ptr == nullptr) {
throw std::bad_alloc();
}
return reinterpret_cast<pointer>(ptr);
}
void
deallocate(pointer p, size_type) noexcept
{ return detail::deallocate_aligned_memory(p); }
template <class U, class ...Args>
void
construct(U* p, Args&&... args)
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }
void
destroy(pointer p)
{ p->~T(); }
};
template <typename T, Alignment TAlign, typename U, Alignment UAlign>
inline
bool
operator== (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept
{ return TAlign == UAlign; }
template <typename T, Alignment TAlign, typename U, Alignment UAlign>
inline
bool
operator!= (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept
{ return TAlign != UAlign; }
The implementation for the actual allocate calls is posix only but you can extent that easily.
void*
detail::allocate_aligned_memory(size_t align, size_t size)
{
assert(align >= sizeof(void*));
assert(nail::is_power_of_two(align));
if (size == 0) {
return nullptr;
}
void* ptr = nullptr;
int rc = posix_memalign(&ptr, align, size);
if (rc != 0) {
return nullptr;
}
return ptr;
}
void
detail::deallocate_aligned_memory(void *ptr) noexcept
{
return free(ptr);
}
Needs C++11, btw.
In the upcoming version 1.56, the Boost library will include Boost.Align. Among other memory alignment helpers it provides boost::alignment::aligned_allocator, which can be used a drop-in replacement for std::allocator and allows you to specify an alignment. See the documentation on https://boostorg.github.io/align/
Starting in C++17, just use std::vector<__m256i> or with any other aligned type. There's aligned version of operator new, it is used by std::allocator for aligned types (as well as by plain new-expression, so new __m256i[N] is also safe starting in C++17).
There's a comment by #MarcGlisse saying this, making this an answer to make it more visible.
Yes, it should be possible. If you put this question on google then you will get lots of sample code, below is some promising results:
https://bitbucket.org/marten/alignedallocator/wiki/Home
http://code.google.com/p/mastermind-strategy/source/browse/trunk/src/util/aligned_allocator.hpp?r=167
https://gist.github.com/1471329
I've started to code FixedAllocator class that allocates memory by chunks of fixed size and works as stack, so that it works in constant time to allocate/deallocate. Actually, I'll need this class to use it with std::vector, so that I have to implement all std::allocator methods.
Everything here is for learning purposes so that - I don't need any complete implementations or headers - the real ones have a lot of code over my problem.
And I got stuck on allocate/deallocate methods - I understand that I should somehow reserve some memory pool - for example using vector, I understand that I should use static_cast to convert char type into T-type, but I don't completely understand how to rebuild this two ideas into list. Deallocate takes pointer as argument, not TNode - that's maybe the main problem.
If someone already wrote this kind of allocator - answer with code will be perfect.
Any suggestions, links and other source of knowledge are welcome. Thank you.
Here is the skeleton of code:
template <typename T, unsigned int nodeSize>
class FixedAllocator : public std::allocator<T>{
private:
static size_t Used;
static const size_t MAX_SIZE = 100000;
struct TNode {
TNode* next;
char data[nodeSize];
};
TNode* head;
public:
typedef T* pointer;
typedef const T* const_pointer;
typedef T & reference;
typedef const T & const_reference;
typedef T value_type;
template <typename U> struct rebind { typedef allocator<U> other; };
FixedAllocator() {
if (Pool.empty()) {
Pool.resize(MAX_SIZE * sizeof(T));
Used = 0;
}
}
FixedAllocator(const FixedAllocator &) {
}
template<typename U>
FixedAllocator(const FixedAllocator<U> &) {
if (Pool.empty()) {
Pool.resize(MAX_SIZE * sizeof(T));
Used = 0;
}
}
pointer address(reference x) const {
return &x;
}
const_pointer address(const_reference x) const {
return &x;
}
pointer allocate(size_t n, FixedAllocator<void>::const_pointer = 0) {}
void deallocate(pointer, size_t) {}
size_t max_size() const throw() {
return MAX_SIZE - size;
}
void construct(pointer p, const_reference val) {
new (static_cast<void*>(p)) value_type(val);
}
void destroy(pointer p) {
p->~value_type();
}
};
Is it possible to make std::vector of custom structs allocate aligned memory for further processing with SIMD instructions? If it is possible to do with Allocator, does anyone happen to have such an allocator he could share?
Edit: I removed the inheritance of std::allocator as suggested by GManNickG and made the alignment parameter a compile time thing.
I recently wrote this piece of code. It's not tested as much as I would like it so go on and report errors. :-)
enum class Alignment : size_t
{
Normal = sizeof(void*),
SSE = 16,
AVX = 32,
};
namespace detail {
void* allocate_aligned_memory(size_t align, size_t size);
void deallocate_aligned_memory(void* ptr) noexcept;
}
template <typename T, Alignment Align = Alignment::AVX>
class AlignedAllocator;
template <Alignment Align>
class AlignedAllocator<void, Align>
{
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template <class U> struct rebind { typedef AlignedAllocator<U, Align> other; };
};
template <typename T, Alignment Align>
class AlignedAllocator
{
public:
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef std::true_type propagate_on_container_move_assignment;
template <class U>
struct rebind { typedef AlignedAllocator<U, Align> other; };
public:
AlignedAllocator() noexcept
{}
template <class U>
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
{}
size_type
max_size() const noexcept
{ return (size_type(~0) - size_type(Align)) / sizeof(T); }
pointer
address(reference x) const noexcept
{ return std::addressof(x); }
const_pointer
address(const_reference x) const noexcept
{ return std::addressof(x); }
pointer
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0)
{
const size_type alignment = static_cast<size_type>( Align );
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
if (ptr == nullptr) {
throw std::bad_alloc();
}
return reinterpret_cast<pointer>(ptr);
}
void
deallocate(pointer p, size_type) noexcept
{ return detail::deallocate_aligned_memory(p); }
template <class U, class ...Args>
void
construct(U* p, Args&&... args)
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }
void
destroy(pointer p)
{ p->~T(); }
};
template <typename T, Alignment Align>
class AlignedAllocator<const T, Align>
{
public:
typedef T value_type;
typedef const T* pointer;
typedef const T* const_pointer;
typedef const T& reference;
typedef const T& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef std::true_type propagate_on_container_move_assignment;
template <class U>
struct rebind { typedef AlignedAllocator<U, Align> other; };
public:
AlignedAllocator() noexcept
{}
template <class U>
AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
{}
size_type
max_size() const noexcept
{ return (size_type(~0) - size_type(Align)) / sizeof(T); }
const_pointer
address(const_reference x) const noexcept
{ return std::addressof(x); }
pointer
allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = 0)
{
const size_type alignment = static_cast<size_type>( Align );
void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
if (ptr == nullptr) {
throw std::bad_alloc();
}
return reinterpret_cast<pointer>(ptr);
}
void
deallocate(pointer p, size_type) noexcept
{ return detail::deallocate_aligned_memory(p); }
template <class U, class ...Args>
void
construct(U* p, Args&&... args)
{ ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }
void
destroy(pointer p)
{ p->~T(); }
};
template <typename T, Alignment TAlign, typename U, Alignment UAlign>
inline
bool
operator== (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept
{ return TAlign == UAlign; }
template <typename T, Alignment TAlign, typename U, Alignment UAlign>
inline
bool
operator!= (const AlignedAllocator<T,TAlign>&, const AlignedAllocator<U, UAlign>&) noexcept
{ return TAlign != UAlign; }
The implementation for the actual allocate calls is posix only but you can extent that easily.
void*
detail::allocate_aligned_memory(size_t align, size_t size)
{
assert(align >= sizeof(void*));
assert(nail::is_power_of_two(align));
if (size == 0) {
return nullptr;
}
void* ptr = nullptr;
int rc = posix_memalign(&ptr, align, size);
if (rc != 0) {
return nullptr;
}
return ptr;
}
void
detail::deallocate_aligned_memory(void *ptr) noexcept
{
return free(ptr);
}
Needs C++11, btw.
In the upcoming version 1.56, the Boost library will include Boost.Align. Among other memory alignment helpers it provides boost::alignment::aligned_allocator, which can be used a drop-in replacement for std::allocator and allows you to specify an alignment. See the documentation on https://boostorg.github.io/align/
Starting in C++17, just use std::vector<__m256i> or with any other aligned type. There's aligned version of operator new, it is used by std::allocator for aligned types (as well as by plain new-expression, so new __m256i[N] is also safe starting in C++17).
There's a comment by #MarcGlisse saying this, making this an answer to make it more visible.
Yes, it should be possible. If you put this question on google then you will get lots of sample code, below is some promising results:
https://bitbucket.org/marten/alignedallocator/wiki/Home
http://code.google.com/p/mastermind-strategy/source/browse/trunk/src/util/aligned_allocator.hpp?r=167
https://gist.github.com/1471329
Per suggestion from #BenVoigt in response to my question regarding stack allocated stringstream storage, I designed a stack_allocator (code follows below), and declared a basic_ostringstream type using it.
I am experiencing a strange bug though.
The first character I place into the stream is omitted when I print the resulting string!
Here is an example:
template<typename T, size_t capacity, size_t arr_size>
__thread bool stack_allocator<T, capacity, arr_size>::_used[arr_size] = {};
template<typename T, size_t capacity, size_t arr_size>
__thread T stack_allocator<T, capacity, arr_size>::_buf[capacity][arr_size] = {};
typedef std::basic_ostringstream<char,
std::char_traits<char>,
stack_allocator<char, 1024, 5> > stack_ostringstream;
int main()
{
stack_ostringstream _os;
_os << "hello world";
std::cout << _os.str() << std::endl;
return 0;
}
The resulting output is:
ello world
Can anyone elaborate on what is happening to the first character?
The stack_allocator impl follows: It's pretty simplistic, and I'm sure has lots of room for improvement (not withstanding fixing the bug!)
#include <cstddef>
#include <limits>
#include <bits/allocator.h>
template<typename T, size_t capacity = 1024, size_t arr_size = 5>
class stack_allocator
{
public:
typedef T value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
inline explicit stack_allocator() { }
template<typename U>
inline explicit stack_allocator(const stack_allocator<U, capacity, arr_size>& that) { }
inline ~stack_allocator() {}
template<typename U>
struct rebind
{
typedef stack_allocator<U, capacity, arr_size> other;
};
inline pointer allocate(size_type cnt, typename std::allocator<void>::const_pointer = 0)
{
if (cnt > capacity)
return reinterpret_cast<pointer>(::operator new(cnt * sizeof (T)));
for (size_t i = 0; i < arr_size; ++i)
{
if (!_used[i])
{
_used[i] = true;
return reinterpret_cast<pointer>(_buf[i]);
}
}
}
inline void deallocate(pointer p, size_type)
{
for (size_t i = 0; i < arr_size; ++i)
{
if (p != _buf[i])
continue;
_used[i] = false;
return;
}
::operator delete(p);
}
inline pointer address(reference r) { return &r; }
inline const_pointer address(const_reference r) { return &r; }
inline size_type max_size() const
{
return std::numeric_limits<size_type>::max() / sizeof(T);
}
inline void construct(pointer p, const T& t) { new(p) T(t); }
inline void destroy(pointer p) { p->~T(); }
inline bool operator==(const stack_allocator&) const { return true; }
inline bool operator!=(const stack_allocator& a) const { return !operator==(a); }
private:
static __thread bool _used[arr_size];
static __thread T _buf[capacity][arr_size];
};
Your allocate function can fall off the end if you allocate more than arr_size items. If you use g++ -Wall it will warn you about those sorts of things.
The other problem is that your _buf array indexes are backwards. It should be static T _buf[arr_size][capacity]; which has the arr_size as the row, not the other order that you have it in the original code which makes the capacity be the first index.
Also as a side note, just avoid identifiers that start with leading _ because some such identifiers are reserved for the implementation and it's easier to never use them than to remember the precise rules. Finally, never include the bits/ headers directly, just use the real headers. In this case, memory. I also had to add includes for <iostream> and <sstream> to get it to compile.