efficent way to save objects into binary files - c++

I've a class that consists basically of a matrix of vectors: vector< MyFeatVector<T> > m_vCells, where the outer vector represents the matrix. Each element in this matrix is then a vector (I extended the stl vector class and named it MyFeatVector<T>).
I'm trying to code an efficient method to store objects of this class in binary files.
Up to now, I require three nested loops:
foutput.write( reinterpret_cast<char*>( &(this->at(dy,dx,dz)) ), sizeof(T) );
where this->at(dy,dx,dz) retrieves the dz element of the vector at position [dy,dx].
Is there any possibility to store the m_vCells private member without using loops? I tried something like: foutput.write(reinterpret_cast<char*>(&(this->m_vCells[0])), (this->m_vCells.size())*sizeof(CFeatureVector<T>)); which seems not to work correctly. We can assume that all the vectors in this matrix have the same size, although a more general solution is also welcomed :-)
Furthermore, following my nested-loop implementation, storing objects of this class in binary files seem to require more physical space than storing the same objects in plain-text files. Which is a bit weird.
I was trying to follow the suggestion under http://forum.allaboutcircuits.com/showthread.php?t=16465 but couldn't arrive into a proper solution.
Thanks!
Below a simplified example of my serialization and unserialization methods.
template < typename T >
bool MyFeatMatrix<T>::writeBinary( const string & ofile ){
ofstream foutput(ofile.c_str(), ios::out|ios::binary);
foutput.write(reinterpret_cast<char*>(&this->m_nHeight), sizeof(int));
foutput.write(reinterpret_cast<char*>(&this->m_nWidth), sizeof(int));
foutput.write(reinterpret_cast<char*>(&this->m_nDepth), sizeof(int));
//foutput.write(reinterpret_cast<char*>(&(this->m_vCells[0])), nSze*sizeof(CFeatureVector<T>));
for(register int dy=0; dy < this->m_nHeight; dy++){
for(register int dx=0; dx < this->m_nWidth; dx++){
for(register int dz=0; dz < this->m_nDepth; dz++){
foutput.write( reinterpret_cast<char*>( &(this->at(dy,dx,dz)) ), sizeof(T) );
}
}
}
foutput.close();
return true;
}
template < typename T >
bool MyFeatMatrix<T>::readBinary( const string & ifile ){
ifstream finput(ifile.c_str(), ios::in|ios::binary);
int nHeight, nWidth, nDepth;
finput.read(reinterpret_cast<char*>(&nHeight), sizeof(int));
finput.read(reinterpret_cast<char*>(&nWidth), sizeof(int));
finput.read(reinterpret_cast<char*>(&nDepth), sizeof(int));
this->resize(nHeight, nWidth, nDepth);
for(register int dy=0; dy < this->m_nHeight; dy++){
for(register int dx=0; dx < this->m_nWidth; dx++){
for(register int dz=0; dz < this->m_nDepth; dz++){
finput.read( reinterpret_cast<char*>( &(this->at(dy,dx,dz)) ), sizeof(T) );
}
}
}
finput.close();
return true;
}

A most efficient method is to store the objects into an array (or contiguous space), then blast the buffer to the file. An advantage is that the disk platters don't have waste time ramping up and also the writing can be performed contiguously instead of in random locations.
If this is your performance bottleneck, you may want to consider using multiple threads, one extra thread to handle the output. Dump the objects into a buffer, set a flag, then the writing thread will handle the output, releaving your main task to perform more important tasks.
Edit 1: Serializing Example
The following code has not been compiled and is for illustrative purposes only.
#include <fstream>
#include <algorithm>
using std::ofstream;
using std::fill;
class binary_stream_interface
{
virtual void load_from_buffer(const unsigned char *& buf_ptr) = 0;
virtual size_t size_on_stream(void) const = 0;
virtual void store_to_buffer(unsigned char *& buf_ptr) const = 0;
};
struct Pet
: public binary_stream_interface,
max_name_length(32)
{
std::string name;
unsigned int age;
const unsigned int max_name_length;
void load_from_buffer(const unsigned char *& buf_ptr)
{
age = *((unsigned int *) buf_ptr);
buf_ptr += sizeof(unsigned int);
name = std::string((char *) buf_ptr);
buf_ptr += max_name_length;
return;
}
size_t size_on_stream(void) const
{
return sizeof(unsigned int) + max_name_length;
}
void store_to_buffer(unsigned char *& buf_ptr) const
{
*((unsigned int *) buf_ptr) = age;
buf_ptr += sizeof(unsigned int);
std::fill(buf_ptr, 0, max_name_length);
strncpy((char *) buf_ptr, name.c_str(), max_name_length);
buf_ptr += max_name_length;
return;
}
};
int main(void)
{
Pet dog;
dog.name = "Fido";
dog.age = 5;
ofstream data_file("pet_data.bin", std::ios::binary);
// Determine size of buffer
size_t buffer_size = dog.size_on_stream();
// Allocate the buffer
unsigned char * buffer = new unsigned char [buffer_size];
unsigned char * buf_ptr = buffer;
// Write / store the object into the buffer.
dog.store_to_buffer(buf_ptr);
// Write the buffer to the file / stream.
data_file.write((char *) buffer, buffer_size);
data_file.close();
delete [] buffer;
return 0;
}
Edit 2: A class with a vector of strings
class Many_Strings
: public binary_stream_interface
{
enum {MAX_STRING_SIZE = 32};
size_t size_on_stream(void) const
{
return m_string_container.size() * MAX_STRING_SIZE // Total size of strings.
+ sizeof(size_t); // with room for the quantity variable.
}
void store_to_buffer(unsigned char *& buf_ptr) const
{
// Treat the vector<string> as a variable length field.
// Store the quantity of strings into the buffer,
// followed by the content.
size_t string_quantity = m_string_container.size();
*((size_t *) buf_ptr) = string_quantity;
buf_ptr += sizeof(size_t);
for (size_t i = 0; i < string_quantity; ++i)
{
// Each string is a fixed length field.
// Pad with '\0' first, then copy the data.
std::fill((char *)buf_ptr, 0, MAX_STRING_SIZE);
strncpy(buf_ptr, m_string_container[i].c_str(), MAX_STRING_SIZE);
buf_ptr += MAX_STRING_SIZE;
}
}
void load_from_buffer(const unsigned char *& buf_ptr)
{
// The actual coding is left as an exercise for the reader.
// Psuedo code:
// Clear / empty the string container.
// load the quantity variable.
// increment the buffer variable by the size of the quantity variable.
// for each new string (up to the quantity just read)
// load a temporary string from the buffer via buffer pointer.
// push the temporary string into the vector
// increment the buffer pointer by the MAX_STRING_SIZE.
// end-for
}
std::vector<std::string> m_string_container;
};

I'd suggest you to read C++ FAQ on Serialization and you can choose what best fits for your
When you're working with structures and classes, you've to take care of two things
Pointers inside the class
Padding bytes
Both of these could make some notorious results in your output. IMO, the object must implement to serialize and de-serialize the object. The object can know well about the structures, pointers data etc. So it can decide which format can be implemented efficiently.
You will have to iterate anyway or has to wrap it somewhere. Once you finished implementing the serialization and de-serialization function (either you can write using operators or functions). Especially when you're working with stream objects, overloading << and >> operators would be easy to pass the object.
Regarding your question about using underlying pointers of vector, it might work if it's a single vector. But it's not a good idea in the other way.
Update according to the question update.
There are few things you should mind before overriding STL members. They're not really a good candidate for inheritance because it doesn't have any virtual destructors. If you're using basic data types and POD like structures it wont make much issues. But if you use it truly object oriented way, you may face some unpleasant behavior.
Regarding your code
Why you're typecasting it to char*?
The way you serialize the object is your choice. IMO what you did is a basic file write operation in the name of serialization.
Serialization is down to the object. i.e the parameter 'T' in your template class. If you're using POD, or basic types no need of special synchronization. Otherwise you've to carefully choose the way to write the object.
Choosing text format or binary format is your choice. Text format has always has a cost at the same time it's easy to manipulate it rather than binary format.
For example the following code is for simple read and write operation( in text format).
fstream fr("test.txt", ios_base::out | ios_base::binary );
for( int i =0;i <_countof(arr);i++)
fr << arr[i] << ' ';
fr.close();
fstream fw("test.txt", ios_base::in| ios_base::binary);
int j = 0;
while( fw.eof() || j < _countof(arrout))
{
fw >> arrout[j++];
}

It seems to me, that the most direct root to generate a binary file containing a vector is to memory map the file and place it in the mapped region. As pointed out by sarat, you need to worry about how pointers are used within the class. But, boost-interprocess library has a tutorial on how to do this using their shared memory regions which include memory mapped files.

First off, have you looked at Boost.multi_array? Always good to take something ready-made rather than reinventing the wheel.
That said, I'm not sure if this is helpful, but here's how I would implement the basic data structure, and it'd be fairly easy to serialize:
#include <array>
template <typename T, size_t DIM1, size_t DIM2, size_t DIM3>
class ThreeDArray
{
typedef std::array<T, DIM1 * DIM2 * DIM3> array_t;
array_t m_data;
public:
inline size_t size() const { return data.size(); }
inline size_t byte_size() const { return sizeof(T) * data.size(); }
inline T & operator()(size_t i, size_t j, size_t k)
{
return m_data[i + j * DIM1 + k * DIM1 * DIM2];
}
inline const T & operator()(size_t i, size_t j, size_t k) const
{
return m_data[i + j * DIM1 + k * DIM1 * DIM2];
}
inline const T * data() const { return m_data.data(); }
};
You can serialize the data buffer directly:
ThreeDArray<int, 4, 6 11> arr;
/* ... */
std::ofstream outfile("file.bin");
outfile.write(reinterpret_cast<char*>(arr.data()), arr.byte_size());

Related

Standard way of overlay flexible array member

So the server sends the data just as packed structures, so what only need to decode is to overlay the structure pointer on the buffer. However one of the structure is a dynamic array kind of data, but I learned that flexible array member is not a C++ standard feature. How can I do it in standard C++ way, but without copying like a vector?
// on wire format: | field a | length | length of struct b |
// the sturcts are defined packed
__pragma(pack(1))
struct B {
//...
};
struct Msg {
int32_t a;
uint32_t length;
B *data; // how to declare this?
};
__pragma(pack())
char *buf = readIO();
// overlay, without copy and assignments of each field
const Msg *m = reinterpret_cast<const Msg *>(buf);
// access m->data[i] from 0 to length
The common way to do this in C was to declare data as an array of length one as the last struct member. You then allocate the space needed as if the array was larger.
Seems to work fine in C++ as well. You should perhaps wrap access to the data in a span or equivalent, so the implementation details don't leak outside your class.
#include <string>
#include <span>
struct B {
float x;
float y;
};
struct Msg {
int a;
std::size_t length;
B data[1];
};
char* readIO()
{
constexpr int numData = 3;
char* out = new char[sizeof(Msg) + sizeof(B) * (numData - 1)];
return out;
}
int main(){
char *buf = readIO();
// overlay, without copy and assignments of each field
const Msg *m = reinterpret_cast<const Msg *>(buf);
// access m->data[i] from 0 to length
std::span<const B> data(m->data, m->length);
for(auto& b: data)
{
// do something
}
return 0;
}
https://godbolt.org/z/EoMbeE8or
A standard solution is to not represent the array as a member of the message, but rather as a separate object.
struct Msg {
int a;
size_t length;
};
const Msg& m = *reinterpret_cast<const Msg*>(buf);
span<const B> data = {
reinterpret_cast<const B*>(buf + sizeof(Msg)),
m.length,
};
Note that reinterpretation / copying of bytes is not portable between systems with different representations (byte endianness, integer sizes, alignments, subobject packing etc.), and same representation is typically not something that can be assumed in network communication.
// on wire format: | field a | length | length of struct b |
You can't overlay the struct, because you can't guarantee that the binary representation of Msg will match the on wire format. Also int is at least 16 bits, can be any number of bits greater than 16, and size_t has various size depending on architecture.
Write actual accessors to the data. Use fixed width integer types. It will only work if the data actually point to a properly aligned region. This method allows you to write assertions and throw exceptions when stuff goes bad (for example, you can throw on out-of-bounds access to the array).
struct Msg {
constexpr static size_t your_required_alignment = alingof(uint32_t);
char *buf;
Msg (char *buf) : buf(buf) {
assert((uintptr_t)buf % your_required_alignment == 0);
}
int32_t& get_a() { return *reinterpret_cast<int32_t*>(buf); }
uint32_t& length() { return *reinterpret_cast<uint32_t *>(buf + sizeof(int32_t)); }
struct Barray {
char *buf;
Barray(char *buf) : buf(buf) {}
int16_t &operator[](size_t idx) {
return *reinterpret_cast<int16_t*>(buf + idx * sizeof(int16_t));
}
}
Barray data() {
return buf + sizeof(int32_t) + sizoef(uint32_t);
}
};
int main() {
Msg msg(readIO());
std::cout << msg.a() << msg.length();
msg.data()[1] = 5;
// or maybe even implement straight operator[]:
// msg[1] = 5;
}
If the data do not point to a properly aligned region, you have to copy the data, there is no possibility to access them using other types then char.

arithmetic std::unique_ptr and void* (get position of element by address)

I have a class:
Allocate blockSize*maxSize bytes of memory. Has a method which return ptr to free block of memory.
I fill this block in main() for example (see usage below) and send it back to my class.
PROBLEM: How can I get position of sent me back address of initialized data? Because in main() I have void* ptr, not std::unique_ptr, and arithmetic with method memoryPool.get() not possible to use.
class A {
private:
size_t maxBlocks;
size_t blockSize;
std::unique_ptr<unsigned char[]> memoryPool;
void *getObjPtr(const size_t pos) const {
return reinterpret_cast<void *>(memoryPool.get() + pos * blockSize);
}
public:
A(size_t blockSize, size_t maxBlocks) : blockSize(blockSize), maxBlocks(maxBlocks),
memoryPool(new unsigned char[maxBlocks * blockSize]) {}
void *getFree() {
for (size_t i = 0; i < maxBlocks; ++i) {
//check if this block not use (I cut this part)
return getObjPtr(i);
}
}
size_t getPosition(void *data) {
//how can I get position of element?
// auto pos = ((char*)data - memoryPool.get()) / blockSize; - not works
// ok there should be C++ style reinterpret_cast, but to short code I skip it
}
}
Example of usage:
int main() {
A queue(sizeof(int), 10);
int *a = static_cast<int *>(queue.getFree());
*a = 4;
auto pos = queue.getPosition(a);//want to get position
}
What is proper way to do it? Without using std::unique_ptr in main?
When I compile your code with Visual C++ 2019, I get this error:
error C2440: '-': cannot convert from 'unsigned char *' to 'char *'
If I change your code to cast to unsigned char* as per the error message, then it compiles:
auto pos = ((unsigned char*)data - memoryPool.get()) / blockSize;
Whether that does what you intend - well, it appears to, but you haven't clearly specified what getPosition does, so I can only guess.
Please post the error message in the future, not just say it doesn't work! It'll help us help you.

Pointer type casting altering unintended memory

#define ARRAY_SIZE 20
float DataSource[ARRAY_SIZE];
void Read(unsigned char const *Source, unsigned char *Destination, unsigned long DataSize)
{
for ( unsigned long i = 0; i < DataSize; i++)
{
*(Destination + i*DataSize) = *(Source + i*DataSize);
}
}
void fun()
{
int Index;
float Dest;
for ( Index = 0; Index < ARRAY_SIZE; Index++ )
{
Read((unsigned char *)&DataSource[Index], (unsigned char *)&Dest, sizeof(DataSource[Index]));
}
}
I'm having an issue with the above code where upon calling Read(), my Index variable gets overwritten and I am certain the ugly pointer casting is the culprit, but I'm having trouble understanding exactly what is happening here.
The unsigned char pointer types are mandatory because the above code is intended to simulate some driver level software and maintain the same prototype.
Can someone help me to understand the issue here? All the above code is changeable except for the prototype of Read().
The error is here:
for ( unsigned long i = 0; i < DataSize; i++)
{
// vvvvvvvvvv vvvvvvvvvv
*(Destination + i*DataSize) = *(Source + i*DataSize);
}
i * DataSize is always greater than i => "out of bound" access.
Replace with:
for ( unsigned long i = 0; i < DataSize; i++)
{
*(Destination + i) = *(Source + i);
}
You pass in a single float's address to Read (&Dest) and then proceed to write many valuese to consecutive memory locations. Since you're writing random memory at that point it's not unlikely that it could have overwritten index (and other stuff) because stacks usually grow downwards.
This is wrong:
*(Destination + i*DataSize) = *(Source + i*DataSize);
You want to copy DataSize adjacent bytes, not bytes DataSize apart (total span DataSize*DataSize)
Just say
Destination[i] = Source[i];
An amusing (to me) C++ way.
template<typename Data>
struct MemBlockRefHelper {
typedef Data value_type;
Data* data;
size_t size;
MemBlockRefHelper( Data* d, size_t s):data(d), size(s) {}
template<typename Target, typename Other=typename Target::value_type>
Target& Assign( MemBlockRefHelper<Other> const& other ) {
Assert(size == other.size);
for (size_t i = 0; i < size; ++i) {
if (i < other.size) {
data[i] = other.data[i];
} else {
data[i] = 0;
}
}
Target* self = static_cast<Target*>(this);
return *self;
}
};
struct MemBlockRef;
struct MemBlockCRef:MemBlockRefHelper<const unsigned char> {
MemBlockCRef( const unsigned char* d, size_t s ):MemBlockRefHelper<const unsigned char>( d, s ) {}
MemBlockCRef( const MemBlockRef& other );
};
struct MemBlockRef:MemBlockRefHelper<unsigned char> {
MemBlockRef( unsigned char* d, size_t s ):MemBlockRefHelper<unsigned char>( d, s ) {}
MemBlockRef& operator=( MemBlockRef const& other ) {
return Assign< MemBlockRef >( other );
}
MemBlockRef& operator=( MemBlockCRef const& other ) {
return Assign< MemBlockRef, const unsigned char >( other );
}
};
inline MemBlockCRef::MemBlockCRef( const MemBlockRef& other ): MemBlockRefHelper<const unsigned char>( other.data, other.size ) {}
void Read( unsigned char const* Source, unsigned char* Dest, unsigned long DataSize ) {
MemBlockCRef src( Source, DataSize );
MemBlockRef dest( Dest, DataSize );
dest = src;
}
massively over engineered, but the idea is to wrap up the idea of a block of POD memory of a certain size, and provide reference semantics to its contents (initialization is creating a new reference to the same data, assignment does a copy over the referred to data).
Once you have such classes, the code for Read becomes a 3 liner. Well, you can do it in one:
MemBlockRef( Dest, DataSize ) = MemBlockCRef( Source, DataSize );
but that is needless.
Well, so it this entire framework.
But I was amused by writing it.
Let's take a closer look at your Read(): i changes from 0 to DataSize-1; each time you access memory by an offset of i*DataSize... that is, by an offset from 0 to DataSize*(DataSize-1). Looks wrong, as DataSize**2-DataSize makes no sense.
Unlike other answers, I don't want to guess what you wanted. Just showing a kind of "dimensional analysis" that can help spotting the wrongest part of code without reading the author's mind.
You are treating the scalar variable Dest declared inside fun() as an array inside Read(). It seems that both Dest and your Index variable are placed adjacent on the stack which explains that Index gets overwritten exactly when the loop inside Read() is executed for i==1.
So the solution is: declare Dest as an array, too:
float Dest[ARRAY_SIZE];

How to cast from char pointer to custom object pointer

I'm using leveldb to store key-value pairs of integer and MyClass objects. Actually, a key can contain more then one of theses objects.
The problem I have appears when retrieving the data from the database. It compiles, however the values of the MyClass members are not the one I put into the database.
std::string value;
leveldb::Slice keySlice = ANYKEY;
levelDBObj->Get(leveldb::ReadOptions(), keySlice, &value);
The std::string value1 can now contain only one MyClass object or more. So how do I get them?
I already tried the following which didn't work;
1.) directly typecasting and memcpy
std::vector<MyClass> vObjects;
MyClass* obj = (MyClass*)malloc( value.size());
memcpy((void*)obj, (void*) (value.c_str()), value.size());
MyClass dummyObj;
int numValues = value.size()/sizeof(MyClass);
for( int i=0; i<numValues; ++i) {
dummyObj = *(obj+i);
vObjects.push_back(dummyObj);
}
2.) reinterpret_cast to void pointer
MyClass* obj = (MyClass*)malloc( value.size());
const void* vobj = reinterpret_cast<const void*>( value.c_str() );
int numValues = value.size()/sizeof(MyClass);
for( int i=0; i<numValues; ++i) {
const MyClass dummyObj = *(reinterpret_cast<const MyClass*>(vobj)+i);
vObjects.push_back(dummyObj);
}
MyClass is a collection of several public members, e.g. unsigned int and unsigned char and it has a stable size.
I know that there are similar problems with only one object. But in my case the vector can contain more then one and it comes from the leveldb database.
EDIT: SOLUTION
I wrote (de)serialization method for MyClass which then made it working. Thanks for the hint!
void MyClass::serialize( char* outBuff ) {
memcpy(outBuff, (const void*) &aVar, sizeof(aVar));
unsigned int c = sizeof(aVar);
memcpy(outBuff+c, (const void*) &bVar, sizeof(bVar));
c += sizeof(bVAr);
/* and so on */
}
void MyClass::deserialize( const char* inBuff ) {
memcpy((void*) &aVar, inBuff, sizeof(aVar));
unsigned int c = sizeof(aVar);
memcpy((void*) &aVar, inBuff+c, sizeof(aVar));
c += sizeof(aVar);
/* and so on */
}
The get method is as follows (put analogously):
int getValues(leveldb::Slice keySlice, std::vector<MyObj>& values) const {
std::string value;
leveldb::Status status = levelDBObj->Get(leveldb::ReadOptions(), keySlice, &value);
if (!status.ok()) {
values.clear();
return -1;
}
int nValues = value1.size()/sizeof(CHit);
MyObj dummyObj;
for( int i=0; i<nValues; ++i) {
dummyObj.deserialize(value.c_str()+i*sizeof(MyObj));
values.push_back(dummyObj);
}
return 0;
}
You have to serialize your class... otherwise, you're just taking some memory and writing it in leveldb. Whatever you get back is not only going to be different, but it will probably be completely useless too. Check out this question for more info on serialization: How do you serialize an object in C++?
LevelDB does support multiple objects under one key, however, try to avoid doing that unless you have a really good reason. I would recommend that you hash each object with a unique hash (see Google's CityHash if you want a hashing function) and store the serialized objects with their corresponding hash. If your objects is a collection in itself, then you have to serialize all of your objects to an array of bytes and have some method that allows you to determine where each object begins/ends.
Update
A serializable class would look something like this:
class MyClass
{
private:
int _numeric;
string _text;
public:
// constructors
// mutators
void SetNumeric(int num);
void SetText(string text);
static unsigned int SerializableSize()
{
// returns the serializable size of the class with the schema:
// 4 bytes for the numeric (integer)
// 4 bytes for the unsigned int (the size of the text)
// n bytes for the text (it has a variable size)
return sizeof(int) + sizeof(unsigned int) + _text.size();
}
// serialization
int Serialize(const char* buffer, const unsigned int bufferLen, const unsigned int position)
{
// check if the object can be serialized in the available buffer space
if(position+SerializableSize()>bufferLen)
{
// don't write anything and return -1 signaling that there was an error
return -1;
}
unsigned int finalPosition = position;
// write the numeric value
*(int*)(buffer + finalPosition) = _numeric;
// move the final position past the numeric value
finalPosition += sizeof(int);
// write the size of the text
*(unsigned int*)(buffer + finalPosition) = (unsigned int)_text.size();
// move the final position past the size of the string
finalPosition += sizeof(unsigned int);
// write the string
memcpy((void*)(buffer+finalPosition), _text.c_str(), (unsigned int)_text.size());
// move the final position past the end of the string
finalPosition += (unsigned int)_text.size();
// return the number of bytes written to the buffer
return finalPosition-position;
}
// deserialization
static int Deserialize(MyClass& myObject,
const char* buffer,
const unsigned int buffSize,
const unsigned int position)
{
insigned int currPosition = position;
// copy the numeric value
int numeric = *(int*)(buffer + currentPosition);
// increment the current position past the numeric value
currentPosition += sizeof(int);
// copy the size of the text
unsigned int textSize = *(unsigned int*)(buffer + currentPosition);
// increment the current position past the size of the text
currentPosition += sizeof(unsigned int);
// copy the text
string text((buffer+currentPosition), textSize);
if(currentPosition > buffSize)
{
// you decide what to do here
}
// Set your object's values
myObject.SetNumeric(numeric);
myObject.SetText(text);
// return the number of bytes deserialized
return currentPosition - position;
}
};

boost::dynamic_bitset concat performance

I want to concat a big bitset with a smaller one in a way that wont kill performance. Currently my application spends 20% of cpu time in just the following code:
boost::dynamic_bitset<> encode(const std::vector<char>& data)
{
boost::dynamic_bitset<> result;
std::for_each(data.begin(), data.end(), [&](unsigned char symbol)
{
for(size_t n = 0; n < codes_[symbol].size(); ++n)
result.push_back(codes_[symbol][n]); // codes_[symbol][n].size() avarage ~5 bits
});
return result;
}
I have read this post which proposes a solution, which unfortunately will not work for me as the size difference between the sizes of destination bitset and the source bitset is very large.
Any ideas?
If this is not possible to do efficiently with boost::dynamic_bitset then I'm open for other suggestions.
This is because you keep using push_back(), but in actual fact, you already know the size in advance. This means lots of redundant copying and reallocating. You should resize it first. In addition, you don't have to push_back() every value- it should be possible for you to use some form of insert() (I don't actually know it's exact interface, but I think append() is the name) to insert the whole target vector at once, which should be significantly better.
In addition, you're leaving the dynamic_bitset as unsigned long, but as far as I can see, you're only actually inserting unsigned char into it. Changing that could make life easier for you.
I'm also curious as to what type codes_ is- if it's a map you could replace it with a vector, or infact since it's statically sized maximally (256 entries is the max of an unsigned char) , a static array.
I've tried using boost bitset in performance code before and been disappointed. I dug into it a bit, and concluded I'd be better off implementing my own bit-buffer class, although I forget the details of what convinced me boost's class was never going to be fast (I did get as far as inspecting the assembly produced).
I still don't know what the fastest way of building bit-buffers/bitsets/bitstreams or whatever you want to call them is. A colleague is trying to find out with this related question, but at time of writing it's still awaiting a good answer.
I wrote my own bitset class. I appreciate any suggestions for improvements. I will try to look into SSE and see if there is anything useful there.
With my very rough benchmark I got a 11x performance increase while appending 6 bits at a time.
class fast_bitset
{
public:
typedef unsigned long block_type;
static const size_t bits_per_block = sizeof(block_type)*8;
fast_bitset()
: is_open_(true)
, blocks_(1)
, space_(blocks_.size()*bits_per_block){}
void append(const fast_bitset& other)
{
assert(!other.is_open_);
for(size_t n = 0; n < other.blocks_.size()-1; ++n)
append(other.blocks_[n], bits_per_block);
append(other.blocks_.back() >> other.space_, bits_per_block - other.space_);
}
void append(block_type value, size_t n_bits)
{
assert(is_open_);
assert(n_bits < bits_per_block);
if(space_ < n_bits)
{
blocks_.back() = blocks_.back() << space_;
blocks_.back() = blocks_.back() | (value >> (n_bits - space_));
blocks_.push_back(value);
space_ = bits_per_block - (n_bits - space_);
}
else
{
blocks_.back() = blocks_.back() << n_bits;
blocks_.back() = blocks_.back() | value;
space_ -= n_bits;
}
}
void push_back(bool bit)
{
append(bit, 1);
}
bool operator[](size_t index) const
{
assert(!is_open_);
static const size_t high_bit = 1 << (bits_per_block-1);
const size_t block_index = index / bits_per_block;
const size_t bit_index = index % bits_per_block;
const size_t bit_mask = high_bit >> bit_index;
return blocks_[block_index] & bit_mask;
}
void close()
{
blocks_.back() = blocks_.back() << space_;
is_open_ = false;
}
size_t size() const
{
return blocks_.size()*bits_per_block-space_;
}
const std::vector<block_type>& blocks() const {return blocks_;}
class reader
{
public:
reader(const fast_bitset& bitset)
: bitset_(bitset)
, index_(0)
, size_(bitset.size()){}
bool next_bit(){return bitset_[index_++];}
bool eof() const{return index_ >= size_;}
private:
const fast_bitset& bitset_;
size_t index_;
size_t size_;
};
private:
bool is_open_;
std::vector<block_type> blocks_;
size_t space_;
};