I am implementing a MidiReader, and it needs me to read either MSB-first or LSB-first UInts (8, 16, 32 or 64 bits).
I know little about binary and types, so I'm currently copying someone else's code from C#.
class ByteArrayReader
{
public:
unsigned char* ByteArray;
unsigned int Size;
unsigned int Index = 0;
ByteArrayReader(unsigned char* byteArray)
{
if (byteArray == NULL)
{
throw byteArray;
}
ByteArray = byteArray;
Size = (unsigned int)sizeof(byteArray);
Index = 0;
}
char inline Read()
{
return ByteArray[Index++];
}
void inline Forward(unsigned int length = 1)
{
Index += length;
}
void inline Backward(unsigned int length = 1)
{
if (length > Index)
{
throw length;
}
Index -= length;
}
bool operator==(ByteArrayReader) = delete;
};
These are what I copied:
uint16_t inline ReadUInt16()
{
return (uint16_t)((Read() << 8) | Read());
}
uint32_t inline ReadUInt32()
{
return (uint32_t)((((((Read() << 8) | Read()) << 8) | Read()) << 8) | Read());
}
But I'm told that one of them reads an MSB-first UInt. So I want to ask how to read UInt types from binary data elegantly, and also to learn how a uint is represented in bytes.
The part
(uint32_t)((((((Read() << 8) | Read()) << 8) | Read()) << 8) | Read());
has an unspecified order of evaluation: each call to the Read method increments the Index counter, and the compiler is free to evaluate the Read() calls in either order, so the bytes may end up combined in the wrong order.
It would be better to read the bytes into named variables first, so the calls are sequenced:
auto chunk1 = Read(); // Index = x
auto chunk2 = Read(); // Index = x + 1
auto chunk3 = Read(); // Index = x + 2
...
auto result = (chunk1 << 24) | (chunk2 << 16) | ...;
so that the increments of Index happen in a well-defined order.
The order of bytes differs between little-endian and big-endian systems. This is covered in the question Detecting endianness programmatically in a C++ program.
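For reference, a sketch of one common runtime check along the lines discussed in that question (the IsLittleEndian name is just for illustration; since C++20 there is also std::endian in <bit>):
#include <cstdint>
#include <cstring>
// Returns true on little-endian hosts: the least significant byte of the
// probe value is stored at the lowest address.
bool IsLittleEndian()
{
    const uint32_t probe = 1;
    unsigned char firstByte = 0;
    std::memcpy(&firstByte, &probe, 1);
    return firstByte == 1;
}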
Try this:
uint32_t inline ReadUInt32MSBfirst()
{
    // Cast each byte to an unsigned type before shifting so a signed char
    // cannot sign-extend into the high bits.
    uint32_t b1 = (uint8_t)Read();
    uint32_t b2 = (uint8_t)Read();
    uint32_t b3 = (uint8_t)Read();
    uint32_t b4 = (uint8_t)Read();
    return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
}
uint32_t inline ReadUInt32LSBfirst()
{
    uint32_t b1 = (uint8_t)Read();
    uint32_t b2 = (uint8_t)Read();
    uint32_t b3 = (uint8_t)Read();
    uint32_t b4 = (uint8_t)Read();
    return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
}
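The same byte-at-a-time pattern extends to the other widths. As a sketch, here are 16-bit and 64-bit MSB-first variants, assuming the same Read() member as above (the LSB-first versions just reverse the byte order):
uint16_t inline ReadUInt16MSBfirst()
{
    uint16_t b1 = (uint8_t)Read();
    uint16_t b2 = (uint8_t)Read();
    return (uint16_t)((b1 << 8) | b2);
}
uint64_t inline ReadUInt64MSBfirst()
{
    uint64_t result = 0;
    for (int i = 0; i < 8; i++)
        result = (result << 8) | (uint8_t)Read(); // each call is sequenced before the next
    return result;
}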
I have been working on a program that connects to a Minecraft server and exchanges packets with it, but Minecraft's server packets rely heavily on signed VarInts. The site documenting how the communication works explains VarInts and even gives an example implementation in Java.
So I followed it, and it works for non-negative numbers, but for negative numbers it just goes on forever. I searched for other ways of implementing it but couldn't find any.
Here is my version (which, as I said, works only for positive numbers, and that's not quite what I want):
class VarInt
{
public:
char* data = NULL;
int length = 0;
int Read();
void Write(int value);
~VarInt();
private:
int SEGMENT_BIT_MASK = 0x7F;
int CONTINUE_BIT_MASK = 0x80;
};
int VarInt::Read()
{
int value = 0;
int position = 0;
byte currentByte;
int i = 0;
while (true)
{
currentByte = data[i];
value |= (currentByte & SEGMENT_BIT_MASK) << position;
if ((currentByte & CONTINUE_BIT_MASK) == 0)
break;
i++;
position += 7;
if (position >= 32)
std::cout << "VarInt is too Big" << std::endl;
}
return value;
};
void VarInt::Write(int value)
{
bool state = true;
std::vector<byte> bytes;
while (state)
{
if ((value & ~SEGMENT_BIT_MASK) == 0)
{
bytes.push_back(value);
state = false;
break;
}
bytes.push_back(((value & SEGMENT_BIT_MASK) | CONTINUE_BIT_MASK));
value >>= 7;
}
int bytes_size = bytes.size();
length = bytes_size;
data = (char*)malloc(bytes_size);
int i = 0;
while (i != bytes_size)
{
data[i] = bytes.at(i);
i++;
}
};
VarInt::~VarInt()
{
};
And here is how I test it:
#include <iostream>
#include "MDatatypes.h"
int main()
{
ndt::VarInt test;
//Sets value of test to -127
test.Write(-127);
//Sets value of test2 to 255
ndt::VarInt test2;
test2.Write(255);
//Outputing length of each Varint in bytes
std::cout << test.length << '|' << test2.length << std::endl;
//Outputing the values of each Varint
std::cout << test.Read() << '|' << test2.Read() << std::endl;
}
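As a side note on the failing case: the Java example on that site uses the unsigned shift >>>, whereas value >>= 7 on a negative int in C++ sign-extends on typical platforms, so the value never reaches zero. A minimal sketch of a Write that shifts an unsigned copy instead (it assumes the masks and the byte type used in the original code, and is not the reference implementation):
void VarInt::Write(int value)
{
    std::vector<byte> bytes;
    uint32_t v = static_cast<uint32_t>(value); // logical shift, like Java's >>>
    while (true)
    {
        if ((v & ~static_cast<uint32_t>(SEGMENT_BIT_MASK)) == 0)
        {
            bytes.push_back(static_cast<byte>(v));
            break;
        }
        bytes.push_back(static_cast<byte>((v & SEGMENT_BIT_MASK) | CONTINUE_BIT_MASK));
        v >>= 7; // shifts in zeros, so at most 5 iterations for a 32-bit value
    }
    length = static_cast<int>(bytes.size());
    data = (char*)malloc(length);
    for (int i = 0; i < length; i++)
        data[i] = bytes.at(i);
}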
I have written code to save some data with a structure like CIOParams_b_t (see the declaration below) to a file and load it back. Number fields (more than 1 bit) load from the file successfully; however, the boolean fields (1 bit) load incorrectly, seemingly at random.
save data procedure:
//...
std::ofstream file;
file.open(path, std::fstream::out|std::ios::binary|std::ofstream::trunc);
//...
union {
CIOParams_b_t params_b_t;
char raw[6];
} p_data;
p_data.params_b_t.scr_width = params.scr_width;
p_data.params_b_t.scr_hight = params.scr_hight;
p_data.params_b_t.bits = static_cast<int>(params.bits);
p_data.params_b_t.brightness = params.brightness;
p_data.params_b_t.contrast = params.contrast;
p_data.params_b_t.fullscr = getFlag(params.flags, FLAG_FULLSCREEN)?1:0;
p_data.params_b_t.vsync = getFlag(params.flags, FLAG_VSYNC)?1:0;
p_data.params_b_t.mipmap = getFlag(params.flags, FLAG_MIPMAP)?1:0;
p_data.params_b_t.skybox = getFlag(params.flags, FLAG_SKYBOX)?1:0;
file.write(p_data.raw, 6);
//...
load data procedure:
//...
std::ifstream file(path);
//...
union {
CIOParams_b_t params_b_t;
char raw[6];
} p_data;
file.read(p_data.raw, 6);
retVal.scr_width = p_data.params_b_t.scr_width;
retVal.scr_hight = p_data.params_b_t.scr_hight;
retVal.bits = static_cast<CIOParams::cbits>(p_data.params_b_t.bits);
retVal.brightness = p_data.params_b_t.brightness;
retVal.contrast = p_data.params_b_t.contrast;
setFlag(&retVal.flags, FLAG_FULLSCREEN, p_data.params_b_t.fullscr==1?true:false);
setFlag(&retVal.flags, FLAG_VSYNC, p_data.params_b_t.vsync==1?true:false);
setFlag(&retVal.flags, FLAG_MIPMAP, p_data.params_b_t.mipmap==1?true:false);
setFlag(&retVal.flags, FLAG_SKYBOX, p_data.params_b_t.skybox==1?true:false);
file.close();
//...
structs declaration:
struct CIOParams
{
unsigned short int scr_width;
unsigned short int scr_hight;
unsigned char brightness;
unsigned char contrast;
enum cbits
{
b8 = 0,
b16,
b32,
b64
} bits;
unsigned char flags;
bool hasError = false;
};
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
};
Note: the setFlag and getFlag functions work correctly; they performed well in unit tests.
The problem comes from padding over byte boundaries. You assume that your CIOParams_b_t type is 6 bytes long, but that is not guaranteed. In my environment the compiler pads it to 8 bytes:
#include <iostream>
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
};
int main() {
CIOParams_b_t c;
std::cout << sizeof(c) << '\n';
}
Then, of course, your function cannot work.
You might be tempted to try 8 bytes now, and it might even work, but that is not the correct solution.
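At the very least, a compile-time check makes the size assumption visible instead of failing silently (a sketch; 6 is the size the raw buffer above assumes):
static_assert(sizeof(CIOParams_b_t) == 6,
              "CIOParams_b_t is not 6 bytes on this compiler/ABI");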
What you need is so-called serialization. There are many ready-to-use libraries available for free, but for your few fields you can also simply overload the inserter << and extractor >> operators for your class.
Short example:
#include <iostream>
#include <string>
#include <fstream>
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
friend std::istream& operator >> (std::istream& is, CIOParams_b_t& c) {
unsigned tmp;
is >> tmp; c.scr_width = tmp;
is >> tmp; c.scr_hight = tmp;
is >> tmp; c.bits = tmp;
is >> tmp; c.fullscr = tmp;
is >> tmp; c.vsync = tmp;
is >> tmp; c.brightness = tmp;
is >> tmp; c.contrast = tmp;
is >> tmp; c.mipmap = tmp;
is >> tmp; c.skybox = tmp;
return is;
}
friend std::ostream& operator << (std::ostream& os, const CIOParams_b_t& c) {
return os << c.scr_width << '\n' << c.scr_hight << '\n' << c.bits << '\n' << c.fullscr << '\n' << c.vsync << '\n'
<< c.brightness << '\n' << c.contrast << '\n' << c.mipmap << '\n' << c.skybox << '\n';
}
};
const std::string fileName{ "tmp.txt" };
int main() {
CIOParams_b_t c1{1,3,3,0,1,4,5,0,1,0};
if (std::ofstream outFileStream{ fileName }; outFileStream)
outFileStream << c1;
else
std::cerr << "\nError: Could not open '" << fileName << "' for writing\n";
if (std::ifstream inFileStream{ fileName }; inFileStream) {
CIOParams_b_t c2{};
inFileStream >> c2;
std::cout << c2;
}
else
std::cerr << "\nError: Could not open '" << fileName << "' for reading\n";
}
I'm getting no known conversion from 'vlq' to 'uint32_t' (compile log below) when compiling the following code:
// vlq.h
#ifndef __VARIABLE_LENGTH_QUANTITY_H__
#define __VARIABLE_LENGTH_QUANTITY_H__
#include <memory>
class vlq {
private:
std::unique_ptr<uint8_t[]> valueBytes; // pointer to value
size_t valueLength; // length of value
static size_t encodedLength(uint32_t a);
static uint8_t* encodeVlq(uint32_t a);
static uint32_t decodeVlq(uint8_t* valueBytes, size_t valueLength);
public:
vlq(); // default constructor
vlq(uint32_t value);
size_t length() const; // returns number of bytes to store vlq
uint32_t value() const; // returns encoded value of vlq
vlq operator+ (vlq const &obj);
};
#endif
// vlq.cpp
#include "vlq.h"
#include <stdexcept> // std::out_of_range
#include <stdint.h> // uint8_t, uint32_t
#include <cmath> // log2, floor
vlq::vlq() {
this->valueBytes.reset();
this->valueLength = 0;
}
vlq::vlq(uint32_t value) {
size_t valueLength = encodedLength(value);
uint8_t* valueBytes = encodeVlq(value);
this->valueBytes.reset(valueBytes);
this->valueLength = valueLength;
}
// returns required length for passed value
size_t vlq::encodedLength(uint32_t a) {
// calculate number of bits required
size_t numBits = std::floor(std::log2(a)) + 1;
// set length
size_t length;
if (numBits % 7 == 0) {
length = numBits / 7;
} else {
length = numBits / 7 + 1;
}
return length;
}
// encodes bytes of a vlq
uint8_t* vlq::encodeVlq(uint32_t a) {
size_t length = encodedLength(a);
uint8_t *formattedVlqBytes = new uint8_t[length];
// turn a into 7 bit chunks
int chunkOffset;
for (int i = 0; i < length; i++) {
chunkOffset = (length - i - 1) * 7;
formattedVlqBytes[i] = (a & (0x7F << chunkOffset)) >> chunkOffset;
// set bit if this isn't the last bit
if (i != length - 1) {
formattedVlqBytes[i] |= 0x80;
}
}
return formattedVlqBytes;
}
// decodes bytes of a vlq
uint32_t vlq::decodeVlq(uint8_t* valueBytes, size_t valueLength) {
size_t numBits = valueLength * 7;
uint32_t decodedVlq = 0x0;
size_t byteOffset, bitOffset;
for (size_t i = 0; i < numBits; i++) {
byteOffset = i / 7;
bitOffset = 6 - (i % 7);
decodedVlq |= valueBytes[byteOffset] & (0x1 << bitOffset) << (valueLength - byteOffset - 1) * 7;
}
return decodedVlq;
}
// returns encoded value of vlq
uint32_t vlq::value() const {
return decodeVlq(this->valueBytes.get(), this->valueLength);
}
vlq vlq::operator+(vlq const &obj) {
uint32_t sum = this->value() + obj.value();
return vlq(sum);
}
// compiler output
vlq.cpp:81:9: error: no matching constructor for initialization of 'vlq'
return vlq(sum);
^~~~~~~~
./vlq.h:6:7: note: candidate constructor (the implicit copy constructor) not viable: expects an l-value for 1st argument
class vlq {
^
vlq.cpp:11:6: note: candidate constructor not viable: no known conversion from 'vlq' to 'uint32_t' (aka 'unsigned int') for 1st argument
vlq::vlq(uint32_t value) {
^
vlq.cpp:6:6: note: candidate constructor not viable: requires 0 arguments, but 1 was provided
vlq::vlq() {
^
1 error generated.
Why am I getting no known conversion from 'vlq' to 'uint32_t' when passing a uint32_t?
I have a procedure that looks like this:
void Process1(unsigned char* data)
{
}
void Process2(unsigned char* data)
{
}
void Process3(unsigned char* data)
{
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
As it looks, flags & FLAG1 (a.k.a. b1) does not change across the loop iterations, but we still branch on it in every iteration. I'm just wondering if there's a way to avoid this unnecessary branching dynamically.
Here is a demo of Lundin's solution.
#include <windows.h>
#include <stdio.h>
#include <time.h>
LARGE_INTEGER ls, le, ll;
#define START_CLOCK() QueryPerformanceCounter(&ls)
#define END_CLOCK() printf ("%.0lf ns\n", (QueryPerformanceCounter(&le), ((double)le.QuadPart - ls.QuadPart) / ll.QuadPart * 1000000));
void Process1(unsigned char* data)
{
(*data)++;
}
void Process2(unsigned char* data)
{
(*data)--;
}
void Process3(unsigned char* data)
{
(*data) *= (*data);
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
void ProcessData_x(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
int main()
{
if (!QueryPerformanceFrequency(&ll)) return 1;
const unsigned int bytes = 0xffff;
srand((unsigned int)time(NULL));
unsigned int flags = rand() & 0x7;
unsigned char* data = new unsigned char[bytes];
for (unsigned int i = 0; i < bytes; i++)
{
data[i] = (unsigned char)(rand() & 0xff);
}
START_CLOCK();
ProcessData(data, bytes, flags);
END_CLOCK();
START_CLOCK();
ProcessData_x(data, bytes, flags);
END_CLOCK();
}
Here is the output:
134 ns
272 ns
I've run it several times, but unexpectedly it costs even more time :( It was compiled as 'vs2010 Release x86'.
First of all, it doesn't make any sense to speak about optimizations without a particular system in mind...
That being said, I'd optimize away the branches in the following way:
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
...
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
A C++ solution, similar to Lundin's answer but without calls to an empty function. I'm not sure whether that makes any difference in performance; the main advantage is that you don't need to manually list all the process calls in the loop. If you want to micro-optimize or want C, you could use an array on the stack, but you'll have to manage some counters yourself.
#include <vector>
typedef void (*proc_t)(unsigned char*);
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
    bool b1 = (flags & FLAG1) != 0;
    bool b2 = (flags & FLAG2) != 0;
    bool b3 = (flags & FLAG3) != 0;
    std::vector<proc_t> processes;
    if (b1) processes.push_back(Process1);
    if (b2) processes.push_back(Process2);
    if (b3) processes.push_back(Process3);
    for (auto p : processes)
        for (unsigned int i = 0; i < bytes; i++)
            p(data + i);
}
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
int caseNow=SelectCaseAtOnce(b1,b2,b3);
if(caseNow==0)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
}
else if(caseNow==1)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
}
else if(caseNow==2)
for (unsigned int i = 0; i < bytes; i ++)
{
Process3(data + i);
}
else if(caseNow==3)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
}
else if(caseNow==4)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process3(data + i);
}
else if(caseNow==5)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
Process3(data + i);
}
else if(caseNow==6)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
Process3(data + i);
}
else {}
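The post does not show SelectCaseAtOnce; a hypothetical helper consistent with the branches above could look like this:
// Hypothetical helper: maps the three flag booleans to the case indices used
// above, returning -1 when no flag is set so the final else does nothing.
int SelectCaseAtOnce(bool b1, bool b2, bool b3)
{
    if ( b1 && !b2 && !b3) return 0; // Process1 only
    if (!b1 &&  b2 && !b3) return 1; // Process2 only
    if (!b1 && !b2 &&  b3) return 2; // Process3 only
    if ( b1 &&  b2 && !b3) return 3; // Process1 + Process2
    if ( b1 && !b2 &&  b3) return 4; // Process1 + Process3
    if (!b1 &&  b2 &&  b3) return 5; // Process2 + Process3
    if ( b1 &&  b2 &&  b3) return 6; // all three
    return -1;                       // no flags set
}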
Here's another solution using templates - this way you'll get an optimized version of the inner loop for each variant. If the ProcessN functions are short / simple enough to be worth inlining then this could be a worthwhile optimization.
#include <tuple>
#include <map>
#include <utility>
using namespace std;
inline void Process1(unsigned char* data) {}
inline void Process2(unsigned char* data) {}
inline void Process3(unsigned char* data) {}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
template <bool b1, bool b2, bool b3>
void ProcessData(unsigned char* data, unsigned int bytes) {
for (unsigned int i = 0; i < bytes; i++) {
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags) {
typedef void (*ProcessFunc)(unsigned char*, unsigned int bytes);
static map<tuple<bool, bool, bool>, ProcessFunc> funcs{
{make_tuple(false, false, false), ProcessData<false, false, false>},
{make_tuple(false, false, true), ProcessData<false, false, true>},
{make_tuple(false, true, false), ProcessData<false, true, false>},
{make_tuple(false, true, true), ProcessData<false, true, true>},
{make_tuple(true, false, false), ProcessData<true, false, false>},
{make_tuple(true, false, true), ProcessData<true, false, true>},
{make_tuple(true, true, false), ProcessData<true, true, false>},
{make_tuple(true, true, true), ProcessData<true, true, true>}};
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
funcs[make_tuple(b1, b2, b3)](data, bytes);
}
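For illustration, a call site for the dispatching overload might look like this (the buffer name and size are placeholders):
int main()
{
    unsigned char buffer[256] = {};
    // Dispatches to the ProcessData<true, false, true> instantiation,
    // whose inner loop contains no per-byte flag checks.
    ProcessData(buffer, sizeof(buffer), FLAG1 | FLAG3);
    return 0;
}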