I have been working on a program that can connect to a Minecraft Server and exchange packets with it but Minecraft's server packets heavily rely on signed VarInts. On the site documenting how their communication works is explanation of VarInt and even an example implementation of them in Java:
So i followed it and it works for non negative numbers but for negative numbers it's just goes infinitely long. I serched for other means of implementing it but i couldn't find any.
Here is my version it (works as i said only for positive numbers which is not fully what i want)
class VarInt
{
public:
char* data = NULL;
int length = 0;
int Read();
void Write(int value);
~VarInt();
private:
int SEGMENT_BIT_MASK = 0x7F;
int CONTINUE_BIT_MASK = 0x80;
};
int VarInt::Read()
{
int value = 0;
int position = 0;
byte currentByte;
int i = 0;
while (true)
{
currentByte = data[i];
value |= (currentByte & SEGMENT_BIT_MASK) << position;
if ((currentByte & CONTINUE_BIT_MASK) == 0)
break;
i++;
position += 7;
if (position >= 32)
std::cout << "VarInt is too Big" << std::endl;
}
return value;
};
void VarInt::Write(int value)
{
bool state = true;
std::vector<byte> bytes;
while (state)
{
if ((value & ~SEGMENT_BIT_MASK) == 0)
{
bytes.push_back(value);
state = false;
break;
}
bytes.push_back(((value & SEGMENT_BIT_MASK) | CONTINUE_BIT_MASK));
value >>= 7;
}
int bytes_size = bytes.size();
length = bytes_size;
data = (char*)malloc(bytes_size);
int i = 0;
while (i != bytes_size)
{
data[i] = bytes.at(i);
i++;
}
};
VarInt::~VarInt()
{
};
And here are my means of testing it:
#include <iostream>
#include "MDatatypes.h"
int main()
{
ndt::VarInt test;
//Sets value of test to -127
test.Write(-127);
//Sets value of test2 to 255
ndt::VarInt test2;
test2.Write(255);
//Outputing length of each Varint in bytes
std::cout << test.length << '|' << test2.length << std::endl;
//Outputing the values of each Varint
std::cout << test.Read() << '|' << test2.Read() << std::endl;
}
I have write the code to save some data with structure like CIOParams_b_t (see declaration) to file and load them. Number fields (>1 bits) load from file successfully, however boolean fields (1 bit) load with mistakes, maybe rendomely.
save data procedure:
//...
std::ofstream file;
file.open(path, std::fstream::out|std::ios::binary|std::ofstream::trunc);
//...
union {
CIOParams_b_t params_b_t;
char raw[6];
} p_data;
p_data.params_b_t.scr_width = params.scr_width;
p_data.params_b_t.scr_hight = params.scr_hight;
p_data.params_b_t.bits = static_cast<int>(params.bits);
p_data.params_b_t.brightness = params.brightness;
p_data.params_b_t.contrast = params.contrast;
p_data.params_b_t.fullscr = getFlag(params.flags, FLAG_FULLSCREEN)?1:0;
p_data.params_b_t.vsync = getFlag(params.flags, FLAG_VSYNC)?1:0;
p_data.params_b_t.mipmap = getFlag(params.flags, FLAG_MIPMAP)?1:0;
p_data.params_b_t.skybox = getFlag(params.flags, FLAG_SKYBOX)?1:0;
file.write(p_data.raw, 6);
//...
load data procedure:
//...
std::ifstream file(path);
//...
union {
CIOParams_b_t params_b_t;
char raw[6];
} p_data;
file.read(p_data.raw, 6);
retVal.scr_width = p_data.params_b_t.scr_width;
retVal.scr_hight = p_data.params_b_t.scr_hight;
retVal.bits = static_cast<CIOParams::cbits>(p_data.params_b_t.bits);
retVal.brightness = p_data.params_b_t.brightness;
retVal.contrast = p_data.params_b_t.contrast;
setFlag(&retVal.flags, FLAG_FULLSCREEN, p_data.params_b_t.fullscr==1?true:false);
setFlag(&retVal.flags, FLAG_VSYNC, p_data.params_b_t.vsync==1?true:false);
setFlag(&retVal.flags, FLAG_MIPMAP, p_data.params_b_t.mipmap==1?true:false);
setFlag(&retVal.flags, FLAG_SKYBOX, p_data.params_b_t.skybox==1?true:false);
file.close();
//...
structs declaration:
struct CIOParams
{
unsigned short int scr_width;
unsigned short int scr_hight;
unsigned char brightness;
unsigned char contrast;
enum cbits
{
b8 = 0,
b16,
b32,
b64
} bits;
unsigned char flags;
bool hasError = false;
};
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
};
Notice: functions setFlag and getFlag are working successfully. Anyway, they performed well in unit tests.
The problem comes with padding over byte boundaries. So, you think that your "CIOParams_b_t" type is 6 bytes long. But that is not guaranteed. In my environment it is padded by the compiler to 8 bytes length:
#include <iostream>
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
};
int main() {
CIOParams_b_t c;
std::cout << sizeof(c) << '\n';
}
Then, of course, your function cannot work.
You may have the idea to try with 8 bytes now, and it might work. But this is not the correct solution.
What you need is so called serialization. There are many ready to use libraries available for free. But for your few data, you can also simply overwrite the inserter << and extraction >> operator for your class.
Short example:
#include <iostream>
#include <string>
#include <fstream>
struct CIOParams_b_t
{
unsigned scr_width : 10;
unsigned scr_hight : 10;
unsigned bits : 2;
unsigned fullscr : 1;
unsigned vsync : 1;
unsigned brightness : 8;
unsigned contrast : 8;
unsigned mipmap : 1;
unsigned skybox : 1;
unsigned __unused : 6;
friend std::istream& operator >> (std::istream& is, CIOParams_b_t& c) {
unsigned tmp;
is >> tmp; c.scr_width = tmp;
is >> tmp; c.scr_hight = tmp;
is >> tmp; c.bits = tmp;
is >> tmp; c.fullscr = tmp;
is >> tmp; c.vsync = tmp;
is >> tmp; c.brightness = tmp;
is >> tmp; c.contrast = tmp;
is >> tmp; c.mipmap = tmp;
is >> tmp; c.skybox = tmp;
return is;
}
friend std::ostream& operator << (std::ostream& os, const CIOParams_b_t& c) {
return os << c.scr_width << '\n' << c.scr_hight << '\n' << c.bits << '\n' << c.fullscr << '\n' << c.vsync << '\n'
<< c.brightness << '\n' << c.contrast << '\n' << c.mipmap << '\n' << c.skybox << '\n';
}
};
const std::string fileName{ "tmp.txt" };
int main() {
CIOParams_b_t c1{1,3,3,0,1,4,5,0,1,0};
if (std::ofstream outFileStream{ fileName }; outFileStream)
outFileStream << c1;
else
std::cerr << "\nError: Could not open '" << fileName << "' for writing\n";
if (std::ifstream inFileStream{ fileName }; inFileStream) {
CIOParams_b_t c2{};
inFileStream >> c2;
std::cout << c2;
}
else
std::cerr << "\nError: Could not open '" << fileName << "' for reading\n";
}
How can I verify the thread-safety of the bit manipulation in a multi-threaded application? I am trying to set and reset the bits of an atomic variable shared among various threads.
std::atomic<uint8_t> bitset_;
void set(unsigned n) {
uint8_t val = bitset_.load();
while (!bitset_.compare_exchange_weak(val, val | (1 << n), std::memory_order_release));
}
void reset(unsigned n) {
uint8_t val = bitset_.load();
while (!bitset_.compare_exchange_weak(val, val & ~(1 << n), std::memory_order_release));
}
bool test(unsigned i) { return (bitset_.load(std::memory_order_acquire) >> i) & 1; }
bool allset() { return bitset_.load(std::memory_order_acquire) == 255; }
I want to directly modify a bit in a byte.
In GCC, you can do it as follow:
struct virtualByte {
unsigned char b0 : 1;
unsigned char b1 : 1;
unsigned char b2 : 1;
unsigned char b3 : 1;
unsigned char b4 : 1;
unsigned char b5 : 1;
unsigned char b6 : 1;
unsigned char b7 : 1;
} __attribute__((__packed__));
#define sbit(_byte, _pos) (((volatile struct virtualByte *)&_byte)->b ## _pos)
Usage:
unsigned char myByte = 0x00;
#define firstBit sbit(myByte, 0)
firstBit = 1; // Implicit myByte |= 0x01;
To make things neater I want to have class that does this for me. I came up with the following concept:
unsigned char myByteRef = 0x00;
Byte myByte(&myByteRef);
myByte[0] = 1; // Implicit myByteRef |= 0x01;
fprintf(stderr, "%2.2X\n", myByteRef);
But this does not work because in c++ you cannot return a reference to a single bit. Overloading the constructor does not work either.
Is there a possibility to implement such behaviour? The assignment operator should directly modify its underlying byte (not a set of bytes).
You want to use std::bitset:
std::bitset<12> someBits; // 12 bits
someBits[0] = true; // set 1st bit
std::cout << someBits.count() << '\n'; // prints 1
std::bitset<12>::reference bit5 = someBits[5];
bit5 = true;
std::cout << someBits.count() << '\n'; // prints 2
You can use the index operator to return a reference to a bit in the way you want. Note that this reference is not a bool& but rather a std::bitset::reference:
Finally came to a solution, many thanks to #doc!
My solution:
class Bit {
private:
volatile uint8_t *byte;
uint8_t bitPos;
public:
Bit(void)
{
}
void init(volatile uint8_t *const byte, uint8_t const bitPos)
{
this->byte = byte;
this->bitPos = (bitPos > 7u ? 7u : bitPos);
}
void setValue(bool const bitValue)
{
if (!this->byte) return;
if (bitValue) {
*this->byte |= (1u << this->bitPos);
} else {
*this->byte &= ~(1u << this->bitPos);
}
}
};
class BitReference {
private:
Bit &ref;
public:
BitReference(Bit &ref) : ref(ref)
{
}
void operator=(bool const bitValue)
{
this->ref.setValue(bitValue);
}
};
class Byte {
private:
Bit bits[8];
public:
Byte(volatile uint8_t *const byte)
{
for (unsigned i = 0; i < 8; ++i) {
this->bits[i].init(byte, i);
}
}
/* This did the trick :)! */
BitReference operator[](size_t index)
{
if (index > 7) index = 7;
return BitReference(this->bits[index]);
}
};
Usage:
uint8_t myPort = 0x00;
int main(int const argc, const char **const argv)
{
Byte abc(&myPort);
abc[0] = 1;
abc[1] = 1;
abc[2] = 1;
abc[3] = 1;
fprintf(stderr, "%2.2X\n", myPort);
return 0;
}
I have a procedure looks like this:
void Process1(unsigned char* data)
{
}
void Process2(unsigned char* data)
{
}
void Process3(unsigned char* data)
{
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
As it looks, flags & FLAG1 A.K.A b1 will not change in all the loops. But we still have to do branch in every loop. I just wondering if there's a way to avoid this unnecessary branch dynamically.
here is a demo of Lundin's solution.
#include <windows.h>
#include <stdio.h>
#include <time.h>
LARGE_INTEGER ls, le, ll;
#define START_CLOCK() QueryPerformanceCounter(&ls)
#define END_CLOCK() printf ("%.0lf ns\n", (QueryPerformanceCounter(&le), ((double)le.QuadPart - ls.QuadPart) / ll.QuadPart * 1000000));
void Process1(unsigned char* data)
{
(*data)++;
}
void Process2(unsigned char* data)
{
(*data)--;
}
void Process3(unsigned char* data)
{
(*data) *= (*data);
}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
for (unsigned int i = 0; i < bytes; i ++)
{
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
void ProcessData_x(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
int main()
{
if (!QueryPerformanceFrequency(&ll)) return 1;
const unsigned int bytes = 0xffff;
srand((unsigned int)time(NULL));
unsigned int flags = rand() & 0x7;
unsigned char* data = new unsigned char[bytes];
for (unsigned int i = 0; i < bytes; i++)
{
data[i] = (unsigned char)(rand() & 0xff);
}
START_CLOCK();
ProcessData(data, bytes, flags);
END_CLOCK();
START_CLOCK();
ProcessData_x(data, bytes, flags);
END_CLOCK();
}
here is the output:
134 ns
272 ns
I've run it several times but, unexpectedly, it costs even more time:(.. it is also compiled 'vs2010 Release x86'
First of all, it doesn't any sense to speak about optimizations without a particular system in mind...
That being said, I'd optimize away the branches in the following way:
typedef void (*proc_t)(unsigned char*);
inline static void do_nothing (unsigned char* ptr)
{
(void)ptr;
}
...
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;
proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;
for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}
A c++ solution. Similar to Lundin's answer but without calls to empty function. I'm not sure if that makes any difference in performance, the main advantage is that you don't need to manually list all the process calls in the loop. If you want to micro optimize or want c, you could use an array on stack, but you'll have to manage some counters yourself.
typedef void (*proc_t)(unsigned char*);
std::vector<proc_t> processes;
if (b1) processes.push_back(Process1);
if (b2) processes.push_back(Process2);
if (b3) processes.push_back(Process3);
for(auto p : processes)
for (unsigned int i = 0; i<bytes; i++)
p(data + i);
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
int caseNow=SelectCaseAtOnce(b1,b2,b3);
if(caseNow==0)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
}
else if(caseNow==1)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
}
else if(caseNow==2)
for (unsigned int i = 0; i < bytes; i ++)
{
Process3(data + i);
}
else if(caseNow==3)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
}
if(caseNow==4)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process3(data + i);
}
else if(caseNow==5)
for (unsigned int i = 0; i < bytes; i ++)
{
Process2(data + i);
Process3(data + i);
}
else if(caseNow==6)
for (unsigned int i = 0; i < bytes; i ++)
{
Process1(data + i);
Process2(data + i);
Process3(data + i);
}
else {}
Here's another solution using templates - this way you'll get an optimized version of the inner loop for each variant. If the ProcessN functions are short / simple enough to be worth inlining then this could be a worthwhile optimization.
#include <tuple>
#include <map>
#include <utility>
using namespace std;
inline void Process1(unsigned char* data) {}
inline void Process2(unsigned char* data) {}
inline void Process3(unsigned char* data) {}
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)
template <bool b1, bool b2, bool b3>
void ProcessData(unsigned char* data, unsigned int bytes) {
for (unsigned int i = 0; i < bytes; i++) {
if (b1) Process1(data + i);
if (b2) Process2(data + i);
if (b3) Process3(data + i);
}
}
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags) {
typedef void (*ProcessFunc)(unsigned char*, unsigned int bytes);
static map<tuple<bool, bool, bool>, ProcessFunc> funcs{
{make_tuple(false, false, false), ProcessData<false, false, false>},
{make_tuple(false, false, true), ProcessData<false, false, true>},
{make_tuple(false, true, false), ProcessData<false, true, false>},
{make_tuple(false, true, true), ProcessData<false, true, true>},
{make_tuple(true, false, false), ProcessData<true, false, false>},
{make_tuple(true, false, true), ProcessData<true, false, true>},
{make_tuple(true, true, false), ProcessData<true, true, false>},
{make_tuple(true, true, true), ProcessData<true, true, true>}};
bool b1 = !!(flags & FLAG1);
bool b2 = !!(flags & FLAG2);
bool b3 = !!(flags & FLAG3);
funcs[make_tuple(b1, b2, b3)](data, bytes);
}