Casting __fp16 to float fails to link on Clang 9 - c++

I need to read a file containing floating point numbers stored in binary16 format and convert them to float. Based on https://releases.llvm.org/9.0.0/tools/clang/docs/LanguageExtensions.html#half-precision-floating-point, I read the data into __fp16* fp16_weights_buf and then simply did
for (int i = 0; i < config_.weights_buf_size; i++) {
buf_weights_[i] = static_cast<T>(fp16_weights_buf[i]);
}
This compiles, but linking fails:
: && /usr/bin/clang++-9 -g -fsanitize=address,undefined -fno-omit-frame-pointer -fno-limit-debug-info CMakeFiles/run_model.dir/src/run_model.cc.o -o run_model libfused_transformer.a ../thirdparty/OpenBLAS/libopenblas.a ../thirdparty/icu/icu4c/linux/prebuilt/lib/libicui18n.a ../thirdparty/icu/icu4c/linux/prebuilt/lib/libicuuc.a ../thirdparty/icu/icu4c/linux/prebuilt/lib/libicudata.a -lpthread /usr/lib/llvm-9/lib/libomp.so -lpthread && :
CMakeFiles/run_model.dir/src/run_model.cc.o: In function `Pipeline':
/mnt/e/MyProgramming/fused-transformer-mobile-1/build/../include/pipeline.h:424: undefined reference to `__gnu_h2f_ieee'
Do I need to pass some additional options for this to work?

As a workaround, I added the code for __gnu_h2f_ieee from https://gist.github.com/whchung/25875271922806e58ac21ad7d707e3cd:
#ifdef __x86_64__
#include <limits.h>
#include <stdint.h>
typedef uint16_t src_t;
typedef uint16_t src_rep_t;
#define SRC_REP_C UINT16_C
static const int srcSigBits = 10;
#define src_rep_t_clz __builtin_clz
typedef float dst_t;
typedef uint32_t dst_rep_t;
#define DST_REP_C UINT32_C
static const int dstSigBits = 23;
// End of specialization parameters. Two helper routines for conversion to and
// from the representation of floating-point data as integer values follow.
static __inline src_rep_t srcToRep(src_t x) {
const union { src_t f; src_rep_t i; } rep = {.f = x};
return rep.i;
}
static __inline dst_t dstFromRep(dst_rep_t x) {
const union { dst_t f; dst_rep_t i; } rep = {.i = x};
return rep.f;
}
// End helper routines. Conversion implementation follows.
static __inline dst_t __extendXfYf2__(src_t a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const int srcBits = sizeof(src_t)*CHAR_BIT;
const int srcExpBits = srcBits - srcSigBits - 1;
const int srcInfExp = (1 << srcExpBits) - 1;
const int srcExpBias = srcInfExp >> 1;
const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
const src_rep_t srcAbsMask = srcSignMask - 1;
const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
const src_rep_t srcNaNCode = srcQNaN - 1;
const int dstBits = sizeof(dst_t)*CHAR_BIT;
const int dstExpBits = dstBits - dstSigBits - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;
const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
// Break a into a sign and representation of the absolute value
const src_rep_t aRep = srcToRep(a);
const src_rep_t aAbs = aRep & srcAbsMask;
const src_rep_t sign = aRep & srcSignMask;
dst_rep_t absResult;
// If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
// to (signed) int. To avoid that, explicitly cast to src_rep_t.
if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
// a is a normal number.
// Extend to the destination type by shifting the significand and
// exponent into the proper position and rebiasing the exponent.
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
}
else if (aAbs >= srcInfinity) {
// a is NaN or infinity.
// Conjure the result by beginning with infinity, then setting the qNaN
// bit (if needed) and right-aligning the rest of the trailing NaN
// payload field.
absResult = (dst_rep_t)dstInfExp << dstSigBits;
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
}
else if (aAbs) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
absResult ^= dstMinNormal;
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
absResult |= (dst_rep_t)resultExponent << dstSigBits;
}
else {
// a is zero.
absResult = 0;
}
// Apply the signbit to (dst_t)abs(a).
const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
return dstFromRep(result);
}
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
__attribute__((noinline)) float __extendhfsf2(uint16_t a) {
return __extendXfYf2__(a);
}
extern "C" float __gnu_h2f_ieee(uint16_t a) {
return __extendhfsf2(a);
}
#endif
in a separate source file (#ifdef because on ARM this function should be defined).

Related

Access bits in memory

I want to assemble a message bit by bit, then handle the message as a vector of unsigned characters ( e.g. to calculate the CRC )
I can assemble the message OK, using either a std::vector<bool> or a std::bitset
I can copy the assembled message to a std::vector doing it bit by bit. ( Note: the meesage is padded so that its length is an integer number of bytes )
// assemble message
std::vector<bool> bitMessage;
...
// copy the bits one by one into bytes and add them to the message
std::vector<unsigned char> myMessage;
// loop over bytes
for (int kbyte = 0;
kbyte < bitMessage.size() / 8;
kbyte++)
{
unsigned char byte = 0;
// loop over bits
for (int kbit = 0;
kbit < 8;
kbit++)
{
// add bit to byte
byte += bitMessage[8 * kbyte + kbit] << kbit;
}
// add byte to message
myMessage.push_back(byte);
}
This works.
But it seems awfully slow! I would like to use std::memcpy.
For a 'normal' vector I would do
memcpy(
myMessage.data(),
bitMessage.data(),
bitMessage.size() / 8 );
or
memcpy(
&myMessage[0],
&bitMessage[0],
bitMessage.size() / 8 );
but neither of these methods is possible with either a vector<bool> or bitset
Question: Is there a way to get a pointer to the memory where the bits are stored?
The answer is: not with std::vector<bool> or std::bitset
However, with some hints , especially from #Ayxan Haqverdili, it is possible to write a small class that will accept single bits and construct a well mannered std::vector<unsigned char> as we go along.
/** Build a message bit by bit, creating an unsigned character vector of integer length
*
* Hides the messy bit twiddling required,
* allowing bits to be added to the end of the message
*
* The message is automatically padded at the end with zeroes
*/
class cTwiddle
{
public:
std::vector<unsigned char> myMessage;
cTwiddle() : myBitLength(0) {}
/** add a bit to end of message
* #param[in] bit
*/
void add(bool bit)
{
// check if message vector is full
if (!(myBitLength % 8))
{
// add byte to end of message
myMessage.push_back(0);
}
// control order bits are added to a byte
int shift = 7 - (myBitLength % 8); // add bits from left to right ( MSB first )
// int shift = (myBitLength % 8); // add bits from right to left ( LSB first )
myMessage.back() += (1 & bit) << shift;
myBitLength++;
}
private:
int myBitLength;
};
Apparently neither of those classes define the layout. Just write your own class and define the layout you want:
template <int size>
class BitSet final {
private:
unsigned char buffer[size / 8 + (size % 8 != 0)] = {};
public:
constexpr bool get(size_t index) const noexcept {
return (buffer[index / 8] >> (index % 8)) & 1U;
}
constexpr void set(size_t index) noexcept {
buffer[index / 8] |= (1U << (index % 8));
}
constexpr void clear(size_t index) noexcept {
buffer[index / 8] &= ~(1U << (index % 8));
}
};
Memcpy-ing this class is perfectly fine. Otherwise, you might also provide direct access to the byte array.
Alternatively, you can dynamically allocate the buffer:
#include <memory>
class DynBitSet final {
private:
size_t size = 0;
std::unique_ptr<unsigned char[]> buffer;
public:
explicit DynBitSet(size_t bitsize)
: size(bitsize / 8 + (bitsize % 8 != 0)),
buffer(new unsigned char[size]{}) {}
bool get(size_t index) const noexcept {
return (buffer[index / 8] >> (index % 8)) & 1U;
}
void set(size_t index) noexcept { buffer[index / 8] |= (1U << (index % 8)); }
void clear(size_t index) noexcept {
buffer[index / 8] &= ~(1U << (index % 8));
}
auto bitSize() const noexcept { return size * 8; }
auto byteSize() const noexcept { return size; }
auto const* byteBuffer() const noexcept { return buffer.get(); }
};
Is there a way to get a pointer to the memory where the bits are stored [in std::vector]?
No. The idea is that it should be not possible.
Is there a way
Fun fact: In glibc the member in iterator is public.
#include <vector>
#include <iostream>
int main() {
std::vector<bool> vec{1,0,1,0,1,1,1,1};
std::cout << *vec.begin()._M_p << '\n';
}

Is there a way I can use a 2-bit size type instead of an int, by just plugging in the new type name instead of int?

I have an application where I need to save as much of memory as possible. I need to store a large amount of data that can take exactly three possible values. So, I have been trying to use a 2 bit sized type.
One possibility is using bit fields. I could do
struct myType {
uint8_t twoBits : 2;
}
This is a suggestion from this thread.
However, everywhere where I have used int variables prior to this, I would need to change their usage by appending a .twoBits. I checked if I can create a bit field outside of a struct, such as
uint8_t twoBits : 2;
but this thread says it is not possible. However,that thread is specific to C, so I am not sure if it applied to C++.
Is there a clean way I can define a 2-bit type, so that by simply replacing int with my type, I can run the program correctly? Or is using bit fields the only possible way?
CPU, and thus the memory, the bus, and the compiler too, uses only bytes or groups of bytes. There's no way to store a 2-bits type without storing also the other 6 remaining bits.
What you can so is define a struct that only uses some bits. But we aware that it will not save memory.
You can pack several x-bits types in a struct, as you already know. Or you can do bits operations to pack/unpack them into a integer type.
Is there a clean way I can define a 2-bit type, so that by simply
replacing int with my type, I can run the program correctly? Or is
using bit fields the only possible way?
You can try to make the struct as transparent as possible by providing implicit conversion operators and constructors:
#include <cstdint>
#include <iostream>
template <std::size_t N, typename T = unsigned>
struct bit_field {
T rep : N;
operator T() { return rep; }
bit_field(T i) : rep{ i } { }
bit_field() = default;
};
using myType = bit_field<2, std::uint8_t>;
int main() {
myType mt;
mt = 3;
std::cout << mt << "\n";
}
So objects of type my_type somewhat behave like real 3-bit unsigned integers, despite having more than 3 bits.
Of course, the residual bits are unused, but as single bits are not addressable on most systems, this is the best way to go.
I'm not convinced that you will save anything with your existing structure, as the surrounding structure still gets rounded up to a whole number of bytes.
You can write the following to squeeze 4 2-bit counters into 1 byte, but as you say, you have to name them myInst.f0:
struct MyStruct
{
ubyte_t f0:2,
f1:2,
f2:2,
f3:2;
} myInst;
In c and c++98, you can declare this anonymous, but this usage is deprecated. You can now access the 4 values directly by name:
struct
{ // deprecated!
ubyte_t f0:2,
f1:2,
f2:2,
f3:2;
};
You could declare some sort of template that wraps a single instance with an operator int and operator =(int), and then define a union to put the 4 instances at the same location, but again anonymous unions are deprecated. However you could then declare references to your 4 values, but then you are paying for the references, which are bigger than the bytes you were trying to save!
template <class Size,int offset,int bits>
struct Bitz
{
Size ignore : offset,
value : bits;
operator Size()const { return value; }
Size operator = (Size val) { return (value = val); }
};
template <class Size,int bits>
struct Bitz0
{ // I know this can be done better
Size value : bits;
operator Size()const { return value; }
Size operator = (Size val) { return (value = val); }
};
static union
{ // Still deprecated!
Bitz0<char, 2> F0;
Bitz<char, 2, 2> F1;
Bitz<char, 4, 2> F2;
Bitz<char, 6, 2> F3;
};
union
{
Bitz0<char, 2> F0;
Bitz<char, 2, 2> F1;
Bitz<char, 4, 2> F2;
Bitz<char, 6, 2> F3;
} bitz;
Bitz0<char, 2>& F0 = bitz.F0; /// etc...
Alternatively, you could simply declare macros to replace the the dotted name with a simple name (how 1970s):
#define myF0 myInst.f0
Note that you can't pass bitfields by reference or pointer, as they don't have a byte address, only by value and assignment.
A very minimal example of a bit array with a proxy class that looks (for the most part) like you were dealing with an array of very small integers.
#include <cstdint>
#include <iostream>
#include <vector>
class proxy
{
uint8_t & byte;
unsigned int shift;
public:
proxy(uint8_t & byte,
unsigned int shift):
byte(byte),
shift(shift)
{
}
proxy(const proxy & src):
byte(src.byte),
shift(src.shift)
{
}
proxy & operator=(const proxy &) = delete;
proxy & operator=(unsigned int val)
{
if (val <=3)
{
uint8_t wipe = 3 << shift;
byte &= ~wipe;
byte |= val << shift;
}
// might want to throw std::out_of_range here
return *this;
}
operator int() const
{
return (byte >> shift) &0x03;
}
};
Proxy holds a reference to a byte and knows how to extract two specific bits and look like an int to anyone who uses it.
If we wrap an array of bits packed into bytes with a class that returns this proxy object wrapped around the appropriate byte, we now have something that looks a lot like an array of very small ints.
class bitarray
{
size_t size;
std::vector<uint8_t> data;
public:
bitarray(size_t size):
size(size),
data((size + 3) / 4)
{
}
proxy operator[](size_t index)
{
return proxy(data[index/4], (index % 4) * 2);
}
};
If you want to extend this and go the distance, Writing your own STL Container should help you make a fully armed and operational bit-packed array.
There's room for abuse here. The caller can hold onto a proxy and get up to whatever manner of evil this allows.
Use of this primitive example:
int main()
{
bitarray arr(10);
arr[0] = 1;
arr[1] = 2;
arr[2] = 3;
arr[3] = 1;
arr[4] = 2;
arr[5] = 3;
arr[6] = 1;
arr[7] = 2;
arr[8] = 3;
arr[9] = 1;
std::cout << arr[0] << std::endl;
std::cout << arr[1] << std::endl;
std::cout << arr[2] << std::endl;
std::cout << arr[3] << std::endl;
std::cout << arr[4] << std::endl;
std::cout << arr[5] << std::endl;
std::cout << arr[6] << std::endl;
std::cout << arr[7] << std::endl;
std::cout << arr[8] << std::endl;
std::cout << arr[9] << std::endl;
}
Simply, build on top of bitset, something like:
#include<bitset>
#include<iostream>
using namespace std;
template<int N>
class mydoublebitset
{
public:
uint_least8_t operator[](size_t index)
{
return 2 * b[index * 2 + 1] + b[index * 2 ];
}
void set(size_t index, uint_least8_t store)
{
switch (store)
{
case 3:
b[index * 2] = 1;
b[index * 2 + 1] = 1;
break;
case 2:
b[index * 2] = 0;
b[index * 2 + 1] = 1;
break;
case 1:
b[index * 2] = 0;
b[index * 2 + 1] = 1;
break;
case 0:
b[index * 2] = 0;
b[index * 2 + 1] = 0;
break;
default:
throw exception();
}
}
private:
bitset<N * 2> b;
};
int main()
{
mydoublebitset<12> mydata;
mydata.set(0, 0);
mydata.set(1, 2);
mydata.set(2, 2);
cout << (unsigned int)mydata[0] << (unsigned int)mydata[1] << (unsigned int)mydata[2] << endl;
system("pause");
return 0;
}
Basically use a bitset with twice the size and index it accordingly. its simpler and memory efficient as is required by you.

c++ computing compile time constants while preventing integral constant overflow

I am sort of new to the language features of meta programming and I am trying to make a simple class with public static const variables that will set its values by compile time constants:
What I'm trying to achieve: I want to compute values that are the power of some exponent that are measured in number of bytes converted to number of bits with a base of 2. All calculations are in base 2.
Examples:
1 byte(s) = 8 bits: value = pow(2, 8) = 256;
2 byte(s) = 16 bits: value = pow(2, 16) = 65536
4 byte(s) = 32 bits: value = pow(2, 32) = 4294967296
8 byte(s) = 64 bits: value = pow(2, 64) = 18446744073709551616
I've tried writing a function to do the calculations to compute the values needed while trying to use constexpr or const, and I've tried using templates. I would like to use the const function, constexpr function or function template as such:
// constexpr function
constexpr std::uint64_t pow2( const std::uint32_t expInBytes, const std::uint32_t base = 2 ) {
const std::uint32_t expInBits = expInBytes * CHAR_BIT;
return static_cast<std::uint64_t>( expInBits == 0 ? 1 : base * pow2( base, expInBits - 1 ) );
}
// or function template
template<std::uint32_t expInbytes>
constexpr std::uint64_t pow2() {
const std::uint32_t base = 2;
const std::uint32_t expInBits = expInBytes * CHAR_BIT;
return (expInBits == 0 ? 1 : base * pow2<expInBytes-1>() );
}
template<>
constexpr std::uint64_t pow2<0>() {
return 0;
};
// template parameter T not used but needed to use the class as such:
// BitCombinations<>::static_member;
template<typename T = const std::uint32_t>
class BitCombinations {
public: // template // non template
static const std::uint64_t ONE_BYTE = pow2<1>(); // pow2( 1 );
static const std::uint64_t TWO_BYTES = pow2<2>(); // pow2( 2 );
static const std::uint64_t FOUR_BYTES = pow2<4>(); // pow2( 4 );
static const std::uint64_t EIGHT_BYTES = pow2<8>(); // pow2( 8 );
};
Through my efforts I've generated all sorts of compile time, run time errors ect. The latest attempt I was able to get the template version of the pow2<>() above to compile and run, however I'm not getting the correct results.
I'm not sure if my implementation of pow2 is wrong or if my syntax is wrong, or if I'm not using const or constexpr correctly and in some cases I kept getting the integral constant overflow as a compile time error from MS Visual Studio 2017 CE compiler.
I've been following these patterns for the pow2() function:
nullptr.me:C++11 constexpr : computing exp at compile time
prosepoetrycode.potterpcs.net : A simple constexpr power function (C++)
cppreference.com : math::exp2
reformatcode.com : c++ power of integer, template meta programming
I can not seem to wrap my mind around this and don't know what else to try.
Notice that your last case isn't possible currently. You can't store 2^64 in an 8 byte type, the maximum is 2^64 - 1. At least on mainstream architectures, don't know which one you are using.
I see two problems with your function template.
You multiply the result with base only once, but you decrement the bits count by 8 by doing expInBytes - 1. So, you need to multiply it eight times:
return (expInBits == 0 ? 1 : base * base * base * base * base * base * base * base * pow2<expInBytes-1>() );
The specialization for 0 returns 0, and any number multiplied with 0 is 0. :) If you think that you handled the case with expInBits == 0, think again: The only way for expInBits to be 0 is if expInBytes is 0, but that cannot be in the primary template because you have a specialization for when expInBytes is 0! That means that that branch is never taken, it literally has no effect.
Your function has the same problem described in 1), and in addition you are passing the wrong value to it when recursing (expInBits instead of expInBytes) and the order is wrong (base comes last).
In my opinion, a loop is easier to understand and less error-prone:
constexpr std::uint64_t pow2(const std::uint32_t expInBytes, const std::uint32_t base = 2) {
const std::uint32_t expInBits = expInBytes * CHAR_BIT;
std::uint64_t result = 1;
for (std::uint32_t i = 0; i < expInBits; ++i)
result *= base;
return result;
}
With the help of Rakete111 and his positive feed back; I was able to work through my problem as he pointed out a few mistakes. In return I was able to achieve some resemblance of what I wanted. To compute 2^n at compile time.
Here is the working code:
inline constexpr std::uint64_t powerOfBits( const std::uint64_t base, std::uint64_t const exponent ) {
return (exponent == 0) ? 1 : (base * powerOfBits( base, exponent - 1 ));
}
/*template<typename T = const std::uint32_t>*/
class BitCombinations {
public:
// Because I don't care for "magic numbers"
static const std::uint64_t binaryBase = std::uint64_t(2);
static const std::uint64_t eightBits = std::uint64_t( 8 );
static const std::uint64_t sixteenBits = std::uint64_t( 16 );
static const std::uint64_t thirtyTwoBits = std::uint64_t( 32 );
static const std::uint64_t sixtyFourBits = std::uint64_t( 64 );
// Now Generate Our Compile Time Constants
static const std::uint64_t ONE_BYTE = powerOfBits( binaryBase , eightBits ); //
static const std::uint64_t TWO_BYTES = powerOfBits( binaryBase , sixteenBits ); //
static const std::uint64_t FOUR_BYTES = powerOfBits( binaryBase , thirtyTwoBits ); //
// For 64bit int need to subtract 1 from the exponent otherwise you will have integral overflow
// To prevent this we just take 2^63, then in any output display we will have to append the
// string or characters `x 2` so that the user knows the value is double what they are seeing.
static const std::uint64_t EIGHT_BYTES = powerOfBits( binaryBase , sixtyFourBits - 1 );
};
int main() {
std::cout << BitCombinations::ONE_BYTE << std::endl;
std::cout << BitCombinations::TWO_BYTES << std::endl;
std::cout << BitCombinations::FOUR_BYTES << std::endl;
// Remember that 2^64 causes overflow: need to append characters to user.
std::cout << BitCombinations::EIGHT_BYTES << " x 2" << std::endl;
std::cout << std::endl;
std::cout << "\nPress any key and enter to quite." << std::endl;
char q;
std::cin >> q;
return 0;
}
Thank you so much for your help and pointing me in the right direction. I will accept your answer.

Automatically generate bit positions

I have some bitmasks that look like this:
namespace bits {
const unsigned bit_one = 1u << 0;
const unsigned bit_two = 1u << 1;
const unsigned bit_three = 1u << 2;
......
const unsigned bit_ten = 1u << 10;
}
except that there are more bits and the names are actually meaningful flags for my program. But sometimes I remove bits, add bits, regroup similar bits, etc. Ideally I could do something like this:
namespace bits {
const unsigned bit_one = 1u << COUNTER;
const unsigned bit_two = 1u << COUNTER;
const unsigned bit_three = 1u << COUNTER;
......
const unsigned bit_ten = 1u << COUNTER;
}
Is there some template / macro do automate this process? I know about __COUNTER__, but this is a header so if it gets included in some other source that uses __COUNTER__ too it may break. I'm working in a framework which is pre-C++11, so while upgrading my compiler will happen eventually, a solution that doesn't use C++11 would be ideal.
Why not use a macro with an argument?
#define BIT(n) (1 << (n))
You can use the __LINE__ macro, which is part of standard C and C++. Use with caution and document your intent so that somebody else reading the code will understand.
#include <iostream>
namespace Bits
{
const unsigned Base = __LINE__ + 1;
const unsigned BitOne = 1u << __LINE__-Base;
const unsigned BitTwo = 1u << __LINE__-Base;
const unsigned BitThree = 1u << __LINE__-Base;
}
int main(void)
{
std::cout << Bits::BitOne << '\n';
std::cout << Bits::BitTwo << '\n';
std::cout << Bits::BitThree << '\n';
return 0;
}
The following will do the trick:
#define NEXT_MASK(x) \
DUMMY1_##x, \
x = (1U << DUMMY1_##x), \
DUMMY2_##x = DUMMY1_##x
enum {
NEXT_MASK(one),
NEXT_MASK(two),
NEXT_MASK(three),
NEXT_MASK(four)
};
#include <stdio.h>
int main()
{
printf("%x\n", one);
printf("%x\n", two);
printf("%x\n", three);
printf("%x\n", four);
return 0;
}
The program will emit:
1
2
4
8
The idea is that the first dummy enum steps up one step from the one before. The x is the mask, and the second dummy restores the value, so that the next macro will have a good starting point.
The classic solution would be an enumeration of the fields:
enum foo_flags {
alpha,
beta,
gamma,
count
};
and then using either std::bitset<count> or the BIT macro as H2CO3 suggested:
BIT(alpha)
Microsoft C++ has the
__COUNTER__
predefined macro, so you could...
#define NEXTBIT (1u << __COUNTER__)
namespace bits {
const unsigned bit_one = NEXTBIT;
const unsigned bit_two = NEXTBIT;
const unsigned bit_three = NEXTBIT;
}

Binary literals?

In code, I sometimes see people specify constants in hex format like this:
const int has_nukes = 0x0001;
const int has_bio_weapons = 0x0002;
const int has_chem_weapons = 0x0004;
// ...
int arsenal = has_nukes | has_bio_weapons | has_chem_weapons; // all of them
if(arsenal &= has_bio_weapons){
std::cout << "BIO!!"
}
But it doesn't make sense to me to use the hex format here. Is there a way to do it directly in binary? Something like this:
const int has_nukes = 0b00000000000000000000000000000001;
const int has_bio_weapons = 0b00000000000000000000000000000010;
const int has_chem_weapons = 0b00000000000000000000000000000100;
// ...
I know the C/C++ compilers won't compile this, but there must be a workaround? Is it possible in other languages like Java?
In C++14 you will be able to use binary literals with the following syntax:
0b010101010 /* more zeros and ones */
This feature is already implemented in the latest clang and gcc. You can try it if you run those compilers with -std=c++1y option.
I'd use a bit shift operator:
const int has_nukes = 1<<0;
const int has_bio_weapons = 1<<1;
const int has_chem_weapons = 1<<2;
// ...
int dangerous_mask = has_nukes | has_bio_weapons | has_chem_weapons;
bool is_dangerous = (country->flags & dangerous_mask) == dangerous_mask;
It is even better than flood of 0's.
By the way, the next C++ version will support user defined literals. They are already included into the working draft. This allows that sort of stuff (let's hope i don't have too many errors in it):
template<char... digits>
constexpr int operator "" _b() {
return conv2bin<digits...>::value;
}
int main() {
int const v = 110110110_b;
}
conv2bin would be a template like this:
template<char... digits>
struct conv2bin;
template<char high, char... digits>
struct conv2bin<high, digits...> {
static_assert(high == '0' || high == '1', "no bin num!");
static int const value = (high - '0') * (1 << sizeof...(digits)) +
conv2bin<digits...>::value;
};
template<char high>
struct conv2bin<high> {
static_assert(high == '0' || high == '1', "no bin num!");
static int const value = (high - '0');
};
Well, what we get are binary literals that evaluate fully at compile time already, because of the "constexpr" above. The above uses a hard-coded int return type. I think one could even make it depend on the length of the binary string. It's using the following features, for anyone interested:
Generalized Constant Expressions.
Variadic Templates. A brief introduction can be found here
Static Assertions (static_assert)
User defined Literals
Actually, current GCC trunk already implements variadic templates and static assertions. Let's hope it will support the other two soon. I think C++1x will rock the house.
The C++ Standard Library is your friend:
#include <bitset>
const std::bitset <32> has_nukes( "00000000000000000000000000000001" );
GCC supports binary constants as an extension since 4.3. See the announcement (look at the section "New Languages and Language specific improvements").
You can use << if you like.
int hasNukes = 1;
int hasBioWeapons = 1 << 1;
int hasChemWeapons = 1 << 2;
This discussion may be interesting... Might have been, as the link is dead unfortunately. It described a template based approach similar to other answers here.
And also there is a thing called BOOST_BINARY.
The term you want is binary literals
Ruby has them with the syntax you give.
One alternative is to define helper macros to convert for you. I found the following code at http://bytes.com/groups/c/219656-literal-binary
/* Binary constant generator macro
* By Tom Torfs - donated to the public domain
*/
/* All macro's evaluate to compile-time constants */
/* *** helper macros *** */
/* turn a numeric literal into a hex constant
* (avoids problems with leading zeroes)
* 8-bit constants max value 0x11111111, always fits in unsigned long
*/
#define HEX_(n) 0x##n##LU
/* 8-bit conversion function */
#define B8_(x) ((x & 0x0000000FLU) ? 1:0) \
| ((x & 0x000000F0LU) ? 2:0) \
| ((x & 0x00000F00LU) ? 4:0) \
| ((x & 0x0000F000LU) ? 8:0) \
| ((x & 0x000F0000LU) ? 16:0) \
| ((x & 0x00F00000LU) ? 32:0) \
| ((x & 0x0F000000LU) ? 64:0) \
| ((x & 0xF0000000LU) ? 128:0)
/* *** user macros *** /
/* for upto 8-bit binary constants */
#define B8(d) ((unsigned char) B8_(HEX_(d)))
/* for upto 16-bit binary constants, MSB first */
#define B16(dmsb, dlsb) (((unsigned short) B8(dmsb) << 8) \
| B8(dlsb))
/* for upto 32-bit binary constants, MSB first */
#define B32(dmsb, db2, db3, dlsb) (((unsigned long) B8(dmsb) << 24) \
| ((unsigned long) B8( db2) << 16) \
| ((unsigned long) B8( db3) << 8) \
| B8(dlsb))
/* Sample usage:
* B8(01010101) = 85
* B16(10101010,01010101) = 43605
* B32(10000000,11111111,10101010,01010101) = 2164238933
*/
The next version of C++, C++0x, will introduce user defined literals. I'm not sure if binary numbers will be part of the standard but at the worst you'll be able to enable it yourself:
int operator "" _B(int i);
assert( 1010_B == 10);
I write binary literals like this:
const int has_nukes = 0x0001;
const int has_bio_weapons = 0x0002;
const int has_chem_weapons = 0x0004;
It's more compact than your suggested notation, and easier to read. For example:
const int upper_bit = 0b0001000000000000000;
versus:
const int upper_bit = 0x04000;
Did you notice that the binary version wasn't an even multiple of 4 bits? Did you think it was 0x10000?
With a little practice hex or octal are easier for a human than binary. And, in my opinion, easier to read that using shift operators. But I'll concede that my years of assembly language work may bias me on that point.
If you want to use bitset, auto, variadic templates, user-defined literals, static_assert, constexpr, and noexcept try this:
template<char... Bits>
struct __checkbits
{
static const bool valid = false;
};
template<char High, char... Bits>
struct __checkbits<High, Bits...>
{
static const bool valid = (High == '0' || High == '1')
&& __checkbits<Bits...>::valid;
};
template<char High>
struct __checkbits<High>
{
static const bool valid = (High == '0' || High == '1');
};
template<char... Bits>
inline constexpr std::bitset<sizeof...(Bits)>
operator"" bits() noexcept
{
static_assert(__checkbits<Bits...>::valid, "invalid digit in binary string");
return std::bitset<sizeof...(Bits)>((char []){Bits..., '\0'});
}
Use it like this:
int
main()
{
auto bits = 0101010101010101010101010101010101010101010101010101010101010101bits;
std::cout << bits << std::endl;
std::cout << "size = " << bits.size() << std::endl;
std::cout << "count = " << bits.count() << std::endl;
std::cout << "value = " << bits.to_ullong() << std::endl;
// This triggers the static_assert at compile-time.
auto badbits = 2101010101010101010101010101010101010101010101010101010101010101bits;
// This throws at run-time.
std::bitset<64> badbits2("2101010101010101010101010101010101010101010101010101010101010101bits");
}
Thanks to #johannes-schaub-litb
Java doesn't support binary literals either, unfortunately. However, it has enums which can be used with an EnumSet. An EnumSet represents enum values internally with bit fields, and presents a Set interface for manipulating these flags.
Alternatively, you could use bit offsets (in decimal) when defining your values:
const int HAS_NUKES = 0x1 << 0;
const int HAS_BIO_WEAPONS = 0x1 << 1;
const int HAS_CHEM_WEAPONS = 0x1 << 2;
There's no syntax for literal binary constants in C++ the way there is for hexadecimal and octal. The closest thing for what it looks like you're trying to do would probably be to learn and use bitset.
As an aside:
Especially if you're dealing with a large set, instead of going through the [minor] mental effort of writing a sequence of shift amounts, you can make each constant depend on the previously defined constant:
const int has_nukes = 1;
const int has_bio_weapons = has_nukes << 1;
const int has_chem_weapons = has_bio_weapons << 1;
const int has_nunchuks = has_chem_weapons << 1;
// ...
Looks a bit redundant, but it's less typo-prone. Also, you can simply insert a new constant in the middle without having to touch any other line except the one immediately following it:
const int has_nukes = 1;
const int has_gravity_gun = has_nukes << 1; // added
const int has_bio_weapons = has_gravity_gun << 1; // changed
const int has_chem_weapons = has_bio_weapons << 1; // unaffected from here on
const int has_nunchuks = has_chem_weapons << 1;
// ...
Compare to:
const int has_nukes = 1 << 0;
const int has_bio_weapons = 1 << 1;
const int has_chem_weapons = 1 << 2;
const int has_nunchuks = 1 << 3;
// ...
const int has_scimatar = 1 << 28;
const int has_rapier = 1 << 28; // good luck spotting this typo!
const int has_katana = 1 << 30;
And:
const int has_nukes = 1 << 0;
const int has_gravity_gun = 1 << 1; // added
const int has_bio_weapons = 1 << 2; // changed
const int has_chem_weapons = 1 << 3; // changed
const int has_nunchuks = 1 << 4; // changed
// ... // changed all the way
const int has_scimatar = 1 << 29; // changed *sigh*
const int has_rapier = 1 << 30; // changed *sigh*
const int has_katana = 1 << 31; // changed *sigh*
As an aside to my aside, it's probably equally hard to spot a typo like this:
const int has_nukes = 1;
const int has_gravity_gun = has_nukes << 1;
const int has_bio_weapons = has_gravity_gun << 1;
const int has_chem_weapons = has_gravity_gun << 1; // oops!
const int has_nunchuks = has_chem_weapons << 1;
So, I think the main advantage of this cascading syntax is when dealing with insertions and deletions of constants.
Another method:
template<unsigned int N>
class b
{
public:
static unsigned int const x = N;
typedef b_<0> _0000;
typedef b_<1> _0001;
typedef b_<2> _0010;
typedef b_<3> _0011;
typedef b_<4> _0100;
typedef b_<5> _0101;
typedef b_<6> _0110;
typedef b_<7> _0111;
typedef b_<8> _1000;
typedef b_<9> _1001;
typedef b_<10> _1010;
typedef b_<11> _1011;
typedef b_<12> _1100;
typedef b_<13> _1101;
typedef b_<14> _1110;
typedef b_<15> _1111;
private:
template<unsigned int N2>
struct b_: public b<N << 4 | N2> {};
};
typedef b<0> _0000;
typedef b<1> _0001;
typedef b<2> _0010;
typedef b<3> _0011;
typedef b<4> _0100;
typedef b<5> _0101;
typedef b<6> _0110;
typedef b<7> _0111;
typedef b<8> _1000;
typedef b<9> _1001;
typedef b<10> _1010;
typedef b<11> _1011;
typedef b<12> _1100;
typedef b<13> _1101;
typedef b<14> _1110;
typedef b<15> _1111;
Usage:
std::cout << _1101::_1001::_1101::_1101::x;
Implemented in CityLizard++ (citylizard/binary/b.hpp).
I agree that it's useful to have an option for binary literals, and they are present in many programming languages. In C, I've decided to use a macro like this:
#define bitseq(a00,a01,a02,a03,a04,a05,a06,a07,a08,a09,a10,a11,a12,a13,a14,a15, \
a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31) \
(a31|a30<< 1|a29<< 2|a28<< 3|a27<< 4|a26<< 5|a25<< 6|a24<< 7| \
a23<< 8|a22<< 9|a21<<10|a20<<11|a19<<12|a18<<13|a17<<14|a16<<15| \
a15<<16|a14<<17|a13<<18|a12<<19|a11<<20|a10<<21|a09<<22|a08<<23| \
a07<<24|a06<<25|a05<<26|a04<<27|a03<<28|a02<<29|a01<<30|(unsigned)a00<<31)
The usage is pretty much straightforward =)
One, slightly horrible way you could do it is by generating a .h file with lots of #defines...
#define b00000000 0
#define b00000001 1
#define b00000010 2
#define b00000011 3
#define b00000100 4
etc.
This might make sense for 8-bit numbers, but probably not for 16-bit or larger.
Alternatively, do this (similar to Zach Scrivena's answer):
#define bit(x) (1<<x)
int HAS_NUKES = bit(HAS_NUKES_OFFSET);
int HAS_BIO_WEAPONS = bit(HAS_BIO_WEAPONS_OFFSET);
Binary literals are part of the C++ language since C++14. It’s literals that start with 0b or 0B. Reference
Maybe less relevant to binary literals, but this just looks as if it can be solved better with a bit field.
struct DangerCollection : uint32_t {
bool has_nukes : 1;
bool has_bio_weapons : 1;
bool has_chem_weapons : 1;
// .....
};
DangerCollection arsenal{
.has_nukes = true,
.has_bio_weapons = true,
.has_chem_weapons = true,
// ...
};
if(arsenal.has_bio_weapons){
std::cout << "BIO!!"
}
You would still be able to fill it with binary data, since its binary footprint is just a uint32. This is often used in combination with a union, for compact binary serialisation:
union DangerCollectionUnion {
DangerCollection collection;
uint8_t data[sizeof(DangerCollection)];
};
DangerCollectionUnion dc;
std::memcpy(dc.data, bitsIGotFromSomewhere, sizeof(DangerCollection));
if (dc.collection.has_bio_weapons) {
// ....
In my experience less error prone and easy to understand what's going on.