Currently I have a few lines of code for working with binary strings in their decimal representation, namely I have functions to rotate the binary string to the left, flip a specific bit, flip all bits and reverse order of the binary string all working on the decimal representation. They are defined as follows:
inline u64 rotate_left(u64 n, u64 maxPower) {
return (n >= maxPower) ? (((int64_t)n - (int64_t)maxPower) * 2 + 1) : n * 2;
}
inline bool checkBit(u64 n, int k) {
return n & (1ULL << k);
}
inline u64 flip(u64 n, u64 maxBinaryNum) {
return maxBinaryNum - n - 1;
}
inline u64 flip(u64 n, u64 kthPower, int k) {
return checkBit(n, k) ? (int64_t(n) - (int64_t)kthPower) : (n + kthPower);
}
inline u64 reverseBits(u64 n, int L) {
u64 rev = (lookup[n & 0xffULL] << 56) | // consider the first 8 bits
(lookup[(n >> 8) & 0xffULL] << 48) | // consider the next 8 bits
(lookup[(n >> 16) & 0xffULL] << 40) | // consider the next 8 bits
(lookup[(n >> 24) & 0xffULL] << 32) | // consider the next 8 bits
(lookup[(n >> 32) & 0xffULL] << 24) | // consider the next 8 bits
(lookup[(n >> 40) & 0xffULL] << 16) | // consider the next 8 bits
(lookup[(n >> 48) & 0xffULL] << 8) | // consider the next 8 bits
(lookup[(n >> 54) & 0xffULL]); // consider last 8 bits
return (rev >> (64 - L)); // get back to the original maximal number
}
WIth the lookup[] list defined as:
#define R2(n) n, n + 2*64, n + 1*64, n + 3*64
#define R4(n) R2(n), R2(n + 2*16), R2(n + 1*16), R2(n + 3*16)
#define R6(n) R4(n), R4(n + 2*4 ), R4(n + 1*4 ), R4(n + 3*4 )
#define REVERSE_BITS R6(0), R6(2), R6(1), R6(3)
const u64 lookup[256] = { REVERSE_BITS };
All but the last one are easy to implement.
My question is whether you know any generalization of the above functions for the octal string of a number, while working only on the decimal representation as above? Obviously without doing a conversion and storing the octal string itself (mainly due to performance boost)
With flip() in octal code a would need to return the number with 8-x at the specified place in the string (for intstance: flip(2576, 2nd power, 2nd position) = 2376, i.e. 3 = 8-5).
I do understand that in octal representation the any similar formulas as for rotate_left or flip are not possible (maybe?), that is why I look for alternative implementation.
A possibility would be to represent each number in the octal string by their binary string, in other words to write: 29 --octal-> 35 --bin-> (011)(101)
Thus working on sets of binary numbers. Would that be a good idea?
If you have any suggestions for the code above for binary representation, I welcome any piece of advice.
Thanks in advance and sorry for the long post!
my understand of rotate_left, do not know my understand of question is correct, hope this will help you.
// maxPower: 8
// n < maxPower:
// 0001 -> 0010
//
// n >= maxPower
// n: 1011
// n - maxPower: 0011
// (n - maxPower) * 2: 0110
// (n - maxPower) * 2 + 1: 0111
inline u64 rotate_left(u64 n, u64 maxPower) {
return (n >= maxPower) ? (((int64_t)n - (int64_t)maxPower) * 2 + 1) : n * 2;
}
// so rotate_left for octadecimal, example: 3 digit octadecimal rotate left.
// 0 1 1 -> 1 1 0
// 000 001 001 -> 001 001 000
// 4 4 0 -> 4 0 4
// 100 100 000 -> 100 000 100
// so, keep:
// first digit of octadecimal number is:
// fisrt_digit = n & (7 << ((digit-1) * 3))
// other digit of octadecimal number is:
// other_digit = n - first_digit
// example for 100 100 000:
// first_digit is 100 000 000
// other_digit is 000 100 000
// so rotate left result is:
// (other_digit << 3) | (first_digit >> ((digit-1) * 3))
//
inline u64 rotate_left_oct(u64 n, u64 digit) {
u64 rotate = 3 * (digit - 1);
u64 first_digit = n & (7 << rotate);
u64 other_digit = n - first_digit;
return (other_digit << 3) | (first_digit >> rotate);
}
flip, for base 8, flip should be 7-x instead of 8-x:
// oct flip same with binary flip:
// (111)8 -> (001 001 001)2
// flip,
// (666)8 -> (110 110 110)2
// this should be 7 - 1, not 8 - 1, indead.
//
inline u64 flip_oct(u64 n, u64 digit) {
u64 maxNumber = (1 << (3 * digit)) - 1;
assert(n <= maxNumber);
return maxNumber - n;
}
// otc flip one digit
// (111)8 -> (001 001 001)2
// flip 2nd number of it
// (161)8 -> (001 110 001)2
// just need do xor of nth number of octadecimal number.
//
inline u64 flip_oct(u64 n, u64 nth, u64 digit) {
return (7 << (3 * (nth - 1))) ^ n;
}
simple reverse.
inline u64 reverse_oct(u64 n, u64 digit) {
u64 m = 0;
while (digit > 0) {
m = (m << 3) | (n & 7);
n = n >> 3;
--digit;
}
return m;
}
Lets say i have an array dynamically allocated.
int* array=new int[10]
That is 10*4=40 bytes or 10*32=320 bits. I want to read the 2nd bit of the 30th byte or 242nd bit. What is the easiest way to do so? I know I can access the 30th byte using array[30] but accessing individual bits is more tricky.
bool bitset(void const * data, int bitindex) {
int byte = bitindex / 8;
int bit = bitindex % 8;
unsigned char const * u = (unsigned char const *) data;
return (u[byte] & (1<<bit)) != 0;
}
this is working !
#define GET_BIT(p, n) ((((unsigned char *)p)[n/8] >> (n%8)) & 0x01)
int main()
{
int myArray[2] = { 0xaaaaaaaa, 0x00ff00ff };
for( int i =0 ; i < 2*32 ; i++ )
printf("%d", GET_BIT(myArray, i));
return 0;
}
ouput :
0101010101010101010101010101010111111111000000001111111100000000
Be carefull of the endiannes !
First, if you're doing bitwise operations, it's usually
preferable to make the elements an unsigned integral type
(although in this case, it really doesn't make that much
difference). As for accessing the bits: to access bit i in an
array of n int's:
static int const bitsPerWord = sizeof(int) * CHAR_BIT;
assert( i >= 0 && i < n * bitsPerWord );
int wordIndex = i / bitsPerWord;
int bitIndex = i % bitsPerWord;
then to read:
return (array[wordIndex] & (1 << bitIndex)) != 0;
to set:
array[wordIndex] |= 1 << bitIndex;
and to reset:
array[wordIndex] &= ~(1 << bitIndex);
Or you can use bitset, if n is constant, or vector<bool> or
boost::dynamic_bitset if it's not, and let someone else do the
work.
You can use something like this:
!((array[30] & 2) == 0)
array[30] is the integer.
& 2 is an and operation which masks the second bit (2 = 00000010)
== 0 will check if the mask result is 0
! will negate that result, because we're checking if it's 1 not zero....
You need bit operations here...
if(array[5] & 0x1)
{
//the first bit in array[5] is 1
}
else
{
//the first bit is 0
}
if(array[5] & 0x8)
{
//the 4th bit in array[5] is 1
}
else
{
//the 4th bit is 0
}
0x8 is 00001000 in binary. Doing the anding masks all other bits and allows you to see if the bit is 1 or 0.
int is typically 32 bits, so you would need to do some arithmetic to get a certain bit number in the entire array.
EDITED based on comment below - array contains int of 32 bits, not 8 bits uchar.
int pos = 241; // I start at index 0
bool bit242 = (array[pos/32] >> (pos%32)) & 1;
I am working through a problem which i was able to solve, all but for the last piece - i am not sure how can one do multiplication using bitwise operators:
0*8 = 0
1*8 = 8
2*8 = 16
3*8 = 24
4*8 = 32
Can you please recommend an approach to solve this?
To multiply by any value of 2 to the power of N (i.e. 2^N) shift the bits N times to the left.
0000 0001 = 1
times 4 = (2^2 => N = 2) = 2 bit shift : 0000 0100 = 4
times 8 = (2^3 -> N = 3) = 3 bit shift : 0010 0000 = 32
etc..
To divide shift the bits to the right.
The bits are whole 1 or 0 - you can't shift by a part of a bit thus if the number you're multiplying by is does not factor a whole value of N
ie.
since: 17 = 16 + 1
thus: 17 = 2^4 + 1
therefore: x * 17 = (x * 16) + x in other words 17 x's
thus to multiply by 17 you have to do a 4 bit shift to the left, and then add the original number again:
==> x * 17 = (x * 16) + x
==> x * 17 = (x * 2^4) + x
==> x * 17 = (x shifted to left by 4 bits) + x
so let x = 3 = 0000 0011
times 16 = (2^4 => N = 4) = 4 bit shift : 0011 0000 = 48
plus the x (0000 0011)
ie.
0011 0000 (48)
+ 0000 0011 (3)
=============
0011 0011 (51)
Edit: Update to the original answer. Charles Petzold has written a fantastic book 'Code' that will explain all of this and more in the easiest of ways. I thoroughly recommend this.
To multiply two binary encoded numbers without a multiply instruction.
It would be simple to iteratively add to reach the product.
unsigned int mult(x, y)
unsigned int x, y;
{
unsigned int reg = 0;
while(y--)
reg += x;
return reg;
}
Using bit operations, the characteristic of the data encoding can be exploited.
As explained previously, a bit shift is the same as multiply by two.
Using this an adder can be used on the powers of two.
// multiply two numbers with bit operations
unsigned int mult(x, y)
unsigned int x, y;
{
unsigned int reg = 0;
while (y != 0)
{
if (y & 1)
{
reg += x;
}
x <<= 1;
y >>= 1;
}
return reg;
}
You'd factor the multiplicand into powers of 2.
3*17 = 3*(16+1) = 3*16 + 3*1
... = 0011b << 4 + 0011b
public static int multi(int x, int y){
boolean neg = false;
if(x < 0 && y >= 0){
x = -x;
neg = true;
}
else if(y < 0 && x >= 0){
y = -y;
neg = true;
}else if( x < 0 && y < 0){
x = -x;
y = -y;
}
int res = 0;
while(y!=0){
if((y & 1) == 1) res += x;
x <<= 1;
y >>= 1;
}
return neg ? (-res) : res;
}
I believe this should be a left shift. 8 is 2^3, so left shift 3 bits:
2 << 3 = 8
-(int)multiplyNumber:(int)num1 withNumber:(int)num2
{
int mulResult =0;
int ithBit;
BOOL isNegativeSign = (num1<0 && num2>0) || (num1>0 && num2<0) ;
num1 = abs(num1);
num2 = abs(num2);
for(int i=0;i<sizeof(num2)*8;i++)
{
ithBit = num2 & (1<<i);
if(ithBit>0){
mulResult +=(num1<<i);
}
}
if (isNegativeSign) {
mulResult = ((~mulResult)+1 );
}
return mulResult;
}
I have just realized that this is the same answer as the previous one. LOL sorry.
public static uint Multiply(uint a, uint b)
{
uint c = 0;
while(b > 0)
{
c += ((b & 1) > 0) ? a : 0;
a <<= 1;
b >>= 1;
}
return c;
}
I was working on a recursive multiplication problem without the * operator and came up with a solution that was informed by the top answer here.
I thought it was worth posting because I really like the explanation in the top answer here, but wanted to expand on it in a way that:
Had a function representation.
Handled cases where your "remainder" was arbitrary.
This only handles positive integers, but you could wrap it in a check for negatives like some of the other answers.
def rec_mult_bitwise(a,b):
# Base cases for recursion
if b == 0:
return 0
if b == 1:
return a
# Get the most significant bit and the power of two it represents
msb = 1
pwr_of_2 = 0
while True:
next_msb = msb << 1
if next_msb > b:
break
pwr_of_2 += 1
msb = next_msb
if next_msb == b:
break
# To understand the return value, remember:
# 1: Left shifting by the power of two is the same as multiplying by the number itself (ie x*16=x<<4)
# 2: Once we've done that, we still need to multiply by the remainder, hence b - msb
return (a << pwr_of_2) + rec_mult_bitwise(a, b - msb)
Using Bitwise operator reduces the time complexity.
In cpp:
#include<iostream>
using name space std;
int main(){
int a, b, res = 0; // read the elements
cin>>a>>b;
// find the small number to reduce the iterations
small = (a<b)?a:b; // small number using terinary operator
big = (small^a)?a:b; // big number using bitwise XOR operator
while(small > 0)
{
if(small & 1)
{
res += big;
}
big = big << 1; // it increases the number << is big * (2 power of big)
small = small >> 1; // it decreases the number >> is small / (2 power of small)
}
cout<<res;
}
In Python:
a = int(input())
b = int(input())
res = 0
small = a if(a < b) else b
big = a if(small ^ a) else b
def multiplication(small, big):
res = 0
while small > 0:
if small & 1:
res += big
big = big << 1
small = small >> 1
return res
answer = multiplication(small, big)
print(answer)
def multiply(x, y):
return x << (y >> 1)
You would want to halve the value of y, hence y shift bits to the right once (y >> 1) and shift the bits again x times to the left to get your answer x << (y >> 1).
I'm looking for the most efficient way to calculate the minimum number of bytes needed to store an integer without losing precision.
e.g.
int: 10 = 1 byte
int: 257 = 2 bytes;
int: 18446744073709551615 (UINT64_MAX) = 8 bytes;
Thanks
P.S. This is for a hash functions which will be called many millions of times
Also the byte sizes don't have to be a power of two
The fastest solution seems to one based on tronics answer:
int bytes;
if (hash <= UINT32_MAX)
{
if (hash < 16777216U)
{
if (hash <= UINT16_MAX)
{
if (hash <= UINT8_MAX) bytes = 1;
else bytes = 2;
}
else bytes = 3;
}
else bytes = 4;
}
else if (hash <= UINT64_MAX)
{
if (hash < 72057594000000000ULL)
{
if (hash < 281474976710656ULL)
{
if (hash < 1099511627776ULL) bytes = 5;
else bytes = 6;
}
else bytes = 7;
}
else bytes = 8;
}
The speed difference using mostly 56 bit vals was minimal (but measurable) compared to Thomas Pornin answer. Also i didn't test the solution using __builtin_clzl which could be comparable.
Use this:
int n = 0;
while (x != 0) {
x >>= 8;
n ++;
}
This assumes that x contains your (positive) value.
Note that zero will be declared encodable as no byte at all. Also, most variable-size encodings need some length field or terminator to know where encoding stops in a file or stream (usually, when you encode an integer and mind about size, then there is more than one integer in your encoded object).
You need just two simple ifs if you are interested on the common sizes only. Consider this (assuming that you actually have unsigned values):
if (val < 0x10000) {
if (val < 0x100) // 8 bit
else // 16 bit
} else {
if (val < 0x100000000L) // 32 bit
else // 64 bit
}
Should you need to test for other sizes, choosing a middle point and then doing nested tests will keep the number of tests very low in any case. However, in that case making the testing a recursive function might be a better option, to keep the code simple. A decent compiler will optimize away the recursive calls so that the resulting code is still just as fast.
Assuming a byte is 8 bits, to represent an integer x you need [log2(x) / 8] + 1 bytes where [x] = floor(x).
Ok, I see now that the byte sizes aren't necessarily a power of two. Consider the byte sizes b. The formula is still [log2(x) / b] + 1.
Now, to calculate the log, either use lookup tables (best way speed-wise) or use binary search, which is also very fast for integers.
The function to find the position of the first '1' bit from the most significant side (clz or bsr) is usually a simple CPU instruction (no need to mess with log2), so you could divide that by 8 to get the number of bytes needed. In gcc, there's __builtin_clz for this task:
#include <limits.h>
int bytes_needed(unsigned long long x) {
int bits_needed = sizeof(x)*CHAR_BIT - __builtin_clzll(x);
if (bits_needed == 0)
return 1;
else
return (bits_needed + 7) / 8;
}
(On MSVC you would use the _BitScanReverse intrinsic.)
You may first get the highest bit set, which is the same as log2(N), and then get the bytes needed by ceil(log2(N) / 8).
Here are some bit hacks for getting the position of the highest bit set, which are copied from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious, and you can click the URL for details of how these algorithms work.
Find the integer log base 2 of an integer with an 64-bit IEEE float
int v; // 32-bit integer to find the log base 2 of
int r; // result of log_2(v) goes here
union { unsigned int u[2]; double d; } t; // temp
t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = v;
t.d -= 4503599627370496.0;
r = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FF;
Find the log base 2 of an integer with a lookup table
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
-1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};
unsigned int v; // 32-bit word to find the log of
unsigned r; // r will be lg(v)
register unsigned int t, tt; // temporaries
if (tt = v >> 16)
{
r = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else
{
r = (t = v >> 8) ? 8 + LogTable256[t] : LogTable256[v];
}
Find the log base 2 of an N-bit integer in O(lg(N)) operations
unsigned int v; // 32-bit value to find the log2 of
const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
const unsigned int S[] = {1, 2, 4, 8, 16};
int i;
register unsigned int r = 0; // result of log2(v) will go here
for (i = 4; i >= 0; i--) // unroll for speed...
{
if (v & b[i])
{
v >>= S[i];
r |= S[i];
}
}
// OR (IF YOUR CPU BRANCHES SLOWLY):
unsigned int v; // 32-bit value to find the log2 of
register unsigned int r; // result of log2(v) will go here
register unsigned int shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF ) << 3; v >>= shift; r |= shift;
shift = (v > 0xF ) << 2; v >>= shift; r |= shift;
shift = (v > 0x3 ) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
// OR (IF YOU KNOW v IS A POWER OF 2):
unsigned int v; // 32-bit value to find the log2 of
static const unsigned int b[] = {0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0,
0xFF00FF00, 0xFFFF0000};
register unsigned int r = (v & b[0]) != 0;
for (i = 4; i > 0; i--) // unroll for speed...
{
r |= ((v & b[i]) != 0) << i;
}
Find the number of bits by taking the log2 of the number, then divide that by 8 to get the number of bytes.
You can find logn of x by the formula:
logn(x) = log(x) / log(n)
Update:
Since you need to do this really quickly, Bit Twiddling Hacks has several methods for quickly calculating log2(x). The look-up table approach seems like it would suit your needs.
This will get you the number of bytes. It's not strictly the most efficient, but unless you're programming a nanobot powered by the energy contained in a red blood cell, it won't matter.
int count = 0;
while (numbertotest > 0)
{
numbertotest >>= 8;
count++;
}
You could write a little template meta-programming code to figure it out at compile time if you need it for array sizes:
template<unsigned long long N> struct NBytes
{ static const size_t value = NBytes<N/256>::value+1; };
template<> struct NBytes<0>
{ static const size_t value = 0; };
int main()
{
std::cout << "short = " << NBytes<SHRT_MAX>::value << " bytes\n";
std::cout << "int = " << NBytes<INT_MAX>::value << " bytes\n";
std::cout << "long long = " << NBytes<ULLONG_MAX>::value << " bytes\n";
std::cout << "10 = " << NBytes<10>::value << " bytes\n";
std::cout << "257 = " << NBytes<257>::value << " bytes\n";
return 0;
}
output:
short = 2 bytes
int = 4 bytes
long long = 8 bytes
10 = 1 bytes
257 = 2 bytes
Note: I know this isn't answering the original question, but it answers a related question that people will be searching for when they land on this page.
Floor((log2(N) / 8) + 1) bytes
You need exactly the log function
nb_bytes = floor(log(x)/log(256))+1
if you use log2, log2(256) == 8 so
floor(log2(x)/8)+1
You need to raise 256 to successive powers until the result is larger than your value.
For example: (Tested in C#)
long long limit = 1;
int byteCount;
for (byteCount = 1; byteCount < 8; byteCount++) {
limit *= 256;
if (limit > value)
break;
}
If you only want byte sizes to be powers of two (If you don't want 65,537 to return 3), replace byteCount++ with byteCount *= 2.
I think this is a portable implementation of the straightforward formula:
#include <limits.h>
#include <math.h>
#include <stdio.h>
int main(void) {
int i;
unsigned int values[] = {10, 257, 67898, 140000, INT_MAX, INT_MIN};
for ( i = 0; i < sizeof(values)/sizeof(values[0]); ++i) {
printf("%d needs %.0f bytes\n",
values[i],
1.0 + floor(log(values[i]) / (M_LN2 * CHAR_BIT))
);
}
return 0;
}
Output:
10 needs 1 bytes
257 needs 2 bytes
67898 needs 3 bytes
140000 needs 3 bytes
2147483647 needs 4 bytes
-2147483648 needs 4 bytes
Whether and how much the lack of speed and the need to link floating point libraries depends on your needs.
I know this question didn't ask for this type of answer but for those looking for a solution using the smallest number of characters, this does the assignment to a length variable in 17 characters, or 25 including the declaration of the length variable.
//Assuming v is the value that is being counted...
int l=0;
for(;v>>l*8;l++);
This is based on SoapBox's idea of creating a solution that contains no jumps, branches etc... Unfortunately his solution was not quite correct. I have adopted the spirit and here's a 32bit version, the 64bit checks can be applied easily if desired.
The function returns number of bytes required to store the given integer.
unsigned short getBytesNeeded(unsigned int value)
{
unsigned short c = 0; // 0 => size 1
c |= !!(value & 0xFF00); // 1 => size 2
c |= (!!(value & 0xFF0000)) << 1; // 2 => size 3
c |= (!!(value & 0xFF000000)) << 2; // 4 => size 4
static const int size_table[] = { 1, 2, 3, 3, 4, 4, 4, 4 };
return size_table[c];
}
For each of eight times, shift the int eight bits to the right and see if there are still 1-bits left. The number of times you shift before you stop is the number of bytes you need.
More succinctly, the minimum number of bytes you need is ceil(min_bits/8), where min_bits is the index (i+1) of the highest set bit.
There are a multitude of ways to do this.
Option #1.
int numBytes = 0;
do {
numBytes++;
} while (i >>= 8);
return (numBytes);
In the above example, is the number you are testing, and generally works for any processor, any size of integer.
However, it might not be the fastest. Alternatively, you can try a series of if statements ...
For a 32 bit integers
if ((upper = (value >> 16)) == 0) {
/* Bit in lower 16 bits may be set. */
if ((high = (value >> 8)) == 0) {
return (1);
}
return (2);
}
/* Bit in upper 16 bits is set */
if ((high = (upper >> 8)) == 0) {
return (3);
}
return (4);
For 64 bit integers, Another level of if statements would be required.
If the speed of this routine is as critical as you say, it might be worthwhile to do this in assembler if you want it as a function call. That could allow you to avoid creating and destroying the stack frame, saving a few extra clock cycles if it is that critical.
A bit basic, but since there will be a limited number of outputs, can you not pre-compute the breakpoints and use a case statement? No need for calculations at run-time, only a limited number of comparisons.
Why not just use a 32-bit hash?
That will work at near-top-speed everywhere.
I'm rather confused as to why a large hash would even be wanted. If a 4-byte hash works, why not just use it always? Excepting cryptographic uses, who has hash tables with more then 232 buckets anyway?
there are lots of great recipes for stuff like this over at Sean Anderson's "Bit Twiddling Hacks" page.
This code has 0 branches, which could be faster on some systems. Also on some systems (GPGPU) its important for threads in the same warp to execute the same instructions. This code is always the same number of instructions no matter what the input value.
inline int get_num_bytes(unsigned long long value) // where unsigned long long is the largest integer value on this platform
{
int size = 1; // starts at 1 sot that 0 will return 1 byte
size += !!(value & 0xFF00);
size += !!(value & 0xFFFF0000);
if (sizeof(unsigned long long) > 4) // every sane compiler will optimize this out
{
size += !!(value & 0xFFFFFFFF00000000ull);
if (sizeof(unsigned long long) > 8)
{
size += !!(value & 0xFFFFFFFFFFFFFFFF0000000000000000ull);
}
}
static const int size_table[] = { 1, 2, 4, 8, 16 };
return size_table[size];
}
g++ -O3 produces the following (verifying that the ifs are optimized out):
xor %edx,%edx
test $0xff00,%edi
setne %dl
xor %eax,%eax
test $0xffff0000,%edi
setne %al
lea 0x1(%rdx,%rax,1),%eax
movabs $0xffffffff00000000,%rdx
test %rdx,%rdi
setne %dl
lea (%rdx,%rax,1),%rax
and $0xf,%eax
mov _ZZ13get_num_bytesyE10size_table(,%rax,4),%eax
retq
Why so complicated? Here's what I came up with:
bytesNeeded = (numBits/8)+((numBits%8) != 0);
Basically numBits divided by eight + 1 if there is a remainder.
There are already a lot of answers here, but if you know the number ahead of time, in c++ you can use a template to make use of the preprocessor.
template <unsigned long long N>
struct RequiredBytes {
enum : int { value = 1 + (N > 255 ? RequiredBits<(N >> 8)>::value : 0) };
};
template <>
struct RequiredBytes<0> {
enum : int { value = 1 };
};
const int REQUIRED_BYTES_18446744073709551615 = RequiredBytes<18446744073709551615>::value; // 8
or for a bits version:
template <unsigned long long N>
struct RequiredBits {
enum : int { value = 1 + RequiredBits<(N >> 1)>::value };
};
template <>
struct RequiredBits<1> {
enum : int { value = 1 };
};
template <>
struct RequiredBits<0> {
enum : int { value = 1 };
};
const int REQUIRED_BITS_42 = RequiredBits<42>::value; // 6
Recently I've been working on a C++ prime generator that uses the Sieve of Atkin ( http://en.wikipedia.org/wiki/Sieve_of_atkin ) to generate its primes. My objective is to be able to generate any 32-bit number. I'll use it mostly for project euler problems. mostly it's just a summer project.
The program uses a bitboard to store primality: that is, a series of ones and zeros where for example the 11th bit would be a 1, the 12th a 0, and the 13th a 1, etc. For efficient memory usage, this is actually and array of chars, each char containing 8 bits. I use flags and bitwise-operators to set and retrieve bits. The gyst of the algorithm is simple: do a first pass using some equations I don't pretend to understand to define if a number is considered "prime" or not. This will for the most part get the correct answers, but a couple nonprime numbers will be marked as prime. Therefore, when iterating through the list, you set all multiples of the prime you just found to "not prime". This has the handy advantage of requiring less processor time the larger a prime gets.
I've got it 90% complete, with one catch:
some of the primes are missing.
Through inspecting the bitboard, I have ascertained that these primes are omitted during the first pass, which basically toggles a number for every solution it has for a number of equations (see wikipedia entry). I've gone over this chunk of code time and time again. I even tried increasing the bounds to what is shown in the wikipedia articles, which is less efficient but I figured might hit a few numbers that I have somehow omitted. Nothing has worked. These numbers simply evaluate to not prime. Most of my test has been on all primes under 128. Of this range, these are the primes that are omitted:
23 and 59.
I have no doubt that on a higher range, more would be missing (just don't want to count through all of them). I don't know why these are missing, but they are. Is there anything special about these two primes? I've double and triple checked, finding and fixing mistakes, but it is still probably something stupid that I am missing.
anyways, here is my code:
#include <iostream>
#include <limits.h>
#include <math.h>
using namespace std;
const unsigned short DWORD_BITS = 8;
unsigned char flag(const unsigned char);
void printBinary(unsigned char);
class PrimeGen
{
public:
unsigned char* sieve;
unsigned sievelen;
unsigned limit;
unsigned bookmark;
PrimeGen(const unsigned);
void firstPass();
unsigned next();
bool getBit(const unsigned);
void onBit(const unsigned);
void offBit(const unsigned);
void switchBit(const unsigned);
void printBoard();
};
PrimeGen::PrimeGen(const unsigned max_num)
{
limit = max_num;
sievelen = limit / DWORD_BITS + 1;
bookmark = 0;
sieve = (unsigned char*) malloc(sievelen);
for (unsigned i = 0; i < sievelen; i++) {sieve[i] = 0;}
firstPass();
}
inline bool PrimeGen::getBit(const unsigned index)
{
return sieve[index/DWORD_BITS] & flag(index%DWORD_BITS);
}
inline void PrimeGen::onBit(const unsigned index)
{
sieve[index/DWORD_BITS] |= flag(index%DWORD_BITS);
}
inline void PrimeGen::offBit(const unsigned index)
{
sieve[index/DWORD_BITS] &= ~flag(index%DWORD_BITS);
}
inline void PrimeGen::switchBit(const unsigned index)
{
sieve[index/DWORD_BITS] ^= flag(index%DWORD_BITS);
}
void PrimeGen::firstPass()
{
unsigned nmod,n,x,y,xroof, yroof;
//n = 4x^2 + y^2
xroof = (unsigned) sqrt(((double)(limit - 1)) / 4);
for(x = 1; x <= xroof; x++){
yroof = (unsigned) sqrt((double)(limit - 4 * x * x));
for(y = 1; y <= yroof; y++){
n = (4 * x * x) + (y * y);
nmod = n % 12;
if (nmod == 1 || nmod == 5){
switchBit(n);
}
}
}
xroof = (unsigned) sqrt(((double)(limit - 1)) / 3);
for(x = 1; x <= xroof; x++){
yroof = (unsigned) sqrt((double)(limit - 3 * x * x));
for(y = 1; y <= yroof; y++){
n = (3 * x * x) + (y * y);
nmod = n % 12;
if (nmod == 7){
switchBit(n);
}
}
}
xroof = (unsigned) sqrt(((double)(limit + 1)) / 3);
for(x = 1; x <= xroof; x++){
yroof = (unsigned) sqrt((double)(3 * x * x - 1));
for(y = 1; y <= yroof; y++){
n = (3 * x * x) - (y * y);
nmod = n % 12;
if (nmod == 11){
switchBit(n);
}
}
}
}
unsigned PrimeGen::next()
{
while (bookmark <= limit)
{
bookmark++;
if (getBit(bookmark))
{
unsigned out = bookmark;
for(unsigned num = bookmark * 2; num <= limit; num += bookmark)
{
offBit(num);
}
return out;
}
}
return 0;
}
inline void PrimeGen::printBoard()
{
for(unsigned i = 0; i < sievelen; i++)
{
if (i % 4 == 0)
cout << endl;
printBinary(sieve[i]);
cout << " ";
}
}
inline unsigned char flag(const unsigned char bit_index)
{
return ((unsigned char) 128) >> bit_index;
}
inline void printBinary(unsigned char byte)
{
unsigned int i = 1 << (sizeof(byte) * 8 - 1);
while (i > 0) {
if (byte & i)
cout << "1";
else
cout << "0";
i >>= 1;
}
}
I did my best to clean it up and make it readable. I'm not a professional programmer, so please be merciful.
Here is the output I get, when I initialize a PrimeGen object named pgen, print its initial bitboard with pgen.printBoard() (please note that 23 and 59 are missing before next() iteration), and then iterate through next() and print all of the returned primes:
00000101 00010100 01010000 01000101
00000100 01010001 00000100 00000100
00010001 01000001 00010000 01000000
01000101 00010100 01000000 00000001
5
7
11
13
17
19
29
31
37
41
43
47
53
61
67
71
73
79
83
89
97
101
103
107
109
113
127
DONE
Process returned 0 (0x0) execution time : 0.064 s
Press any key to continue.
Eureka!!!
As expected, it was a stupid error on my part.
The 3x^2 - y^2 equation has a small caveat that I overlooked: x > y. With this taken into account, I was switching 23 and 59 too many times, leading to them failing.
Thanks for all the help you guys. Saved my bacon.