Next largest integer with some middle bits matching a pattern? - c++

My input is:
a bit mask mask of width n at some offset k >= 0
a bit pattern pattern with 1s in some (but not necessarily all) of the positions where the bit mask has 1s.
an integer val
I want to find the next largest integer result such that:
result > val
result & mask == pattern
For example, suppose mask = 0xFF00 and pattern = 0x0100. Then we expect the following results:
NextLargest(mask, pattern, 0x00000) => 0x00100
NextLargest(mask, pattern, 0x000FF) => 0x00100
NextLargest(mask, pattern, 0x001FE) => 0x001FF
NextLargest(mask, pattern, 0x001FF) => 0x10100
Another example -- say mask = 0xF and pattern = 0xF. Then we expect:
NextLargest(mask, pattern, 0x20) => 0x2F.
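For reference, a brute-force version of the spec (far too slow to use, but handy for checking any answer against these examples) would be:
#include <cstdint>

// Smallest result with result > val and (result & mask) == pattern, found by
// linear search. O(result - val), for testing only; assumes a result exists.
uint32_t NextLargestRef(uint32_t mask, uint32_t pattern, uint32_t val) {
    uint32_t r = val + 1;
    while ((r & mask) != pattern)
        ++r;
    return r;
}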
I've tried something like "strip out the bits that mask cares about, increment it, OR back in pattern and return" but I keep hitting edge cases. The problem is something like a generalization of finding the next largest multiple of some integer.
Here's my attempt so far (runnable link: https://ideone.com/AhXG5M):
#include <iostream>
using namespace std;
using uint32 = unsigned long;
uint32 NextLargest(int width, int offset, uint32 mask, uint32 pattern, uint32 val) {
    unsigned long long ret = (val + 1) & ~mask;
    if ((ret & ((1 << (offset + 1)) - 1)) == 0) {
        // "carry" across the mask
        ret += 1 << (offset + width);
    }
    return ret | pattern;
}
int main() {
    int width = 12;
    int offset = 4;
    uint32 significant_bits = (1 << (width + 1) - 1) << offset;
    uint32 wanted_bits = 0xFFF << offset;
    cout << hex;
    // want 0xFFF1 -- correct
    cout << NextLargest(width, offset, significant_bits, wanted_bits, 0) << endl;
    // want 0xFFF2 -- correct
    cout << NextLargest(width, offset, significant_bits, wanted_bits, 1) << endl;
    // want 0x1FFFF0 -- incorrect, get 0xFFF0
    cout << NextLargest(width, offset, significant_bits, wanted_bits, 0xF) << endl;
    return 0;
}

I didn't test this, but the following algorithm should work (pseudocode):
let mask, pattern, and val be inputs
let fls be a function that returns the 0-based index of the last (highest) set bit in a word
let ffs be a function that returns the 0-based index of the first (lowest) set bit in a word
let low_order_mask be (1 << ffs(mask)) - 1   // the bits below the mask
let applied be (val & ~mask) | pattern
if applied is greater than val then
    return applied & ~low_order_mask   // clear the low bits to get the smallest such value
if applied == val then
    let flipped_low be (~val & low_order_mask)
    if flipped_low is nonzero then
        return applied + 1   // no need to carry
// need to carry
let set_low_zero be applied & ~low_order_mask
let carry be 1 << (fls(mask) + 1)
return set_low_zero + carry
ffs is provided by POSIX (and fls by the BSDs), but other systems might not provide them. There are answers on SO for how to implement these if you need to.
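Here is one possible C++ rendering of the above, using GCC/Clang builtins in place of 0-based ffs/fls (a sketch; it assumes mask != 0, pattern fits inside mask, and the mask's top bit is below bit 31 so the carry shift stays valid):
#include <cassert>
#include <cstdint>

uint32_t next_largest(uint32_t mask, uint32_t pattern, uint32_t val)
{
    assert(mask != 0 && (pattern & ~mask) == 0);
    const int first_set = __builtin_ctz(mask);     // 0-based ffs
    const int last_set = 31 - __builtin_clz(mask); // 0-based fls

    const uint32_t low_order_mask = (uint32_t(1) << first_set) - 1; // bits below the mask
    const uint32_t applied = (val & ~mask) | pattern;
    if (applied > val)
        return applied & ~low_order_mask; // clear the low bits: smallest such value
    if (applied == val) {
        const uint32_t flipped_low = ~val & low_order_mask;
        if (flipped_low != 0)
            return applied + 1;           // no need to carry
    }
    // need to carry into the first bit above the mask
    const uint32_t set_low_zero = applied & ~low_order_mask;
    const uint32_t carry = uint32_t(1) << (last_set + 1); // assumes last_set < 31
    return set_low_zero + carry;
}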

Think of the value as broken into three fields: the bits above the mask, the bits in the mask, and the bits below the mask.
Call them H(value), M(value) and L(value).
We know M(result) == pattern.
We have three candidates:
C1 is H(value) + pattern + 0.
C2 is H(value) + pattern + (L(value) + 1).
C3 is H(value) + pattern + X, where X == (mask << 1) & ~mask, i.e. the lowest bit above the mask.
If pattern > M(value) we can use C1: reducing the high bits would give a number < value, and setting any low bits would only increase the number.
If pattern == M(value) then we can try C2, which is actually value + 1. That fails if adding one overflows into the pattern bits; that means all the low bits were set, and the next lowest place to add is the first bit above the mask, which gives C3.
Otherwise (pattern < M(value)), C1 and C2 are both <= value, so we must carry into the bits above the mask and use C3.
unsigned next_masked(unsigned mask, unsigned pattern, unsigned value) {
    unsigned reduced_pattern = (mask & pattern); // may not be required...
    unsigned over_add = (mask << 1) & ~mask;
    unsigned upper_mask = ~(over_add - 1);
    unsigned cand = (value & upper_mask) | reduced_pattern;
    if (cand > value) {
        return cand;
    }
    if ((value & mask) == reduced_pattern) {
        unsigned scand = value + 1;
        if ((scand & mask) == reduced_pattern) {
            return scand;
        }
    }
    return cand + over_add;
}
Here it is again with some unit tests:
#include <iostream>

unsigned next_masked(unsigned mask, unsigned pattern, unsigned value) {
    unsigned reduced_pattern = (mask & pattern); // may not be required...
    unsigned over_add = (mask << 1) & ~mask;
    unsigned upper_mask = ~(over_add - 1);
    unsigned cand = (value & upper_mask) | reduced_pattern;
    if (cand > value) {
        return cand;
    }
    if ((value & mask) == reduced_pattern) {
        unsigned scand = value + 1;
        if ((scand & mask) == reduced_pattern) {
            return scand;
        }
    }
    return cand + over_add;
}

bool invariant_next_masked(unsigned mask, unsigned pattern, unsigned value, unsigned result) {
    if ((result & mask) != (pattern & mask)) {
        return false;
    }
    if (result <= value) {
        return false;
    }
    for (unsigned test = result - 1; test > value; --test) {
        if ((test & mask) == (pattern & mask)) {
            return false;
        }
    }
    return true;
}

int check_next_masked(unsigned mask, unsigned pattern, unsigned value, unsigned expect) {
    unsigned result = next_masked(mask, pattern, value);
    if (result != expect) {
        std::cout << std::hex << mask << ' ' << pattern << ' ' << value
                  << "==" << result << "!=" << expect << '\n';
        return 1;
    }
    if (!invariant_next_masked(mask, pattern, value, result)) {
        return 1;
    }
    return 0;
}

int main() {
    int errors = 0;
    errors += check_next_masked(0xFF00, 0x0100, 0x0000, 0x00100);
    errors += check_next_masked(0xFF00, 0x0100, 0x00FF, 0x00100);
    errors += check_next_masked(0xFF00, 0x0100, 0x10FE, 0x10100);
    errors += check_next_masked(0xFF00, 0x0100, 0x1067, 0x10100);
    errors += check_next_masked(0xFF00, 0x0100, 0x10123, 0x10124);
    errors += check_next_masked(0xFF00, 0x0100, 0x110FF, 0x20100);
    errors += check_next_masked(0xFF00, 0x0100, 0x102FF, 0x20100);
    errors += check_next_masked(0xFF00, 0x0100, 0x101FF, 0x20100);
    errors += check_next_masked(0x000F, 0x0007, 0x10123, 0x10127);
    errors += check_next_masked(0x000F, 0x0007, 0x10128, 0x10137);
    errors += check_next_masked(0x0FF0, 0x0230, 0x10128, 0x10230);
    errors += check_next_masked(0x0FFF0, 0x01230, 0x01231, 0x01232);
    errors += check_next_masked(0x0FFF0, 0x01230, 0x41237, 0x41238);
    errors += check_next_masked(0x0FFF0, 0x01230, 0x4123F, 0x51230);
    if (errors > 0) {
        std::cout << "Errors " << errors << '\n';
        return 1;
    }
    std::cout << "Success\n";
    return 0;
}

Is the problem to calculate the LARGEST or the SMALLEST next value? The largest seems odd to me. If the requirement is to calculate the smallest value, I think this code should work (tested on gcc 7.1, assuming a 64-bit target, sizeof(void *) == sizeof(size_t) == sizeof(uint64_t)):
#include <cassert>
#include <cstddef>
#include <cstdint>

// ones(n): mask with the n lowest bits set
static inline uint64_t ones(unsigned n) { return (n >= 64) ? ~0ull : (1ull << n) - 1; }

size_t next_smallest_value(size_t mask, size_t pattern, size_t x) {
    assert((pattern & mask) == pattern);
    // only change bits within the mask range to meet the requirement
    auto y = (x & ~mask) | pattern;
    if (y > x) {
        // the operation increased the value:
        // mask off all the lower bits to get the smallest such value
        auto lsb_mask = __builtin_ctzll(mask);
        return y & ~ones(lsb_mask);
    }
    if (y == x) {
        // the masked part already matched, so x + 1 works
        // unless the increment carries into the mask bits
        auto z = x + 1;
        if ((z & mask) == pattern) return z;
    }
    // otherwise the operation decreased the value (or the increment carried):
    // increase the part above the mask by 1
    auto msb_mask = 64 - __builtin_clzll(mask); // index of the first bit above the mask
    // the higher part cannot be empty if the masked part decreased
    assert(msb_mask < 64);
    auto higher = ((y >> msb_mask) + 1) << msb_mask;
    // also, the higher part cannot overflow
    assert(higher != 0);
    return (y & mask) | higher;
}
The idea is very simple: divide the bits into 3 parts: a higher part, the masked part, and a lower part. The masked part is derived directly from, and fully determined by, the mask and pattern; it cannot take any other value.
After setting the masked bits, if the value increased, just mask off all bits in the lower part. If the value is unchanged, x + 1 works unless the increment carries into the masked bits. Otherwise, increase the higher part by 1 (and also mask off all the lower bits).
The above code doesn't deal with ill-formed input; it will trigger the assertions, but the checks are not exhaustive.
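As a quick sanity check against the question's examples (with the fixes above applied):
#include <cstdio>

int main()
{
    // mask = 0xFF00, pattern = 0x0100 (the question's first example)
    std::printf("%zx\n", next_smallest_value(0xFF00, 0x0100, 0x0000)); // 100
    std::printf("%zx\n", next_smallest_value(0xFF00, 0x0100, 0x00FF)); // 100
    std::printf("%zx\n", next_smallest_value(0xFF00, 0x0100, 0x01FE)); // 1ff
    std::printf("%zx\n", next_smallest_value(0xFF00, 0x0100, 0x01FF)); // 10100
    // mask = 0xF, pattern = 0xF (the second example)
    std::printf("%zx\n", next_smallest_value(0x000F, 0x000F, 0x0020)); // 2f
}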

Here are two functions for this slightly confusing question.
The first function gives the LARGEST integer that fulfills the requirements for the result.
The second one gives the SMALLEST next value.
1: Get the LARGEST integer which satisfies result & mask == pattern and result > val:
unsigned NextLargest (unsigned mask, unsigned pattern, unsigned val) {
    // zero "mask" bits and set "pattern" bits in the largest (unsigned) int
    unsigned const x = ~mask | pattern;
    // if the result is not greater than val, we can't satisfy the requirements
    if (x <= val) {
        ... report error, return error code or throw something
    }
    return x;
}
Obviously this just returns the highest (unsigned) integer value that meets the requirements result & mask == pattern and result > val. The if-clause checks whether the result would not be greater than val, in which case the function fails.
2: Get the SMALLEST next value after val that meets the requirements:
unsigned NextSmallest (unsigned mask, unsigned pattern, unsigned val) {
    unsigned const x = (val + mask + 1) & ~mask | pattern;
    if (x <= val) {
        ... increment wrapped, can't give a greater value
    }
    return x;
}
Edit: Changed (val|mask) to val + mask, because the result must still be greater than val.
This function calculates val + 1 while carrying overflowing bits across the mask'd bits.
Here are few examples what the function does, if mask = 0x0ff00 and pattern = 0x00500:
val +mask +1 &~mask |pattern == result
0x00000 0x0ff00 0x0ff01 0x00001 0x00501
0x00001 0x0ff01 0x0ff02 0x00002 0x00502
0x000fe 0x0fffe 0x0ffff 0x000ff 0x005ff
0x000ff 0x0ffff 0x10000 0x10000 0x10500
0x00100 0x10000 0x10001 0x10001 0x10501
0x0f000 0x1ef00 0x1ef01 0x10001 0x10501
0x0ff00 0x1fe00 0x1fe01 0x10001 0x10501
0x0ffff 0x1feff 0x1ff00 0x10000 0x10500
0x10000 0x1ff00 0x1ff01 0x10001 0x10501
0x10001 0x1ff01 0x1ff02 0x10002 0x10502
0x100ff 0x1ffff 0x20000 0x20000 0x20500
After long editing and rewriting I still can't give a good enough answer for the question; its examples have results that seem odd to me. I'll still leave this here in case someone finds these functions, or parts of them, useful. Note that I did not actually test the functions on a computer.

Related

Next higher number with same number of set bits and some bits fixed

I'm trying to find a way to, given a number of bits that need to be set, and a few indexes of bits that should remain fixed, generate the next higher number with the same number of set bits, that has all the fixed bits in place. This is closely related to https://www.chessprogramming.org/Traversing_Subsets_of_a_Set#Snoobing_the_Universe
The difference is that I want to keep some of the bits unchanged, and I'm trying to do this as efficiently as possible, with something close to the snoob function that, given a number and the conditions, bithacks its way to the next one (so I'm trying to avoid iterating through all the smaller subsets and checking which ones contain the required bits, for example).
For example, if I have the universe of numbers {1,2,...,20}, I'd like to, given a number with bits {2,5,6} set, generate the smallest number with 6 set bits that has bits {2,5,6} set, and then the number after that, etc.
Solution 1 (based on "Snoobing the Universe")
One solution is to define a value corresponding to the "fixed bits" and one corresponding to the "variable bits".
E.g. for bits {2, 5, 6}, the "fixed bits" value would be 0x64 (assuming bits are counted starting from 0).
The "variable bits" is initialized with the smallest value having the remaining number of bits. E.g. if we want a total of 6 bits and have 3 fixed bits, the remaining number of bits is 6-3=3, so the "variable bits" starting value is 0x7.
Now the resulting value is calculated by "blending" the two bit sets by inserting the "variable bits" into the places where the "fixed bits" are 0 (see the function blend() below).
To get the next value, the "variable bits" are modified using the linked "Snoobing the Universe" function (snoob() below) and the result is again obtained by "blending" the fixed and variable bits.
All in all, a solution is as follows (prints the first 10 numbers as an example):
#include <stdio.h>
#include <stdint.h>

uint64_t snoob (uint64_t x) {
    uint64_t smallest, ripple, ones;
    smallest = x & -x;
    ripple = x + smallest;
    ones = x ^ ripple;
    ones = (ones >> 2) / smallest;
    return ripple | ones;
}

uint64_t blend(uint64_t fixed, uint64_t var)
{
    uint64_t result = fixed;
    uint64_t maskResult = 1;
    while (var != 0)
    {
        if ((result & maskResult) == 0)
        {
            if ((var & 1) != 0)
            {
                result |= maskResult;
            }
            var >>= 1;
        }
        maskResult <<= 1;
    }
    return result;
}

int main(void)
{
    const uint64_t fixedBits = 0x64; // bits 2, 5, 6 must be set
    const int additionalBits = 3;
    uint64_t varBits = ((uint64_t)1 << additionalBits) - 1;
    uint64_t value;
    for (unsigned i = 0; i < 10; i++)
    {
        value = blend(fixedBits, varBits);
        printf("%u: decimal=%llu hex=0x%04llx\n", i, value, value);
        varBits = snoob(varBits); // get next value for variable bits
    }
}
Solution 2 (based on "Snoobing any Sets")
Another solution, based on the linked "Snoobing any Sets" (function snoobSubset() below), is to define the "variable set" as the bits which are not fixed, and then initialize the "variable bits" as the n least significant of those bits (see function getLsbOnes() below). In the example case, n=3.
This solution is as follows:
#include <stdio.h>
#include <stdint.h>

uint64_t getLsbOnes(uint64_t value, unsigned count)
{
    uint64_t mask = 1;
    while (mask != 0)
    {
        if (count > 0)
        {
            if ((mask & value) != 0)
            {
                count--;
            }
        }
        else
        {
            value &= ~mask;
        }
        mask <<= 1;
    }
    return value;
}

// get next greater subset of set with same number of one bits
uint64_t snoobSubset (uint64_t sub, uint64_t set) {
    uint64_t tmp = sub - 1;
    uint64_t rip = set & (tmp + (sub & (0 - sub)) - set);
    for (sub = (tmp & sub) ^ rip; sub &= sub - 1; rip ^= tmp, set ^= tmp)
        tmp = set & (0 - set);
    return rip;
}

int main(void)
{
    const uint64_t fixedBits = 0x64;
    const int additionalBits = 3;
    const uint64_t varSet = ~fixedBits;
    uint64_t varBits = getLsbOnes(varSet, additionalBits);
    uint64_t value;
    for (unsigned i = 0; i < 10; i++)
    {
        value = fixedBits | varBits;
        printf("%u: decimal=%llu hex=0x%04llx\n", i, value, value);
        varBits = snoobSubset(varBits, varSet);
    }
}
Example output
The output for both solutions should be:
0: decimal=111 hex=0x006f
1: decimal=119 hex=0x0077
2: decimal=125 hex=0x007d
3: decimal=126 hex=0x007e
4: decimal=231 hex=0x00e7
5: decimal=237 hex=0x00ed
6: decimal=238 hex=0x00ee
7: decimal=245 hex=0x00f5
8: decimal=246 hex=0x00f6
9: decimal=252 hex=0x00fc

Converting a 'long' type into a binary String

My objective is to write an algorithm that would be able to convert a long number into a binary number stored in a string.
Here is my current block of code:
#include <iostream>
#define LONG_SIZE 64; // size of a long type is 64 bits
using namespace std;
string b10_to_b2(long x)
{
    string binNum;
    if(x < 0) // determine if the number is negative; a two's complement number is negative if its first (sign) bit is one
    {
        binNum = "1";
    }
    else
    {
        binNum = "0";
    }
    int i = LONG_SIZE - 1;
    while(i > 0)
    {
        i --;
        if( (x & ( 1 << i) ) == ( 1 << i) )
        {
            binNum = binNum + "1";
        }
        else
        {
            binNum = binNum + "0";
        }
    }
    return binNum;
}

int main()
{
    cout << b10_to_b2(10) << endl;
}
The output of this program is:
00000000000000000000000000000101000000000000000000000000000001010
I want the output to be:
00000000000000000000000000000000000000000000000000000000000001010
Can anyone identify the problem? For whatever reason the function outputs 10 represented by 32 bits concatenated with another 10 represented by 32 bits.
Why would you assume long is 64 bit?
Try const size_t LONG_SIZE = sizeof(long) * 8;
Check this, the program works correctly with my changes:
http://ideone.com/y3OeB3
Edit: and as @Mats Petersson pointed out, you can make it more robust by changing this line
if( (x & ( 1 << i) ) == ( 1 << i) )
to something like
if( (x & ( 1UL << i) ) )
where that UL is important; you can see his explanation in the comments.
Several suggestions:
Make sure you use a type that is guaranteed to be 64-bit, such as uint64_t, int64_t or long long.
Use the above mentioned 64-bit type for the value you shift (e.g. 1ULL << i) so that the shift is calculated in 64 bits. This is caused by the fact that a shift is only defined by the standard when the number of bits shifted is strictly less than the number of bits in the type being shifted, and 1 is of type int, which for most modern platforms (evidently including yours) is 32 bits.
Don't put a semicolon at the end of your #define LONG_SIZE; or better yet, use const int long_size = 64; as this allows all manner of better behaviour, for example that in the debugger print long_size will show 64, whereas print LONG_SIZE, where LONG_SIZE is a macro, will yield an error.
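Putting those suggestions together, a minimal corrected sketch (to_binary is just an illustrative name) might look like:
#include <cstdint>
#include <iostream>
#include <string>

std::string to_binary(int64_t x)
{
    const int bits = 64;
    const uint64_t u = static_cast<uint64_t>(x); // the two's-complement bit pattern
    std::string binNum;
    binNum.reserve(bits);
    for (int i = bits - 1; i >= 0; --i) // MSB first; the shift is done on a 64-bit value
        binNum += ((u >> i) & 1u) ? '1' : '0';
    return binNum;
}

int main()
{
    std::cout << to_binary(10) << '\n'; // 60 zeros followed by 1010
}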

turning off leftmost non zero bit of a number

How can I turn off the leftmost non-zero bit of a number in O(1)?
For example:
n = 366 (base 10) = 101101110 (in base 2)
After turning the leftmost non-zero bit off, the number looks like 001101110.
n will always be > 0.
Well, if you insist on O(1) under any circumstances, the Intel intrinsic _bit_scan_reverse(), defined in immintrin.h, does a hardware find of the most significant non-zero bit in an int.
Though its functional equivalent is described as a loop, I believe it's constant time, given that its latency is fixed at 3 (as per the Intel Intrinsics Guide).
The function returns the index of the most significant non-zero bit, so a simple:
n = n & ~(1 << _bit_scan_reverse(n));
should do.
This intrinsic is undefined for n == 0, so you have to watch out there. I'm following the assumption of your original post where n > 0.
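For completeness, a tiny sketch of that approach (note: GCC exposes _bit_scan_reverse via x86intrin.h, while ICC has it in immintrin.h):
#include <x86intrin.h>
#include <iostream>

int main()
{
    int n = 366;                          // 101101110 in binary
    n = n & ~(1 << _bit_scan_reverse(n)); // clear the most significant set bit
    std::cout << n << '\n';               // prints 110, i.e. 001101110
}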
n = 2^x + y, where x = floor(log2(n)).
Your highest set bit is x.
So in order to reset that bit:
number &= ~(1 << x);
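As a complete sketch of that (floating-point log2 is fine for small values like this, though for very large integers a bit-scan instruction is safer):
#include <cmath>
#include <iostream>

int main()
{
    int n = 366;
    int x = static_cast<int>(std::log2(n)); // x = floor(log2(n)), the index of the highest set bit
    n &= ~(1 << x);
    std::cout << n << '\n';                 // prints 110
}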
Another approach:
#include <iostream>
using std::cout;

int highestOneBit(int i) {
    i |= (i >> 1);
    i |= (i >> 2);
    i |= (i >> 4);
    i |= (i >> 8);
    i |= (i >> 16);
    return i - (i >> 1);
}

int main() {
    int n = 32767;
    int z = highestOneBit(n); // returns the highest set bit value, i.e. 2^x
    cout << (n & (~z)); // resets the highest set bit
    return 0;
}
Check out this question for a possibly faster solution using a processor instruction.
However, an O(lg N) solution is:
int cmsb(int x)
{
    unsigned int count = 0;
    int y = x; // count on a copy so the original value is preserved
    while (y >>= 1) {
        ++count; // count ends up as the index of the highest set bit
    }
    return x & ~(1 << count);
}
If ANDN is not supported and LZCNT is supported, the fastest O(1) way to do it is not something along the lines of n = n & ~(1 << _bit_scan_reverse(n)); but rather...
int reset_highest_set_bit(int x)
{
    const int mask = 0x7FFFFFFF; // 011111111[...]
    return x & (mask >> __builtin_clz(x));
}

Compact a hex number

Is there a clever (i.e. branchless) way to "compact" a hex number? Basically, move all the 0 nibbles to one side?
eg:
0x10302040 -> 0x13240000
or
0x10302040 -> 0x00001324
I looked on Bit Twiddling Hacks but didn't see anything.
It's for an SSE numerical pivoting algorithm. I need to remove any pivots that become 0. I can use _mm_cmpgt_ps to find good pivots, _mm_movemask_ps to convert that into a mask, and then bit hacks to get something like the above. The hex value gets munged into a mask for a _mm_shuffle_ps instruction to perform a permutation on the SSE 128-bit register.
To compute mask for _pext:
mask = arg;
mask |= (mask << 1) & 0xAAAAAAAA | (mask >> 1) & 0x55555555;
mask |= (mask << 2) & 0xCCCCCCCC | (mask >> 2) & 0x33333333;
First do bit-or on pairs of bits, then on quads. Masks prevent shifted values from overflowing to other digits.
After computing mask this way or harold's way (which is probably faster) you don't need the full power of _pext, so if targeted hardware doesn't support it you can replace it with this:
for (int i = 0; i < 7; i++) {
    stay_mask = mask & (~mask - 1);
    arg = (arg & stay_mask) | ((arg >> 4) & ~stay_mask);
    mask = stay_mask | (mask >> 4);
}
Each iteration moves all nibbles one digit to the right if there is space. stay_mask marks bits that are already in their final positions. This uses somewhat fewer operations than the Hacker's Delight solution, but might still benefit from branching. (A wrapped-up version is sketched below.)
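Wrapped up as a function, the fallback might look like this (a sketch; compress_nibbles is just an illustrative name):
#include <cstdint>

// Moves the nibbles selected by mask (an F per non-zero digit, computed as
// shown above) toward the low end of arg, one digit per iteration.
uint32_t compress_nibbles(uint32_t arg, uint32_t mask)
{
    for (int i = 0; i < 7; i++) {
        uint32_t stay_mask = mask & (~mask - 1); // bits already in their final positions
        arg = (arg & stay_mask) | ((arg >> 4) & ~stay_mask);
        mask = stay_mask | (mask >> 4);
    }
    return arg;
}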
Supposing we can use _pext_u32, the issue then is computing a mask that has an F for every nibble that isn't zero. I'm not sure what the best approach is, but you can compute the OR of the 4 bits of the nibble and then "spread" it back out to F's like this:
// calculate horizontal OR of every nibble
x |= x >> 1;
x |= x >> 2;
// clean up junk
x &= 0x11111111;
// spread
x *= 0xF;
Then use that as the mask of _pext_u32.
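Combined into one function, that could look like this (a sketch; compact_nibbles is an illustrative name, and _pext_u32 requires BMI2):
#include <cstdint>
#include <immintrin.h> // _pext_u32 (BMI2)

// 0x10302040 -> 0x00001324: non-zero nibbles packed toward the low end.
uint32_t compact_nibbles(uint32_t x)
{
    uint32_t m = x;
    m |= m >> 1;       // horizontal OR of every nibble...
    m |= m >> 2;
    m &= 0x11111111u;  // ...cleaned up into the nibble's low bit
    m *= 0xFu;         // spread back out to an F per non-zero nibble
    return _pext_u32(x, m);
}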
_pext_u32 can be emulated by this (taken from Hacker's Delight, figure 7.6)
unsigned compress(unsigned x, unsigned m) {
    unsigned mk, mp, mv, t;
    int i;
    x = x & m;    // Clear irrelevant bits.
    mk = ~m << 1; // We will count 0's to right.
    for (i = 0; i < 5; i++) {
        mp = mk ^ (mk << 1); // Parallel prefix.
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mp = mp ^ (mp << 16);
        mv = mp & m;                   // Bits to move.
        m = m ^ mv | (mv >> (1 << i)); // Compress m.
        t = x & mv;
        x = x ^ t | (t >> (1 << i));   // Compress x.
        mk = mk & ~mp;
    }
    return x;
}
But that's a bit of a disaster. It's probably better to just resort to branching code then.
uint32_t fun(uint32_t val) {
    uint32_t retVal(0x00);
    uint32_t sa(28);
    for (int sb(28); sb >= 0; sb -= 4) {
        if (val & (0x0Fu << sb)) {
            retVal |= ((val >> sb) & 0x0Fu) << sa; // move this non-zero nibble up to position sa
            sa -= 4;
        }
    }
    return retVal;
}
I think this (or something similar) is what you're looking for: eliminating the 0 nibbles within a number. I've not debugged it, and it only compacts toward one side at the moment.
If your processor supports conditional instruction execution, you may get a benefit from this algorithm:
uint32_t compact(uint32_t orig_value)
{
    uint32_t new_value = 0u;
    for (unsigned int i = 0; i < 8; ++i) // 8 hex digits
    {
        if ((orig_value & 0xF0000000u) != 0u)
        {
            // append the non-zero top digit; zero digits are skipped
            new_value = (new_value << 4) | (orig_value >> 28);
        }
        orig_value = orig_value << 4; // next digit
    }
    return new_value;
}
This compacts the non-zero digits toward the low end (0x10302040 -> 0x00001324) and looks like a good candidate for loop unrolling.
The algorithm assumes that when the original value is shifted left, zeros are shifted in, filling in the "empty" bits.
Edit 1:
On a processor that supports conditional execution of instructions, the update of new_value would be conditionally executed depending on the result of ANDing the original value with the digit mask. Thus no branching, only ignored instructions.
I came up with the following solution. Please take a look; maybe it will help you.
#include <iostream>
#include <sstream>
#include <algorithm>
using namespace std;

class IsZero
{
public:
    bool operator ()(char c)
    {
        return '0' == c;
    }
};

int main()
{
    int a = 0x01020334; // INPUT
    ostringstream my_sstream;
    my_sstream << hex << a;
    string str = my_sstream.str();
    int base_str_length = str.size();
    cout << "Input hex: " << str << endl;
    // move the zeros to the end, then trim back to the original length
    str.insert(remove_if(begin(str), end(str), IsZero()), count_if(begin(str), end(str), IsZero()), '0');
    str.replace(begin(str) + base_str_length, end(str), "");
    cout << "Processed hex: " << str << endl;
    return 0;
}
Output:
Input hex: 1020334
Processed hex: 1233400

How to determine how many bytes an integer needs?

I'm looking for the most efficient way to calculate the minimum number of bytes needed to store an integer without losing precision.
e.g.
int: 10 = 1 byte
int: 257 = 2 bytes;
int: 18446744073709551615 (UINT64_MAX) = 8 bytes;
Thanks
P.S. This is for a hash function which will be called many millions of times
Also the byte sizes don't have to be a power of two
The fastest solution seems to be one based on tronics' answer:
int bytes;
if (hash <= UINT32_MAX)
{
    if (hash < 16777216U) // 2^24
    {
        if (hash <= UINT16_MAX)
        {
            if (hash <= UINT8_MAX) bytes = 1;
            else bytes = 2;
        }
        else bytes = 3;
    }
    else bytes = 4;
}
else // hash <= UINT64_MAX always holds here
{
    if (hash < 72057594037927936ULL) // 2^56
    {
        if (hash < 281474976710656ULL) // 2^48
        {
            if (hash < 1099511627776ULL) bytes = 5; // 2^40
            else bytes = 6;
        }
        else bytes = 7;
    }
    else bytes = 8;
}
The speed difference using mostly 56-bit values was minimal (but measurable) compared to Thomas Pornin's answer. Also, I didn't test the solution using __builtin_clzl, which could be comparable.
Use this:
int n = 0;
while (x != 0) {
    x >>= 8;
    n ++;
}
This assumes that x contains your (positive) value.
Note that zero will be declared encodable as no byte at all. Also, most variable-size encodings need some length field or terminator to know where encoding stops in a file or stream (usually, when you encode an integer and mind about size, then there is more than one integer in your encoded object).
You need just two simple ifs if you are interested in the common sizes only. Consider this (assuming that you actually have unsigned values):
if (val < 0x10000) {
    if (val < 0x100) // 8 bit
    else             // 16 bit
} else {
    if (val < 0x100000000L) // 32 bit
    else                    // 64 bit
}
Should you need to test for other sizes, choosing a middle point and then doing nested tests will keep the number of tests very low in any case. However, in that case making the testing a recursive function might be a better option, to keep the code simple; see the sketch below. A decent compiler will optimize away the recursive calls so that the resulting code is still just as fast.
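For example, a recursive binary search over the byte count might look like this (a sketch; names are illustrative):
// Binary search on the answer: does val fit in mid bytes?
int bytes_in_range(unsigned long long val, int lo, int hi)
{
    if (lo == hi)
        return lo;
    const int mid = (lo + hi) / 2;                      // mid <= 7 here, so the shift is safe
    const unsigned long long limit = 1ULL << (8 * mid); // smallest value needing more than mid bytes
    if (val < limit)
        return bytes_in_range(val, lo, mid);
    return bytes_in_range(val, mid + 1, hi);
}

int bytes_needed(unsigned long long val)
{
    return bytes_in_range(val, 1, 8);
}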
Assuming a byte is 8 bits, to represent an integer x you need [log2(x) / 8] + 1 bytes (for x >= 1), where [x] = floor(x).
Ok, I see now that the byte sizes aren't necessarily a power of two. Consider a byte size of b bits: the formula is still [log2(x) / b] + 1.
Now, to calculate the log, either use lookup tables (the best way speed-wise) or binary search, which is also very fast for integers.
The function to find the position of the first '1' bit from the most significant side (clz or bsr) is usually a simple CPU instruction (no need to mess with log2), so you could divide that by 8 to get the number of bytes needed. In gcc, there's __builtin_clz for this task:
#include <limits.h>

int bytes_needed(unsigned long long x) {
    if (x == 0) // __builtin_clzll(0) is undefined
        return 1;
    int bits_needed = sizeof(x) * CHAR_BIT - __builtin_clzll(x);
    return (bits_needed + 7) / 8;
}
(On MSVC you would use the _BitScanReverse intrinsic.)
You may first get the highest bit set, which is the same as floor(log2(N)), and then get the bytes needed as floor(log2(N) / 8) + 1.
Here are some bit hacks for getting the position of the highest bit set, which are copied from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious, and you can click the URL for details of how these algorithms work.
Find the integer log base 2 of an integer with an 64-bit IEEE float
int v; // 32-bit integer to find the log base 2 of
int r; // result of log_2(v) goes here
union { unsigned int u[2]; double d; } t; // temp
t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = v;
t.d -= 4503599627370496.0;
r = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FF;
Find the log base 2 of an integer with a lookup table
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
    -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
    LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};

unsigned int v; // 32-bit word to find the log of
unsigned r;     // r will be lg(v)
register unsigned int t, tt; // temporaries

if (tt = v >> 16)
{
    r = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else
{
    r = (t = v >> 8) ? 8 + LogTable256[t] : LogTable256[v];
}
Find the log base 2 of an N-bit integer in O(lg(N)) operations
unsigned int v; // 32-bit value to find the log2 of
const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
const unsigned int S[] = {1, 2, 4, 8, 16};
int i;
register unsigned int r = 0; // result of log2(v) will go here
for (i = 4; i >= 0; i--) // unroll for speed...
{
    if (v & b[i])
    {
        v >>= S[i];
        r |= S[i];
    }
}

// OR (IF YOUR CPU BRANCHES SLOWLY):
unsigned int v;          // 32-bit value to find the log2 of
register unsigned int r; // result of log2(v) will go here
register unsigned int shift;
r =     (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF  ) << 3; v >>= shift; r |= shift;
shift = (v > 0xF   ) << 2; v >>= shift; r |= shift;
shift = (v > 0x3   ) << 1; v >>= shift; r |= shift;
r |= (v >> 1);

// OR (IF YOU KNOW v IS A POWER OF 2):
unsigned int v; // 32-bit value to find the log2 of
static const unsigned int b[] = {0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0,
                                 0xFF00FF00, 0xFFFF0000};
register unsigned int r = (v & b[0]) != 0;
for (i = 4; i > 0; i--) // unroll for speed...
{
    r |= ((v & b[i]) != 0) << i;
}
Find the number of bits by taking the log2 of the number, then divide that by 8 to get the number of bytes.
You can find logn of x by the formula:
logn(x) = log(x) / log(n)
Update:
Since you need to do this really quickly, Bit Twiddling Hacks has several methods for quickly calculating log2(x). The look-up table approach seems like it would suit your needs.
This will get you the number of bytes. It's not strictly the most efficient, but unless you're programming a nanobot powered by the energy contained in a red blood cell, it won't matter.
int count = 0;
while (numbertotest > 0)
{
    numbertotest >>= 8;
    count++;
}
You could write a little template meta-programming code to figure it out at compile time if you need it for array sizes:
#include <climits>
#include <cstddef>
#include <iostream>

template<unsigned long long N> struct NBytes
{ static const size_t value = NBytes<N/256>::value + 1; };

template<> struct NBytes<0>
{ static const size_t value = 0; };

int main()
{
    std::cout << "short = " << NBytes<SHRT_MAX>::value << " bytes\n";
    std::cout << "int = " << NBytes<INT_MAX>::value << " bytes\n";
    std::cout << "long long = " << NBytes<ULLONG_MAX>::value << " bytes\n";
    std::cout << "10 = " << NBytes<10>::value << " bytes\n";
    std::cout << "257 = " << NBytes<257>::value << " bytes\n";
    return 0;
}
output:
short = 2 bytes
int = 4 bytes
long long = 8 bytes
10 = 1 bytes
257 = 2 bytes
Note: I know this isn't answering the original question, but it answers a related question that people will be searching for when they land on this page.
floor(log2(N) / 8) + 1 bytes
You need exactly the log function:
nb_bytes = floor(log(x) / log(256)) + 1
If you use log2, then log2(256) == 8, so:
nb_bytes = floor(log2(x) / 8) + 1
You need to raise 256 to successive powers until the result is larger than your value.
For example: (Tested in C#)
long long limit = 1;
int byteCount;
for (byteCount = 1; byteCount < 8; byteCount++) {
    limit *= 256;
    if (limit > value)
        break;
}
If you only want byte sizes that are powers of two (if you don't want 65,537 to return 3), replace byteCount++ with byteCount *= 2.
I think this is a portable implementation of the straightforward formula:
#include <limits.h>
#include <math.h>
#include <stdio.h>
int main(void) {
    int i;
    unsigned int values[] = {10, 257, 67898, 140000, INT_MAX, INT_MIN};
    for (i = 0; i < sizeof(values)/sizeof(values[0]); ++i) {
        printf("%d needs %.0f bytes\n",
               values[i],
               1.0 + floor(log(values[i]) / (M_LN2 * CHAR_BIT)));
    }
    return 0;
}
Output:
10 needs 1 bytes
257 needs 2 bytes
67898 needs 3 bytes
140000 needs 3 bytes
2147483647 needs 4 bytes
-2147483648 needs 4 bytes
Whether the lack of speed and the need to link the floating-point library matter depends on your needs.
I know this question didn't ask for this type of answer but for those looking for a solution using the smallest number of characters, this does the assignment to a length variable in 17 characters, or 25 including the declaration of the length variable.
//Assuming v is the value that is being counted...
int l=0;
for(;v>>l*8;l++);
This is based on SoapBox's idea of creating a solution that contains no jumps, branches etc... Unfortunately his solution was not quite correct. I have adopted the spirit of it, and here's a 32-bit version; the 64-bit checks can be applied easily if desired.
The function returns number of bytes required to store the given integer.
unsigned short getBytesNeeded(unsigned int value)
{
    unsigned short c = 0;               // 0 => size 1
    c |= !!(value & 0xFF00);            // 1 => size 2
    c |= (!!(value & 0xFF0000)) << 1;   // 2 => size 3
    c |= (!!(value & 0xFF000000)) << 2; // 4 => size 4
    static const int size_table[] = { 1, 2, 3, 3, 4, 4, 4, 4 };
    return size_table[c];
}
For each of up to eight times, shift the int eight bits to the right and see if there are still 1-bits left. The number of shifts you do before you stop is the number of bytes you need.
More succinctly, the minimum number of bytes you need is ceil(min_bits / 8), where min_bits is the 1-based index of the highest set bit.
There are a multitude of ways to do this.
Option #1.
int numBytes = 0;
do {
    numBytes++;
} while (i >>= 8);
return (numBytes);
In the above example, i is the number you are testing, and this generally works for any processor and any size of integer.
However, it might not be the fastest. Alternatively, you can try a series of if statements ...
For 32-bit integers:
if ((upper = (value >> 16)) == 0) {
    /* Bit in lower 16 bits may be set. */
    if ((high = (value >> 8)) == 0) {
        return (1);
    }
    return (2);
}
/* Bit in upper 16 bits is set */
if ((high = (upper >> 8)) == 0) {
    return (3);
}
return (4);
For 64-bit integers, another level of if statements would be required.
If the speed of this routine is as critical as you say, it might be worthwhile to do this in assembler if you want it as a function call. That could allow you to avoid creating and destroying the stack frame, saving a few extra clock cycles if it is that critical.
A bit basic, but since there will be a limited number of outputs, can you not pre-compute the breakpoints and use a case statement? No need for calculations at run-time, only a limited number of comparisons. Something like the sketch below.
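For instance (a sketch; names are illustrative):
// Precomputed breakpoints: breakpoints[i] is the largest value that fits in i+1 bytes.
int bytes_needed(unsigned long long v)
{
    static const unsigned long long breakpoints[] = {
        0xFFULL, 0xFFFFULL, 0xFFFFFFULL, 0xFFFFFFFFULL,
        0xFFFFFFFFFFULL, 0xFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFULL,
        0xFFFFFFFFFFFFFFFFULL
    };
    int n = 1;
    while (v > breakpoints[n - 1]) // at most 7 comparisons
        ++n;
    return n;
}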
Why not just use a 32-bit hash?
That will work at near-top-speed everywhere.
I'm rather confused as to why a large hash would even be wanted. If a 4-byte hash works, why not just use it always? Excepting cryptographic uses, who has hash tables with more than 2^32 buckets anyway?
There are lots of great recipes for stuff like this over at Sean Anderson's "Bit Twiddling Hacks" page.
This code has 0 branches, which could be faster on some systems. Also, on some systems (GPGPU) it's important for threads in the same warp to execute the same instructions; this code is always the same number of instructions no matter what the input value is.
inline int get_num_bytes(unsigned long long value) // where unsigned long long is the largest integer type on this platform
{
    int size = 0; // index 0 in the table maps a value of 0 to 1 byte
    size += !!(value & 0xFFFFFFFFFFFFFF00ull); // any bit at or above bit 8 => at least 2 bytes
    size += !!(value & 0xFFFFFFFFFFFF0000ull); // any bit at or above bit 16 => at least 4 bytes
    if (sizeof(unsigned long long) > 4) // every sane compiler will optimize this out
    {
        size += !!(value & 0xFFFFFFFF00000000ull); // any bit at or above bit 32 => 8 bytes
    }
    static const int size_table[] = { 1, 2, 4, 8 };
    return size_table[size];
}
g++ -O3 produces branch-free code along the following lines (verifying that the ifs are optimized out):
xor %edx,%edx
test $0xff00,%edi
setne %dl
xor %eax,%eax
test $0xffff0000,%edi
setne %al
lea 0x1(%rdx,%rax,1),%eax
movabs $0xffffffff00000000,%rdx
test %rdx,%rdi
setne %dl
lea (%rdx,%rax,1),%rax
and $0xf,%eax
mov _ZZ13get_num_bytesyE10size_table(,%rax,4),%eax
retq
Why so complicated? Here's what I came up with:
bytesNeeded = (numBits/8)+((numBits%8) != 0);
Basically numBits divided by eight + 1 if there is a remainder.
There are already a lot of answers here, but if you know the number ahead of time, in C++ you can use templates to compute it at compile time.
template <unsigned long long N>
struct RequiredBytes {
    enum : int { value = 1 + (N > 255 ? RequiredBytes<(N >> 8)>::value : 0) };
};

template <>
struct RequiredBytes<0> {
    enum : int { value = 1 };
};

const int REQUIRED_BYTES_18446744073709551615 = RequiredBytes<18446744073709551615ULL>::value; // 8
or for a bits version:
template <unsigned long long N>
struct RequiredBits {
    enum : int { value = 1 + RequiredBits<(N >> 1)>::value };
};

template <>
struct RequiredBits<1> {
    enum : int { value = 1 };
};

template <>
struct RequiredBits<0> {
    enum : int { value = 1 };
};
const int REQUIRED_BITS_42 = RequiredBits<42>::value; // 6