What is the most efficient way to add two scalars in c/c++ with overflow protection? For example, adding two unsigned chars is 255 if a+b >= 255.
I have:
// Saturating 8-bit addition: returns x + y, or 255 when the sum exceeds 255.
unsigned char inline add_o(unsigned char x, unsigned char y)
{
    const unsigned short int limit = 255;
    // The operands are promoted before the addition, so the sum (at most 510)
    // is computed without wrapping.
    const unsigned short int wide = static_cast<unsigned short int>(x + y);
    if (wide > limit)
    {
        return static_cast<unsigned char>(limit);
    }
    return static_cast<unsigned char>(wide);
}
that can be driven by:
// Example driver: 200 + 129 exceeds 255, so mySum is expected to saturate at 255.
unsigned char x = 200;
unsigned char y = 129;
unsigned char mySum = add_o(x,y);
I see some ideas here but I am interested in the fastest way to perform this operation---or at least one that is highly palatable to an optimizing compiler.
Most modern compilers will generate branch-free code for your current solution, which is already fairly good. A few optimisations, which are very hardware dependent (x86 in particular), are:
replace the comparison with a masked AND
try to turn the overflow protection into a conditional move.
This is how I would have done it:
// Saturating 8-bit addition that detects overflow by testing the bits above
// the low byte of the widened sum.
unsigned char inline add_o(unsigned char x, unsigned char y) {
    const unsigned int total = static_cast<unsigned int>(x) + y;
    // Any bit set at position 8 or higher means the sum does not fit in a byte.
    const bool overflowed = (total >> 8) != 0;
    return static_cast<unsigned char>(overflowed ? 0xFFu : total);
}
You mean unsigned saturating arithmetic?
// Branch-free unsigned saturating addition of two bytes.
unsigned char inline add_o(unsigned char x, unsigned char y) {
    const unsigned char wrapped = static_cast<unsigned char>(x + y);
    // A wrapped sum is smaller than either operand; negating that 0/1 flag
    // yields 0x00 (no overflow) or 0xFF (overflow) after narrowing.
    const unsigned char mask =
        static_cast<unsigned char>(0u - static_cast<unsigned>(wrapped < x));
    return static_cast<unsigned char>(wrapped | mask);
}
The most efficient way is to pre-fill a table with all possible results, then use the addition of x and y to index into that table.
#include <iostream>
// Lookup table for saturating byte addition, indexed by the plain sum x + y.
// The largest possible index is 255 + 255 = 510, so 511 slots are required.
unsigned char add_o_results[255 + 255 + 1];
// Fills add_o_results; must be called once before the first add_o call.
void pre_fill() {
    for (int i = 0; i <= 255 + 255; ++i) {
        // Clamp every sum above 255 to 255.
        add_o_results[i] = static_cast<unsigned char>(i < 255 ? i : 255);
    }
}
// Returns x + y saturated at 255 by reading the precomputed table.
unsigned char inline add_o(unsigned char x, unsigned char y)
{
    return add_o_results[x + y];
}
using namespace std;
int main()
{
    // The table has to be populated before add_o is used.
    pre_fill();
    const int saturated = add_o(150, 151);
    std::cout << saturated << std::endl;
    const int in_range = add_o(10, 150);
    std::cout << in_range << std::endl;
    return 0;
}
Related
I'm trying to just copy the contents of a 32-bit unsigned int to be used as float. Not casting it, just re-interpreting the integer bits to be used as float. I'm aware memcpy is the most-suggested option for this. However, when I do memcpy from uint32_t to float, and print out the individual bits, I see they are quite different.
Here is my code snippet:
#include <iostream>
#include <stdint.h>
#include <cstring>
using namespace std;
// Writes bits 31 down to 0 of n to stdout as '1'/'0' characters, most
// significant bit first. No newline is appended.
void print_bits(unsigned n) {
    for (int bit = 31; bit >= 0; --bit) {
        if ((n >> bit) & 1u) {
            printf("1");
        } else {
            printf("0");
        }
    }
}
// Unnamed union type with one global instance, my_union.
// u_int and u_float occupy the same storage, so the bytes written through one
// member are the bytes seen through the other.
union {
uint32_t u_int;
float u_float;
} my_union;
// Demonstrates two ways of reinterpreting a 32-bit integer's bytes as a float
// (memcpy and a union) and prints the results.
int main()
{
uint32_t my_int = 0xc6f05705;
float my_float;
//Method 1 using memcpy
// Copies the object representation of my_int into my_float byte for byte.
memcpy(&my_float, &my_int, sizeof(my_float));
//Print using function
print_bits(my_int);
printf("\n");
// NOTE(review): print_bits takes an unsigned, so my_float is converted by
// value to an integer here; the bits printed are those of the converted
// number, not the bytes memcpy stored.
print_bits(my_float);
//Print using printf
printf("\n%0x\n",my_int);
// NOTE(review): %x expects an unsigned int, but a float passed through "..."
// is promoted to double, so this does not print the float's bit pattern.
printf("%0x\n",my_float);
//Method 2 using unions
my_union.u_int = 0xc6f05705;
printf("union int = %0x\n",my_union.u_int);
// NOTE(review): same format/argument mismatch as above (double passed for %x).
printf("union float = %0x\n",my_union.u_float);
return 0;
}
Outputs:
11000110111100000101011100000101
11111111111111111000011111010101
c6f05705
400865
union int = c6f05705
union float = 40087b
Can someone explain what's happening? I expected the bits to match. Didn't work with a union either.
You need to change the function print_bits to
// Reports the byte order of the machine the program runs on.
// Returns 1 when the most significant byte of a uint32_t is stored first in
// memory (big endian) and 0 otherwise.
inline
int is_big_endian(void)
{
    // Only the most significant byte of the probe is non-zero, so the first
    // byte in memory is 1 exactly when the layout is big endian.
    const uint32_t probe = 0x01000000;
    // Looking at an object through unsigned char* is always allowed, whereas
    // reading a union member other than the one last written is not defined
    // behaviour in C++.
    return *reinterpret_cast<const unsigned char*>(&probe);
}
void print_bits( const void *src, unsigned int size )
{
//Check for the order of bytes in memory of the compiler:
int t, c;
if (is_big_endian())
{
t = 0;
c = 1;
}
else
{
t = size - 1;
c = -1;
}
for (; t >= 0 && t <= size - 1; t += c)
{ //print the bits of each byte from the MSB to the LSB
unsigned char i;
unsigned char n = ((unsigned char*)src)[t];
for(i = 1 << (CHAR_BIT - 1); i > 0; i /= 2)
{
printf("%d", (n & i) != 0);
}
}
printf("\n");
}
and call it like this:
// Example call: pass the object's address and its size in bytes.
int a = 7;
print_bits(&a, sizeof(a));
that way there won't be any type conversion when you call print_bits and it would work for any struct size.
EDIT: I replaced 7 with CHAR_BIT - 1 because the size of byte can be different than 8 bits.
EDIT 2: I added support for both little endian and big endian compilers.
Also as #M.M suggested in the comments if you want to you can use template to make the function call be: print_bits(a) instead of print_bits(&a, sizeof(a))
How can I convert an unsigned char array that contains letters into an integer? I have tried this so far, but it only converts up to four bytes. I also need a way to convert the integer back into the unsigned char array.
// Assembles the first four bytes of buffer into one integer, with buffer[0] as
// the most significant byte, and returns it as an int.
// buffer must point to at least four readable bytes.
int buffToInteger(char * buffer)
{
    // Accumulate in an unsigned type: shifting a promoted (signed) int left by
    // 24 would move a set top bit into the sign bit.
    unsigned int packed = 0;
    for (int i = 0; i < 4; ++i)
    {
        // Go through unsigned char so a negative char is not sign-extended.
        packed = (packed << 8) | static_cast<unsigned char>(buffer[i]);
    }
    return static_cast<int>(packed);
}
It looks like you're trying to use a for loop, i.e. repeating a task over and over again, for an indeterminate number of steps.
// Packs the bytes of buffer into an unsigned int, treating the LAST byte as the
// least significant one.
// Only the trailing sizeof(unsigned int) bytes can contribute; when size is
// larger, the earlier bytes are ignored instead of shifting past the width of
// the result. A size of 0 yields 0.
unsigned int buffToInteger(char * buffer, unsigned int size)
{
    // Eight bits per byte, matching the shift step below.
    const unsigned int total_bits = sizeof(unsigned int) * 8;
    unsigned int ret = 0;
    unsigned int shift = 0;
    // Count down with an unsigned index; i is always one past the byte in use.
    for (unsigned int i = size; i > 0 && shift < total_bits; --i) {
        // Convert through unsigned char first so bytes >= 0x80 are not
        // sign-extended into the upper bits of the result.
        ret |= static_cast<unsigned int>(static_cast<unsigned char>(buffer[i - 1])) << shift;
        shift += 8;
    }
    return ret;
}
What I think you are going for is called a hash -- converting an object to a unique integer. The problem is a hash IS NOT REVERSIBLE. This hash will produce different results for hash("WXYZABCD", 8) and hash("ABCD", 4). The answer by #Nicholas Pipitone DOES NOT produce different outputs for these different inputs.
Once you compute this hash, there is no way to get the original string back. If you want to keep knowledge of the original string, you MUST keep the original string as a variable.
// Folds the first `size` bytes of buffer into an int: for each byte the running
// value has the byte added and is then multiplied by 31.
// The result depends on both the contents and the length; it cannot be turned
// back into the original bytes. Returns 0 for size 0.
int hash(char* buffer, size_t size) {
    // Accumulate in unsigned arithmetic, which wraps modulo 2^N; doing this in
    // int would overflow (undefined behaviour) for longer inputs.
    unsigned int acc = 0;
    for (size_t i = 0; i < size; ++i) {
        // Converting the char to unsigned keeps the sum congruent to adding the
        // char's own (possibly negative) value.
        acc += static_cast<unsigned int>(buffer[i]);
        acc *= 31u;
    }
    return static_cast<int>(acc);
}
Here's how to convert the first sizeof(int) bytes of the char array to an int:
// NOTE(review): reads sizeof(unsigned int) bytes of buffer through an unsigned int*;
// this relies on buffer being suitably aligned, and the value obtained follows the host byte order.
int val = *(unsigned int *)buffer;
and to convert it back:
// NOTE(review): overwrites the first sizeof(unsigned int) bytes of buffer in host byte order;
// the same alignment requirement as for the read applies.
*(unsigned int *)buffer = val;
Note that your buffer must be at least the length of your int type size. You should check for this.
I'm trying to code a function that returns the maximum value of an image using SSE instructions. I get a strange result: the maximum value comes out as -356426400 (the value should be 254).
this is my code :
// Intended to write the largest of the h*w unsigned ints at src into *val using
// SSE intrinsics. The NOTE(review) comments mark why it does not do so as written.
void max_sse(unsigned int *src, long h, long w, unsigned int *val)
{
unsigned int tab[16];
// NOTE(review): one __m128i holds 16 bytes (four 4-byte unsigned ints), yet the
// index advances by 16 elements, so most of the input is never examined.
for(int i=0; i<h*w;i+=16)
{
// _mm_load_si128 requires &src[i] to be 16-byte aligned.
__m128i PG=_mm_load_si128((__m128i*)(&src[i]));
// NOTE(review): max is declared inside the loop and used in its own
// initializer, so it starts each iteration with an indeterminate value and
// nothing is carried over. _mm_max_epi8 also compares signed 8-bit lanes,
// not unsigned 32-bit values.
__m128i max=_mm_max_epi8(max,PG);
// Stores 16 bytes into the start of tab; the rest of tab is left untouched.
_mm_store_si128((__m128i*)&tab, max);
}
*val=tab[0];
// NOTE(review): this scans all 16 entries of tab although the store above only
// fills its first 16 bytes, so the remaining entries are read uninitialized.
for (int i=0;i<16;i++)
{
if (tab[i]>*val)
{
*val=tab[i];
}
}
}
1) I don't see any code dealing with alignment
2) There's a mismatch between unsigned integers and _mm_max_epi8 which compares 8-bit signed integers (http://msdn.microsoft.com/en-us/library/bb514045(v=vs.90).aspx)
3) I'm assuming you have a h*w matrix with rows multiple of 4 (or dealing with that with some padding for instance)
On Windows you could do something like:
#include "windows.h"
#include <malloc.h>
#include <smmintrin.h>
#include <iostream>
using namespace std;
void max_sse(unsigned int *src, long h, long w, unsigned int *val)
{
_STATIC_ASSERT(sizeof(unsigned int) == sizeof(BYTE)*4);
if( w % 4 != 0)
return; // ERROR Can't do it, need 4-multiple rows or do some alignment!
unsigned int *aligned_src = (unsigned int*)_aligned_malloc(h*w*sizeof(unsigned int), 16); // _mm_load_si128 needs 16-bytes aligned memory
memcpy(aligned_src, src, sizeof(unsigned int)*h*w);
__declspec(align(16)) __m128i max = {0,0,0,0};
// Iterates the matrix
for(int i=0; i<h*w; i+=4)
{
__m128i *pg = (__m128i*)(aligned_src+i);
__m128i PG = _mm_load_si128(pg);
__m128i newmax = _mm_max_epu32(max, PG);
_mm_store_si128(&max, newmax);
}
unsigned int abs_max = 0;
unsigned int *max_val = (unsigned int*)&max;
for (int i=0;i<4;i++)
{
if (abs_max < *(max_val+i))
{
abs_max = *(max_val+i);
}
}
_aligned_free(aligned_src);
cout << "The max is: " << abs_max << endl;
}
int main()
{
    // A 3-by-4 sample matrix; the row width is a multiple of four.
    const long rows = 3;
    const long cols = 4;
    unsigned int pixels[] = {
        0, 1, 2, 4,
        5, 6, 7, 8,
        224, 225, 226, 129
    };
    unsigned int largest;
    max_sse(pixels, rows, cols, &largest);
    return 0;
}
I'm assuming the memcpy a necessary evil in your code since there isn't any other information on memory alignment. If you have something to deal with that, do it yourself and it will be a lot better.
I am new to the low level c++, and I find it a bit hard to understand how to manipulate bits. I am trying to do the following to use in a compression algorithm I am trying to make:
// Inputs of the packing problem: the value, its width in bits, the 1-based bit
// position inside a where it starts, and the two destination bytes a and b.
unsigned int num = ...;//we want to store this number
unsigned int num_size = 3;//this is the maximum size of the number in bits, and
//can be anything from 1 bit to 32
unsigned int pos = 7;//the starting pos on the 1st bit.
//this can be anything from 1 to 8
char a;
char b;
if the num_size is 3 and pos is 7 for example, we must store num, on the 7th and 8th bit of a and on the 1st bit of b.
How about just?
// Shift num up so that its lowest bit lands on bit pos (1-based) of a; the
// assignment to the char keeps only the low byte of the shifted value.
a = num << (pos-1);
// Bits 8..15 of the shifted value are the part that spilled past a; move them
// down into b.
b = ((num << (pos-1)) & 0xFF00) >> 8;
To read num back just
// Rebuild the 16-bit window from the two bytes (b is the high byte), then shift
// the value back down to bit 0. Each char goes through unsigned char first so a
// negative char is not sign-extended into the upper bits.
num = ((unsigned int)(unsigned char)a + ((unsigned int)(unsigned char)b << 8)) >> (pos - 1);
Note, this doesn't do any sanity checks, such as whether all the relevant bits fit in a and b, you'll have to do that yourself.
For this specific test case, the highest number that fits into 2 unsigned char is actually 65535.
#include <iostream>
// Returns bits 8..15 of input as a byte.
unsigned char high(int input)
{
    const int upper = input >> 8;
    // Narrowing to unsigned char keeps exactly the low eight bits.
    return static_cast<unsigned char>(upper);
}
// Returns bits 0..7 of input as a byte.
unsigned char low(int input)
{
    // Narrowing to unsigned char keeps exactly the low eight bits.
    const unsigned char lowest = static_cast<unsigned char>(input);
    return lowest;
}
// Recombines two bytes into one integer: high supplies bits 8..15 and low
// supplies bits 0..7.
int value(unsigned char low, unsigned char high)
{
    const int upper = static_cast<int>(high) << 8;
    // The low eight bits of upper are zero, so adding equals OR-ing here.
    return upper + low;
}
// Splits 65535 into its low and high byte, recombines them, and prints all
// three values.
int main()
{
    int num = 65535;
    unsigned char l = low(num);
    unsigned char h = high(num);
    int val = value(l, h);
    // An unsigned char is streamed as a character; widen to int so the byte
    // values are printed as numbers.
    std::cout<<"l: "<<static_cast<int>(l)<<" h: "<<static_cast<int>(h)<<" val: "<<val;
}
Hello I have a struct here that is 7 bytes and I'd like to write it to a 64 bit integer. Next, I'd like to extract out this struct later from the 64 bit integer.
Any ideas on this?
#include "stdafx.h"
// Three single-byte fields followed by an unsigned int: the payload that is to
// be packed into one 64-bit integer.
struct myStruct
{
    unsigned char a;
    unsigned char b;
    unsigned char c;          // was a second `b`, which does not compile; users access m->c
    unsigned int someNumber;
};
// Skeleton for the round trip struct -> 64-bit integer -> struct: fills a
// myStruct, prints it, then prints the (still unimplemented) packed value and
// the (still unimplemented) unpacked struct.
int _tmain(int argc, _TCHAR* argv[])
{
    // Automatic storage instead of `new`, so nothing is leaked.
    myStruct m = myStruct();
    m.a = 11;
    m.b = 8;
    m.c = 12;
    m.someNumber = 30;
    printf("\n%s\t\t%i\t%i\t%i\t%u\n\n", "struct", m.a, m.b, m.c, m.someNumber);
    // unsigned long long is at least 64 bits wide; unsigned long is not
    // guaranteed to be, and seven bytes would not fit in a 32-bit one.
    unsigned long long num = 0;
    // todo: use bitwise operations from m into num (total of 7 bytes)
    printf("%s\t\t%llu\n\n", "ulong", num);
    // Reset to all zeros before unpacking.
    m = myStruct();
    // todo: use bitwise operations from num into m;
    printf("%s\t\t%i\t%i\t%i\t%u\n\n", "struct", m.a, m.b, m.c, m.someNumber);
    return 0;
}
You should do something like this:
// A 64-bit integer with named accessors for the fields packed inside it.
// Layout used by the accessors: byte n occupies bits [8n, 8n+7]; the 32-bit
// word starts at bit 24, directly above the three low bytes.
class structured_uint64
{
    uint64_t data;
public:
    structured_uint64(uint64_t x = 0):data(x) {}
    // Exposes the raw 64-bit value for reading and writing.
    operator uint64_t&() { return data; }
    // Returns byte n (0 = least significant) of the packed value.
    uint8_t low_byte(size_t n) const {
        return static_cast<uint8_t>((data >> (n * 8)) & 0xff);
    }
    // Replaces byte n with val, leaving every other bit unchanged.
    void low_byte(size_t n, uint8_t val) {
        const uint64_t mask = static_cast<uint64_t>(0xff) << (8 * n);
        data = (data & ~mask) | (static_cast<uint64_t>(val) << (8 * n));
    }
    // Returns the 32 bits stored in bits 24..55.
    uint32_t hi_word() const {
        return static_cast<uint32_t>((data >> 24) & 0xffffffffu);
    }
    // et cetera
};
(there is, of course, lots of room for variation on the details of the interface and where among the 64 bits the constituents are placed)
Using different types to alias the same portion of memory is a generally bad idea. The thing is, it's very valuable for the optimizer to be able to use reasoning like:
"Okay, I've read a uint64_t at the start of this block, and nowhere in the middle does the program write to any uint64_ts, therefore the value must be unchanged!"
which means it will get the wrong answer if you tried to change the value of the uint64_t object through a uint32_t reference. And as this is very dependent on what optimizations are possible and done, it is actually pretty easy to never run across the problem in test cases, but see it in the real program you're trying to write -- and you'll spend forever trying to find the bug because you convinced yourself it's not this problem.
So, you really should do the insertion/extraction of the fields with bit twiddling (or intrinsics, if profiling shows that this is a performance issue and there are useful ones available) rather than trying to set up a clever struct.
If you really know what you're doing, you can make the aliasing work, I believe. But it should only be done if you really know what you're doing, and that includes knowing relevant rules from the standard inside and out (which I don't, and so I can't advise you on how to make it work). And even then you probably shouldn't do it.
Also, if you intend your integral types to be a specific size, you should really use the correct types. For example, never use unsigned int for an integer that is supposed to be exactly 32 bits. Instead use uint32_t. Not only is it self-documenting, but you won't run into a nasty surprise when you try to build your program in an environment where unsigned int is not 32 bits.
Use a union. Each element of a union occupies the same address space. The struct is one element, the unsigned long long is another.
#include <stdio.h>
// Overlays a struct (three bytes plus an unsigned int) on one unsigned long
// long: members e and f start at the same address and share storage.
// NOTE: with default alignment a compiler typically inserts padding before d,
// so the struct may cover more bytes than the sum of its fields.
union data
{
struct
{
unsigned char a;
unsigned char b;
unsigned char c;
unsigned int d;
} e;
unsigned long long f;
};
int main()
{
data dat;
dat.f = 0xFFFFFFFFFFFFFFFF;
dat.e.a = 1;
dat.e.b = 2;
dat.e.c = 3;
dat.e.d = 4;
printf("f=%016llX\n",dat.f);
printf("%02X %02X %02X %08X\n",dat.e.a,dat.e.b,dat.e.c,dat.e.d);
return 0;
}
Output, but note one byte of the original unsigned long long remains. Compilers like to align data such as 4-byte integers on addresses divisible by 4, so three bytes, then a pad byte so the integer is at offset 4 and the struct has a total size of 8.
f=00000004FF030201
01 02 03 00000004
This can be controlled in compiler-dependent fashion. Below is for Microsoft C++:
#include <stdio.h>
// pack(push,1) saves the current packing setting and requests 1-byte packing,
// so no padding is inserted between c and d inside the struct below.
#pragma pack(push,1)
// Same overlay as before: struct e and integer f share the same storage.
union data
{
struct
{
unsigned char a;
unsigned char b;
unsigned char c;
unsigned int d;
} e;
unsigned long long f;
};
// Restores the packing setting that was saved by the push above.
#pragma pack(pop)
int main()
{
data dat;
dat.f = 0xFFFFFFFFFFFFFFFF;
dat.e.a = 1;
dat.e.b = 2;
dat.e.c = 3;
dat.e.d = 4;
printf("f=%016llX\n",dat.f);
printf("%02X %02X %02X %08X\n",dat.e.a,dat.e.b,dat.e.c,dat.e.d);
return 0;
}
Note the struct occupies seven bytes now and the highest byte of the unsigned long long is now unchanged:
f=FF00000004030201
01 02 03 00000004
Got it.
// Packs three bytes and a 32-bit number into one 64-bit value.
// Layout, from most to least significant: a in bits 48..55, b in bits 40..47,
// c in bits 32..39, someNumber in bits 0..31. Bits 56..63 are zero.
static unsigned long long compress(char a, char b, char c, unsigned int someNumber)
{
    // Each char goes through unsigned char first: a negative char would
    // otherwise be sign-extended and set every bit above it.
    unsigned long long x = static_cast<unsigned char>(a);
    x = (x << 8) | static_cast<unsigned char>(b);
    x = (x << 8) | static_cast<unsigned char>(c);
    x = (x << 32) | someNumber;
    return x;
}
// Unpacks a 64-bit value into a newly allocated myStruct: someNumber comes
// from bits 0..31, c from bits 32..39, b from bits 40..47 and a from bits
// 48..55. Calls printBinary(x) first.
// The struct is created with `new`; the caller owns it and must delete it.
myStruct * decompress(unsigned long long x)
{
    printBinary(x);
    myStruct * m = new myStruct();
    // Select each field with an explicit AND mask. The previous `x | constant`
    // form produced the right values only because the assignment discarded
    // the upper bits.
    m->someNumber = static_cast<unsigned int>(x & 0xFFFFFFFFULL);
    x >>= 32;
    m->c = static_cast<unsigned char>(x & 0xFF);
    x >>= 8;
    m->b = static_cast<unsigned char>(x & 0xFF);
    x >>= 8;
    m->a = static_cast<unsigned char>(x & 0xFF);
    return m;
}