reverseBytes using Bitwise operators - bit-manipulation

reverseBytes - reverse bytes
Example: reverseBytes(0x0123456789abcdef) = 0xefcdab8967452301
Legal ops: ! ~ & ^ | + << >>
I'm required to solve the above problem. There is no limit on no. of operators. I already have a different solution to this. But I would like to know what's the problem with the following solution I came up with? Thank you.
long reverseBytes(long x) {
    /* Reverse the byte order of a 64-bit word, e.g.
     * reverseBytes(0x0123456789abcdef) == 0xefcdab8967452301.
     * Uses only the legal ops: & >> << +
     *
     * BUG in the original: every byte was held in a 32-bit int, so
     *  - `int a = x` truncated away the upper four bytes before b..h were
     *    extracted (e..h were just copies of the sign extension of byte 3),
     *  - `a << 56` shifted a 32-bit int by more than its width, which is
     *    undefined behaviour in C/C++.
     * Keeping every intermediate in (64-bit) long fixes both problems.
     */
    long a = x & 0xFF;          /* byte 0 (LSB) */
    long b = (x >> 8) & 0xFF;   /* byte 1 */
    long c = (x >> 16) & 0xFF;  /* byte 2 */
    long d = (x >> 24) & 0xFF;  /* byte 3 */
    long e = (x >> 32) & 0xFF;  /* byte 4 */
    long f = (x >> 40) & 0xFF;  /* byte 5 */
    long g = (x >> 48) & 0xFF;  /* byte 6 */
    long h = (x >> 56) & 0xFF;  /* byte 7 (MSB); & 0xFF discards sign bits */
    return (a << 56) + (b << 48) + (c << 40) + (d << 32)
         + (e << 24) + (f << 16) + (g << 8) + h;
}

For practical application, you can use library function bswap64()
For student practice, you could write a simple loop like:
unsigned long reverseBytes(unsigned long x) {
    // Reverse the byte order of a 64-bit word (0x0123..ef -> 0xefcd..01).
    // BUG FIX: `rc` was read before it was ever written (`rc << 8` on the
    // first iteration), which is undefined behaviour — it must start at 0.
    unsigned long rc = 0;
    // Peel one byte off the low end of x per iteration and push it onto
    // the low end of rc; after 8 rounds the byte order is reversed.
    for (int i = 0; i < 8; i++, x >>= 8)
        rc = (rc << 8) | (unsigned char)x;
    return rc;
}

Related

How to unit test bit manipulation logic

I have a method which converts RGBA to BGRA. Below is the method
unsigned int ConvertRGBAToBGRA(unsigned int v) {
    // Swap the R and B channels of a packed 32-bit pixel.
    // Input layout: A in bits 24-31, B in 16-23, G in 8-15, R in 0-7.
    // BUG FIX: the original held each channel in an unsigned char; `a << 24`
    // then promotes to *signed* int and shifts into the sign bit whenever
    // a >= 0x80 — implementation-defined/UB pre-C++20. Keeping every
    // intermediate in unsigned int keeps the whole computation well-defined.
    unsigned int r = v & 0xFFu;
    unsigned int g = (v >> 8) & 0xFFu;
    unsigned int b = (v >> 16) & 0xFFu;
    unsigned int a = (v >> 24) & 0xFFu;
    return (a << 24) | (r << 16) | (g << 8) | b;
}
How can I unit test this nicely? Is there a way I can read back the bits and unit test this method somehow?
I am using googletests
Inspired by @Yves Daoust's comment, why can't you just write a series of checks like the ones below? You can use the digit-separator formatting introduced in C++14:
unsigned int ConvertRGBAToBGRA(unsigned int v) {
    // Unpack the four 8-bit channels of the pixel (low byte is R,
    // high byte is A), then repack with the R and B channels exchanged.
    unsigned char red   = static_cast<unsigned char>(v);
    unsigned char green = static_cast<unsigned char>(v >> 8);
    unsigned char blue  = static_cast<unsigned char>(v >> 16);
    unsigned char alpha = static_cast<unsigned char>(v >> 24);
    // Build the result byte by byte: A, R, G, B from high to low.
    unsigned int out = alpha;
    out = (out << 8) | red;
    out = (out << 8) | green;
    out = (out << 8) | blue;
    return out;
}
// GoogleTest: pin ConvertRGBAToBGRA against hand-computed expectations.
// C++14 digit separators (0x12'34'56'78) keep each channel byte visually
// distinct in the literals.
TEST(ConvertRGBAToBGRATest, Test1) {
// Direct checks: bytes 0 and 2 (R and B) swap while bytes 1 and 3
// (G and A) stay in place.
EXPECT_EQ(ConvertRGBAToBGRA(0x12'34'56'78), 0x12'78'56'34);
EXPECT_EQ(ConvertRGBAToBGRA(0x12'78'56'34), 0x12'34'56'78);
EXPECT_EQ(ConvertRGBAToBGRA(0x11'11'11'11), 0x11'11'11'11);
EXPECT_EQ(ConvertRGBAToBGRA(0x00'00'00'00), 0x00'00'00'00);
EXPECT_EQ(ConvertRGBAToBGRA(0xAa'Bb'Cc'Dd), 0xAa'Dd'Cc'Bb);
// Round-trip checks: the conversion is its own inverse, so applying it
// twice must return the original pixel.
EXPECT_EQ(ConvertRGBAToBGRA(ConvertRGBAToBGRA(0x12'34'56'78)), 0x12'34'56'78);
EXPECT_EQ(ConvertRGBAToBGRA(ConvertRGBAToBGRA(0x12'78'56'34)), 0x12'78'56'34);
EXPECT_EQ(ConvertRGBAToBGRA(ConvertRGBAToBGRA(0x11'11'11'11)), 0x11'11'11'11);
EXPECT_EQ(ConvertRGBAToBGRA(ConvertRGBAToBGRA(0x00'00'00'00)), 0x00'00'00'00);
EXPECT_EQ(ConvertRGBAToBGRA(ConvertRGBAToBGRA(0xAa'Bb'Cc'Dd)), 0xAa'Bb'Cc'Dd);
}
Live example: https://godbolt.org/z/eEajYYYsf
You could also define a custom matcher and use EXPECT_THAT macro:
// A custom matcher for comparing BGRA and RGBA.
MATCHER_P(IsBgraOf, n, "") {
return ((n & 0xFF000000) == (arg & 0xFF000000)) &&
((n & 0x00FF0000) == ((arg << 16) & 0x00FF0000)) &&
((n & 0x0000FF00) == (arg & 0x0000FF00));
}
// Same expectations expressed through the custom IsBgraOf matcher; on
// failure EXPECT_THAT prints both the actual value and the matcher
// description, which reads better than a raw EXPECT_EQ mismatch.
TEST(ConvertRGBAToBGRATest, WithExpectThat) {
EXPECT_THAT(ConvertRGBAToBGRA(0x12'34'56'78), IsBgraOf(0x12'34'56'78));
EXPECT_THAT(ConvertRGBAToBGRA(0x12'78'56'34), IsBgraOf(0x12'78'56'34));
EXPECT_THAT(ConvertRGBAToBGRA(0xAa'Bb'Cc'Dd), IsBgraOf(0xAa'Bb'Cc'Dd));
EXPECT_THAT(ConvertRGBAToBGRA(0x00'00'00'00), IsBgraOf(0x00'00'00'00));
EXPECT_THAT(ConvertRGBAToBGRA(0x11'11'11'11), IsBgraOf(0x11'11'11'11));
}
Live example: https://godbolt.org/z/P4EcW19s9
You can split the value in four bytes by mapping to an array via a pointer. Then swap the bytes.
// View the 32-bit pixel as four bytes; aliasing through a char-type
// pointer (uint8_t) is legal, unlike aliasing through uint32_t*.
uint8_t* pV= reinterpret_cast<uint8_t*>(&V);
// Swap bytes 1 and 3 in place. NOTE(review): which channels these bytes
// hold depends on endianness — on a little-endian target with R in the low
// byte, the R<->B swap would be pV[0]<->pV[2], not pV[1]<->pV[3]; confirm
// the intended layout before using this.
uint8_t Swap= pV[1]; pV[1]= pV[3]; pV[3]= Swap;

In shader intBitsToFloat and floatBitsToInt

So this issue I'm running into is lack of knowledge, I need to be in the glsl 120 (OpenGL 2.1) which locks the software down heavily, Essentially I need many of the features from 440 shaders in 120, using shader5 on supported hardware which covers it just fine, however on shader4 for mac and other things IE mesa I need to do a lot of functions myself. But the ones that keep nagging me are my bit related functions, I'm very bad at tracking bits so any help would be lovely. The largest function I'm having issues on is intBitsToFloat and floatBitsToInt, I've tried a few things but to no success.
// Attempted reimplementation of GLSL floatBitsToInt for targets without
// native bit-casts. NOTE(review): as written this cannot work — see the
// inline notes below for the concrete problems.
int floatToIntBitst(float a){
//Nan
if(a != a) return 0x7fc00000;
//-0
if (a == 0.0) return (1.0 / a == -1.0/0.0 ) ? 0x80000000 : 0;
bool neg = false;
if (a < 0.0) {
neg = true;
a = -a;
}
if (isinf(a)) {
return neg ? 0xff800000 : 0x7f800000;
}
// BUG(review): `a` is a float — neither C nor GLSL allows >> or & on a
// floating-point operand, so the next two lines do not compile. The
// constants (shift 52, mask 0x7ff, bias 1023) are also the IEEE-754
// *double* layout; a 32-bit float uses shift 23, mask 0xff, bias 127.
// The exponent/mantissa must instead be recovered arithmetically (e.g.
// via log2/floor and repeated scaling) since bit ops on floats are out.
int exp = ((a >> 52) & 0x7ff) - 1023;
int mantissa = (a & 0xffffffff) >> 29;
if (exp <= -127) {
// Denormal range: fold the implicit leading 1 into the mantissa and
// shift it down — the intent is right, but it operates on the broken
// values computed above.
mantissa = (0x800000 | mantissa) >> (-127 - exp + 1);
exp = -127;
}
// BUG(review): `negative` is undeclared — the flag above is named `neg`.
// 2147483648 also does not fit in a 32-bit signed int literal.
int bits = negative?2147483648: 0;
bits |= (exp + 127) << 23;
bits |= mantissa;
return bits;
}
some of my other functions, any feedback would be appreciated
bitfieldReverse
int bitfieldReverse(int x) {
    // Reverse the order of all 32 bits by swapping progressively larger
    // groups: adjacent bits, then bit pairs, nibbles, bytes, halfwords.
    unsigned int v = x;
    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
    v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
    v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
    v = (v >> 16) | (v << 16);
    return v;
}
I have all variations, if something needs to change for uints let me know.
LSB and MSB
// GLSL findLSB: the index (0-31) of the least significant set bit,
// or -1 when x == 0.
// FIX(review): the original returned the isolated bit VALUE (x & -x), not
// its index — that is not what the GLSL built-in returns. (`-x` on INT_MIN
// is also signed overflow; the unsigned copy below avoids that.)
int findLSB(int x) {
    if (x == 0) return -1;
    unsigned int v = x;   // two's-complement bits, no UB on INT_MIN
    int i = 0;
    while (!(v & 1u)) { v >>= 1; ++i; }
    return i;
}
// GLSL findMSB: for positive x, the index (0-31) of the most significant
// set bit; for negative x, the index of the most significant ZERO bit
// (per the GLSL spec); -1 for x == 0 or x == -1.
// FIX(review): the original returned the isolated bit VALUE rather than an
// index, and its arithmetic right shifts smeared the sign bit so every
// negative input collapsed to 0.
int findMSB(int x) {
    // For negatives the spec asks for the highest bit that differs from
    // the sign, i.e. the MSB of ~x.
    unsigned int v = (x < 0) ? ~(unsigned int)x : (unsigned int)x;
    if (v == 0u) return -1;
    int i = -1;
    while (v != 0u) { v >>= 1; ++i; }
    return i;
}
Same goes for these
bitCount
// Population count: the number of set bits in the 32-bit value.
// BUG FIX: the original summed in a *signed* int; for inputs with many set
// bits the first stage computes 0x55555555 + 0x55555555 = 0xAAAAAAAA,
// which overflows signed 32-bit arithmetic (undefined behaviour). Doing
// the parallel adds on an unsigned copy makes every step well-defined.
int bitCount(int a) {
    unsigned int v = a;
    v = (v & 0x55555555u) + ((v >> 1) & 0x55555555u); // 2-bit sums
    v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit sums
    v = (v + (v >> 4)) & 0x0f0f0f0fu;                 // 8-bit sums
    v = v + (v >> 8);
    v = v + (v >> 16);
    return v & 0xffu;  // total fits in the low byte (max 32)
}
Hardware that doesn't offer GL 3.0+ is almost always hardware that also doesn't have integers as a type distinct from float. As such, all of your ints are really floats in disguise. Which means that they must live with the ranges and restrictions of a float.
Because of this, you cannot effectively do the kind of bit-manipulation you're trying on such hardware.

Optimizing Bitshift into Array

I have a piece of code that runs at ~1.2 million runs per second after performing some tasks, the bulkiest is setting a uint8_t array with bitshifted data from two uint32_t pieces of data. The excerpt code is as follows:
static inline uint32_t RotateRight(uint32_t val, int n)
{
    // Rotate val right by n bits.
    // BUG FIX: the original computed `val << (32 - n)`, which for n == 0
    // shifts a 32-bit value by 32 — undefined behaviour in C/C++. Masking
    // both counts with 31 keeps every shift in [0,31] and makes the rotate
    // well-defined for any n (0 and multiples of 32 become no-ops). For
    // n in 1..31 the result is unchanged (| equals + on disjoint bits).
    n &= 31;
    return (val >> n) | (val << ((32 - n) & 31));
}
static inline uint32_t CSUInt32BE(const uint8_t *b)
{
    // Assemble a 32-bit value from four bytes stored big-endian
    // (b[0] is the most significant byte).
    uint32_t value = 0;
    for (int i = 0; i < 4; ++i)
        value = (value << 8) | b[i];
    return value;
}
static uint32_t ReverseBits(uint32_t val)
{
    // Reverse all 32 bits of val (bit 0 <-> bit 31, etc.) with a 256-entry
    // per-byte lookup table — the fastest variant benchmarked here:
    //   naive 32-iteration loop        ~220k l/s
    //   5-step mask-and-shift          ~770k l/s
    //   Knuth-style 3-step swizzle     ~820k l/s
    //   halfword/byte/nibble swaps     ~850k l/s
    //   BitReverseTable256 lookup      ~970k l/s  (this version)
    // FIX: cast each table entry to uint32_t before shifting; if the table
    // element type is narrower than int (e.g. uint8_t), `entry << 24`
    // promotes to *signed* int and shifts into the sign bit whenever the
    // reversed byte has its top bit set — undefined behaviour pre-C++20.
    return ((uint32_t)BitReverseTable256[val & 0xff] << 24) |
           ((uint32_t)BitReverseTable256[(val >> 8) & 0xff] << 16) |
           ((uint32_t)BitReverseTable256[(val >> 16) & 0xff] << 8) |
           (uint32_t)BitReverseTable256[val >> 24];
}
// One 8-byte block of the StuffIt DES variant. The cipher core operates on
// bit-reversed, pre-rotated halves, so this wraps the (elided) encryption
// step with the forward transform on the way in and its inverse on the way
// out. NOTE(review): this excerpt is truncated — the encryption call and
// the closing brace are not shown.
static void StuffItDESCrypt(uint8_t data[8], StuffItDESKeySchedule *ks, BOOL enc)
{
// Load both halves big-endian, bit-reverse them, then rotate by 29
// (= 32 - 3, undoing the net 3-bit rotation the core expects).
uint32_t left = ReverseBits(CSUInt32BE(&data[0]));
uint32_t right = ReverseBits(CSUInt32BE(&data[4]));
right = RotateRight(right, 29);
left = RotateRight(left, 29);
//Encryption function runs here
// Inverse transform: rotate back by 3 and bit-reverse again.
left = RotateRight(left, 3);
right = RotateRight(right, 3);
uint32_t left1 = ReverseBits(left);
uint32_t right1 = ReverseBits(right);
// Store big-endian with the halves swapped (right half first) —
// presumably the DES final-permutation ordering; confirm against the
// original StuffIt format before changing.
data[0] = right1 >> 24;
data[1] = (right1 >> 16) & 0xff;
data[2] = (right1 >> 8) & 0xff;
data[3] = right1 & 0xff;
data[4] = left1 >> 24;
data[5] = (left1 >> 16) & 0xff;
data[6] = (left1 >> 8) & 0xff;
data[7] = left1 & 0xff;
Is this the most optimal way to accomplish this? I have a uint64_t version as well:
// 64-bit variant: pack both reversed halves into one word (left half in
// bits 32-63, right half in bits 0-31), then store right big-endian into
// data[0..3] and left big-endian into data[4..7] — the same byte order the
// 32-bit version produces.
uint64_t both = ((uint64_t)ReverseBits(left) << 32) | (uint64_t)ReverseBits(right);
data[0] = (both >> 24 & 0xff);
data[1] = (both >> 16) & 0xff;
data[2] = (both >> 8) & 0xff;
data[3] = both & 0xff;
data[4] = (both >> 56);
data[5] = (both >> 48) & 0xff;
data[6] = (both >> 40) & 0xff;
data[7] = (both >> 32) & 0xff;
I tested what would happen if I completely skipped this assignment (the ReverseBits function is still done), and the code runs at ~6.5 million runs per second. In addition, this speed hit happens if I only do just one as well, leveling out at 1.2 million even without touching the other 7 assignments.
I'd hate to think that this operation takes a massive 80% speed hit due to this work and can't be made any faster.
This is on Windows Visual Studio 2015 (though I try to keep the source as portable to macOS and Linux as possible).
Edit: The full base code is at Github. I am not the original author of the code, however I have forked it and maintain a password recovery solution using a modified for speed version. You can see my speed up successes in ReverseBits with various solutions and benched speeds.
These files are 20+ years old, and has successfully recovered files albeit at a low speed for years. See blog post.
You're certainly doing more work than you need to do. Note how function ReverseBits() goes to some effort to put the bytes of the reversed word in the correct order, and how the next thing that happens -- the part to which you are attributing the slowdown -- is to reorder those same bytes.
You could write and use a modified version of ReverseBits() that puts the bytes of the reversed representation directly into the correct places in the array, instead of packing them into integers just to unpack them again. That ought to be at least a bit faster, as you would be strictly removing operations.
My immediate thought was to "view" the int32_t as if they were an array of int8_t like
uint8_t data2[8];
// NOTE(review): writing through a uint32_t* into a uint8_t array violates
// strict aliasing (undefined behaviour in C++), and the resulting byte
// order depends on the target's endianness. memcpy(&data2[0], &right1, 4)
// is the well-defined way to express the same store.
*((uint32_t*)&data2[0]) = right1;
*((uint32_t*)&data2[4]) = left1;
However, you store the most significant bits of right1 in data[0], whereas this approach lets the least significant bits go to data[0]. Anyway, as I do not know what ReverseBits does and whether you could also adapt your code according to a different order, maybe it helps...

Best c++ way to choose randomly position of set bit in bitset

I have std::bitset<32> word and I want to randomly choose an index (0-31) of some bit which is 1. How can I do that without loops and counters? Is there any std::algorithm suitable for that?
If it's easier I can convert the bitset to string or int and make it on the string or int.
Here's a first stab at it:
std::bitset<32> bitset{...};
// Seed a Mersenne Twister; time(nullptr) is a weak seed but fine here.
std::mt19937 prng(std::time(nullptr));
// Draw a rank c uniformly in [1, number of set bits].
// NOTE(review): assumes at least one bit is set — bitset.count() == 0
// makes the distribution bounds {1, 0} invalid.
std::uniform_int_distribution<std::size_t> dist{1, bitset.count()};
std::size_t p = 0;
// Walk the bits, decrementing c on each set bit; the loop stops one past
// the c-th set bit.
for(std::size_t c = dist(prng); c; ++p)
c -= bitset[p];
// (p - 1) is now the index of the chosen bit.
It works by counting the set bits, doing the random pick c in that interval, then looking for the cth set bit.
If you have 32-bit (or even 64-bit) bitset, more efficient solution would be to convert to integer and then use bitwise operations on that integer to get random set bit.
Here is how you can convert your bitset to unsigned long:
std::bitset<32> word(0x1028);
unsigned long ulWord = word.to_ulong(); // ulWord == 0x1028
unsigned int bitcnt = word.count();
// selectBit returns a position counted from the MSB of the 64-bit value,
// hence the 63- to convert it to a conventional LSB-based bit index.
unsigned int randomSetBitIndex = 63-selectBit(ulWord, random() % bitcnt + 1);
// NOTE(review): `1` here is a 32-bit int — shifting it by 31 sets the sign
// bit and shifting by 32+ is undefined behaviour; it should be
// `1UL << randomSetBitIndex`.
unsigned long randomSetBit = 1 << randomSetBitIndex;
Here is the full code:
// Select random set bit from a bitset
#include <iostream>
#include <bitset>
#include <random>
using namespace std;
// Branchless rank-select adapted from Bit Twiddling Hacks: return the
// position of the bit with rank r (the r-th set bit, counting from the
// MSB side). NOTE(review): unlike the original hack, this adaptation
// returns 64-s rather than s, so the result is an MSB-relative position —
// callers convert with `63 - selectBit(...)`. Behaviour is unspecified if
// r exceeds the population count of v.
unsigned int selectBit(unsigned long long v, unsigned int r) {
// Source: https://graphics.stanford.edu/~seander/bithacks.html
// v - Input: value to find position with rank r.
// r - Input: bit's desired rank [1-64].
unsigned int s; // Output: Resulting position of bit with rank r [1-64]
uint64_t a, b, c, d; // Intermediate temporaries for bit count.
unsigned int t; // Bit count temporary.
// Do a normal parallel bit count for a 64-bit integer,
// but store all intermediate steps: a = 2-bit sums, b = 4-bit sums,
// c = 8-bit sums, d = 16-bit sums, t = total popcount of the high half.
a = v - ((v >> 1) & ~0UL/3);
b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
c = (b + (b >> 4)) & ~0UL/0x11;
d = (c + (c >> 8)) & ~0UL/0x101;
t = (d >> 32) + (d >> 48);
// Now do branchless select! Each step halves the search window by
// comparing r against the popcount of the upper part; the
// ((t - r) & 256) trick yields a 0/256 flag without branching.
s = 64;
s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
t = (d >> (s - 16)) & 0xff;
s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
t = (c >> (s - 8)) & 0xf;
s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
t = (b >> (s - 4)) & 0x7;
s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
t = (a >> (s - 2)) & 0x3;
s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
t = (v >> (s - 1)) & 0x1;
s -= ((t - r) & 256) >> 8;
// Convert the hack's 1-based-from-LSB s into an MSB-relative position.
return 64-s;
}
// Demo: pick a uniformly random set bit of a 32-bit word using the
// branchless rank-select above.
int main() {
// Input
std::bitset<32> word(0x1028);
// Initialize random number generator
std::random_device randDevice;
std::mt19937 random(randDevice());
// Select random bit: draw a rank uniformly in [1, popcount] and map it
// to the position of that set bit (selectBit counts from the MSB, so
// 63- converts it to a conventional LSB-based index).
unsigned long ulWord = word.to_ulong();
unsigned int bitcnt = word.count();
unsigned int randomSetBitIndex = 63-selectBit(ulWord, random() % bitcnt + 1);
// BUG FIX: the original shifted the 32-bit int literal `1`; for an index
// of 31 that shifts into the sign bit and for >= 32 it is undefined
// behaviour. 1UL performs the shift in unsigned long width.
unsigned long randomSetBit = 1UL << randomSetBitIndex;
// Output
cout << "0x" << std::hex << randomSetBit << endl; // either 0x8, 0x20 or 0x1000
return 0;
}
Run it on Ideone.

A memory-efficient SHA1 implementation

I'm working with a very restrictive embedded processor, which only has 128 bytes of ram. I'd like to implement SHA1 on it. RFC3174 describes, in 'method 2', a way of implementing SHA1 that doesn't require allocating an array of 80 32-bit words (which, at 320 bytes, is obviously not practical), and seems like it ought to be usable on my processor. I'm unable to find any implementations of 'method 2', though, and the sample code in the RFC only implements the default method.
Is anyone aware of a memory-efficient implementation of SHA1 in C or C++?
You should be able to quickly adapt the method 1 source to method 2. The function to change is Sha1ProcessMessageBlock() in method 1. Initialize w[0:15] from the message, then do a loop from 0 to 79, where you only do w[] manipulation after iteration 16, and the temp calculation depends on t's value (0-19 uses one, 20-39 uses another, etc.). The important thing to remember is using index%16 or index & 0x0f whenever you are addressing the w[] array.
A quick modification would be something like this (double check all accesses to w to make sure I haven't missed the t & 0x0f):
// Process one 64-byte message block using the memory-lean schedule of
// RFC 3174 "method 2": W[] is a 16-word circular buffer updated in place
// (every access indexed mod 16 via `& 0xf`) instead of the fully expanded
// 80-word array, saving 256 bytes of RAM.
void SHA1ProcessMessageBlock(SHA1Context *context)
{
const uint32_t K[] = { /* Constants defined in SHA-1 */
0x5A827999,
0x6ED9EBA1,
0x8F1BBCDC,
0xCA62C1D6
};
int t; /* Loop counter */
uint32_t temp; /* Temporary word value */
uint32_t W[16]; /* Word sequence — circular buffer, indexed mod 16 */
uint32_t A, B, C, D, E; /* Word buffers */
/*
* Initialize the first 16 words in the array W. You can move this to your
* context.
*/
// Each word is assembled big-endian from four message bytes.
for(t = 0; t < 16; t++)
{
W[t] = context->Message_Block[t * 4] << 24;
W[t] |= context->Message_Block[t * 4 + 1] << 16;
W[t] |= context->Message_Block[t * 4 + 2] << 8;
W[t] |= context->Message_Block[t * 4 + 3];
}
// Working variables start from the current intermediate hash.
A = context->Intermediate_Hash[0];
B = context->Intermediate_Hash[1];
C = context->Intermediate_Hash[2];
D = context->Intermediate_Hash[3];
E = context->Intermediate_Hash[4];
// 80 rounds; the round function and constant change every 20 rounds.
for(t = 0; t < 80; t++) {
// Schedule expansion on the fly: since (t-16) & 0xf == t & 0xf, the
// slot being overwritten is exactly W[t-16] of the expanded schedule.
if (t >= 16) {
W[t&0xf] = SHA1CircularShift(1,W[(t-3)&0xf] ^ W[(t-8)&0xf] ^ W[(t-14)&0xf] ^ W[t&0xf]);
}
if (t<20) {
// Rounds 0-19: Ch(B,C,D)
temp = SHA1CircularShift(5,A) +
((B & C) | ((~B) & D)) + E + W[t&0xf] + K[0];
}
else if (t<40) {
// Rounds 20-39: Parity(B,C,D)
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[1];
}
else if (t < 60) {
// Rounds 40-59: Maj(B,C,D)
temp = SHA1CircularShift(5,A) +
((B & C) | (B & D) | (C & D)) + E + W[t&0xf] + K[2];
}
else {
// Rounds 60-79: Parity again, with the last constant.
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[3];
}
// Rotate the five working variables.
E = D;
D = C;
C = SHA1CircularShift(30,B);
B = A;
A = temp;
}
// Fold the block's result back into the running hash.
context->Intermediate_Hash[0] += A;
context->Intermediate_Hash[1] += B;
context->Intermediate_Hash[2] += C;
context->Intermediate_Hash[3] += D;
context->Intermediate_Hash[4] += E;
context->Message_Block_Index = 0;
}
There are still savings to be made: get rid of W[] array on stack and put it in context pre-initialized with the data you get.
Also, you need a lot of pre-processing before calling this function. For example, if all your messages are less than 55 bytes, you can put it in W array, add padding, and process immediately. If not, you'll have to call process twice: first with your partially padded input, and again with the rest of the pad, etc. That sort of thing would be very application specific, and I doubt you'll be able to find the code to do it for you.
By the way, the code above is a straight adaptation from the type 1 source from your link. You can probably squeeze a bit more out of it if you try to optimize it further.
I couldn't think of a way to get any savings on the intermediate hash, so you will need a total of 108 bytes for this (109 if counter is also in RAM), and 24 of which is local to this function, and can be reused in other places - so long as they are also temporary. So it is very hard to do what you want to do.
EDIT: If all your messages are less than 55 bytes, you can save another 20 bytes in your context by getting rid of the intermediate_hash[] storage. Simply initialize A-E from the constants, and add the constants at the end. Finally, instead of storing them in a separate variable, overwrite your input when this function ends.
I have implemented SHA-1 for several memory-constrained environments. You can get by with
DWORD W[16] ; // instead of H[80]
DWORD H[5] ; // Intermediate hash value
DWORD BitCount[2] ; // Probably a single DWORD is enough here
plus a few bytes of housekeeping. W is updated on the fly, as a circular buffer, instead of being generated at the start of each round.
working example:
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string>
using namespace std;
// Rotate the low 32 bits of `word` left by `bits`.
// BUG FIX: the original computed `word >> (32 - bits)`, which for
// bits == 0 is a shift by 32 — undefined behaviour. Masking the count
// keeps both shifts in [0, 31]; bits == 0 (or any multiple of 32) is now a
// well-defined no-op. The & 0xFFFFFFFF masks keep the result correct even
// if `unsigned` is wider than 32 bits.
unsigned CircularShift(int bits, unsigned word)
{
    bits &= 31;
    if (bits == 0)
        return word & 0xFFFFFFFF;
    return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32 - bits));
}
// Read one word from stdin and print its SHA-1. Single-block only:
// messages longer than 55 bytes do not fit one padded 64-byte block.
int main(void)
{
    string mess;
    cin >> mess;
    unsigned int lm = mess.length();
    if (lm > 55) {
        // Truncate rather than overrun the 64-byte block (the original
        // wrote mess[] and the 0x80 pad past the buffer for long input).
        lm = 55;
    }
    unsigned int lmb = lm*8;  // message length in bits
    // Build the padded block: message bytes, the mandatory 0x80 '1' bit,
    // zeros, then the 64-bit big-endian bit length in bytes 56-63.
    unsigned char messc[64] = {0};
    for (unsigned int i = 0; i < lm; i++)
    {
        messc[i] = mess[i];
    }
    messc[lm] = (unsigned char)128;
    // BUG FIX: the original also wrote (lmb >> 24/16/8) into bytes 56-58.
    // Those bytes are the HIGH word of the 64-bit length and must stay
    // zero for any message this code can handle; the stray writes
    // corrupted the digest for messages of 32+ characters. Only bytes
    // 60-63 carry the length here.
    messc[60] = (lmb >> 24) & 0xFF;
    messc[61] = (lmb >> 16) & 0xFF;
    messc[62] = (lmb >> 8) & 0xFF;
    messc[63] = (lmb) & 0xFF;
    // Dump the padded block for inspection.
    for(int i =0 ;i<64;i++)
    {
        cout<< hex << (int)messc[i] << " ";
    }
    // Initial SHA-1 chaining values and the four per-round constants
    // (stack arrays replace the original's leaked mallocs; the unused
    // messh/k locals are gone).
    unsigned H[5] = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0};
    const unsigned K[]={0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6};
    // Message schedule: 16 big-endian words expanded to 80.
    unsigned W[80];
    for(int t = 0; t < 16; t++)
    {
        W[t] = ((unsigned) messc[t * 4])<< 24;
        W[t] |= ((unsigned) messc[t * 4 + 1])<< 16;
        W[t] |= ((unsigned) messc[t * 4 + 2]) << 8;
        W[t] |= ((unsigned) messc[t * 4 + 3]);
    }
    for(int t = 16; t < 80; t++)
    {
        W[t] = CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
    }
    unsigned A = H[0], B = H[1], C = H[2], D = H[3], E = H[4];
    // 80 rounds; the boolean function and constant change every 20 rounds.
    for(int t = 0; t < 80; t++)
    {
        unsigned f;
        int k;
        if (t < 20)      { f = (B & C) | ((~B) & D); k = 0; }         // Ch
        else if (t < 40) { f = B ^ C ^ D; k = 1; }                    // Parity
        else if (t < 60) { f = (B & C) | (B & D) | (C & D); k = 2; }  // Maj
        else             { f = B ^ C ^ D; k = 3; }                    // Parity
        unsigned temp = (CircularShift(5,A) + f + E + W[t] + K[k]) & 0xFFFFFFFF;
        E = D;
        D = C;
        C = CircularShift(30,B);
        B = A;
        A = temp;
    }
    // Fold the block back into the chaining values.
    H[0] = (H[0] + A) & 0xFFFFFFFF;
    H[1] = (H[1] + B) & 0xFFFFFFFF;
    H[2] = (H[2] + C) & 0xFFFFFFFF;
    H[3] = (H[3] + D) & 0xFFFFFFFF;
    H[4] = (H[4] + E) & 0xFFFFFFFF;
    cout <<"\nTHIS IS SHHHHHAAAAAAAAAAA\n";
    // NOTE: cout << hex drops leading zeros of each word, as the original did.
    for(int i=0;i<5;i++)
    {
        cout << hex << H[i] << " ";
    }
    return 0;
}
All things considered, looking at your requirements, I think you are going to have to change your specs. Either a bigger chip, or a simpler algorithm. Even implementing SHA-1 (without HMAC) would be a challenge, but it should be doable.