Convert C++ double to DEC double - c++

I want to be able to take a user given double and write out in the DEC 64 dpfp format (http://www.wsmr.army.mil/RCCsite/Documents/106%20Previous%20Versions/106-07/appendixO.pdf). Having trouble getting this to line up correctly, anyone have experience or have written conversion functions for DEC types?

This seems pretty straight forward, let me take a shot at it. Note that I don't have any way of testing this for correctness.
std::vector<unsigned char> ToDEC64Float(double d)
{
uint64_t dec_bits = 0ULL;
if (d != 0.0)
{
assert(sizeof(double) == sizeof(uint64_t));
uint64_t bits = *reinterpret_cast<uint64_t*>(&d);
uint64_t fraction = bits & 0x000fffffffffffffULL;
int exp = (int)((bits >> 52) & 0x7ff) - 1023;
bool sign = (bool)(bits & 0x8000000000000000ULL);
// convert the individual values for the new format
fraction <<= 3;
exp += 1 + 128;
if (exp > 255)
throw std::overflow_error("overflow");
if (exp < 0 || (exp == 0 && fraction != 0))
throw std::underflow_error("underflow");
dec_bits = (uint64_t)sign << 63 | (uint64_t)exp << 55 | fraction;
}
std::vector<unsigned char> result;
for (int i = 0; i < 64; i+=8)
result.push_back((unsigned char)((dec_bits >> i) & 0xff));
return result;
}

double static const DECBytesToDouble(uint64_t value)
{
//DEC Byte Conversion Constants
static const float MANTISSA_CONSTANT = 0.5;
static const int32_t EXPONENT_BIAS = 128;
uint8_t * byte_array = (uint8_t*)&value;
uint8_t first = byte_array[0];
uint8_t second = byte_array[1];
uint8_t third = byte_array[2];
uint8_t fourth = byte_array[3];
uint8_t fifth = byte_array[4];
uint8_t sixth = byte_array[5];
uint8_t seventh = byte_array[6];
uint8_t eighth = byte_array[7];
// |second |first|fourth|third|sixth|fifth|eighth|seventh|
// |s|exponent|mantissa |
bool sign = second & 0x80;
std::cout<<"(DECBytesToDouble) Sign: "<<sign<<std::endl;
int32_t exponent = ((second & 0x7F) << 1) + ((first >> 7) & 0x1);
std::cout<<"(DECBytesToDouble) Exponent: "<<exponent<<std::endl;
int64_t mantissa = ((int64_t)(first & 0x7F) << 48) + ((int64_t)fourth << 40)
+ ((int64_t)third << 32) + ((int64_t)sixth << 24) + ((int64_t)fifth << 16)
+ ((int64_t)eighth << 8) + (int64_t) seventh;
std::cout<<"(DECBytesToDouble) Fraction: "<<mantissa<<std::endl;
double fraction = MANTISSA_CONSTANT;
for (int32_t i=0; i<55; i++)
{
fraction += ((mantissa >> i) & 0x1) * pow(2,i-56);
}//for
return pow(-1,sign)*fraction*pow(2,exponent-EXPONENT_BIAS);
}//DECBytesToDouble
uint64_t static const DoubleToDECBytes(double value)
{
static const int32_t EXPONENT_BIAS = 128;
uint64_t dec_bits = 0ULL;
if (value != 0.0)
{
uint64_t bits = *reinterpret_cast<uint64_t*>(&value);
uint64_t fraction = bits & 0x000fffffffffffffULL;
int exp = (int)((bits >> 52) & 0x7ff) - 1023;
bool sign = false;
if(value < 0)
{
sign = true;
}//if
std::cout<<"(DoubleToDECBytes) Sign: "<<sign<<std::endl;
// convert the individual values for the new format
fraction <<= 3;
exp += EXPONENT_BIAS + 1;
std::cout<<"(DoubleToDECBytes) Exponent: "<<exp<<std::endl;
std::cout<<"(DoubleToDECBytes) Fraction: "<<fraction<<std::endl;
if (exp > 255)
throw std::overflow_error("overflow");
if (exp < 0 || (exp == 0 && fraction != 0))
throw std::underflow_error("underflow");
dec_bits = (uint64_t)(sign << 63) | (uint64_t)(exp << 55) | fraction;
//|second |first|fourth|third|sixth|fifth|eighth|seventh|
uint8_t * byte_array = (uint8_t*)&dec_bits;
uint8_t first = byte_array[0];
uint8_t second = byte_array[1];
uint8_t third = byte_array[2];
uint8_t fourth = byte_array[3];
uint8_t fifth = byte_array[4];
uint8_t sixth = byte_array[5];
uint8_t seventh = byte_array[6];
uint8_t eighth = byte_array[7];
byte_array[7] = second;
byte_array[6] = first;
byte_array[5] = fourth;
byte_array[4] = third;
byte_array[3] = sixth;
byte_array[2] = fifth;
byte_array[1] = eighth;
byte_array[0] = seventh;
std::cout<<"(DoubleToDECBytes) Guess ="<<dec_bits<<std::endl;
}//if
/*std::vector<unsigned char> result;
for (int i = 0; i < 64; i+=8)
{
result.push_back((unsigned char)((dec_bits >> i) & 0xff));
}//for
uint64_t final_result = 0;
memcpy(&final_result, &result[0], sizeof(uint64_t));
std::cout<<"Final result: "<<final_result<<std::endl;*/
return dec_bits;
}//DoubleToDECBytes
Output:
input uint64_t value: 9707381994276473045
(DECBytesToDouble) Sign: 0
(DECBytesToDouble) Exponent: 145
(DECBytesToDouble) Fraction: 24184718387676855
output double value: 109527.7465
(DoubleToDECBytes) Sign: 0
(DoubleToDECBytes) Exponent: 145
(DoubleToDECBytes) Fraction: 24184718387676848
(DoubleToDECBytes) Guess =9705411669439479893
Converted double, uint64_t: 9705411669439479893
uint64_t difference: 1970324836993152
(DECBytesToDouble) Sign: 0
(DECBytesToDouble) Exponent: 0
(DECBytesToDouble) Fraction: 24184718387676848
output double value: 0.0000

I came to find that integrating libvaxdata C library into my C++ solution was the best way to go. In my use case situation all that was required was some byte flipping, however the routines work flawlessly.
I recommend the libvaxdata library when dealing with conversion to/from IEEE/DEC types.
http://pubs.usgs.gov/of/2005/1424/

Related

Is this constexpr sqrt function portable?

I wrote this implementation of sqrt that is finite in complexity and precise up to the last digit when double is ieee754 double. The question is that is this portable on devices of various endian (assuming 0LL is still 64 bit)? get_fraction returns the 52bits plus the 1 bit at the begining. Small doubles are treated separately and ensured that they also have 1 in the 53rd bit. The c++ part numeric_limits nan can easily be replaced with a constant.
Code:
static inline constexpr int16_t get_exponent(double x)
{
uint64_t bits = *(uint64_t*)&x;
int16_t val = ((bits & 0x7FF0000000000000ULL) >> 52) - 1023;
if(val != -1023)
return val;
uint64_t temp_fractal= (bits & 0x000FFFFFFFFFFFFFULL);
for (int i = 51; i >= 0;--i) {
if(!(temp_fractal & (0x01ULL<<i))) --val;
else break;
}
return val;
}
static inline constexpr uint64_t get_fraction(double x)
{
uint64_t bits = *(uint64_t*)&x;
if (bits & 0x7FF0000000000000ULL)
return (bits & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
uint64_t temp_fraction = bits & 0x000FFFFFFFFFFFFFULL;
for (int i = 51; i >= 0; --i) {
temp_fraction<<=1;
if(0x0010000000000000ULL & temp_fraction) break;
}
return temp_fraction;
}
static inline constexpr bool is_reserved(double x)
{
return get_exponent(x) == 1024;
}
static inline constexpr double my_abs(double x)
{
uint64_t bits = *(uint64_t*)&x;
bits &= 0x7FFFFFFFFFFFFFFFULL;
return *(double*)&bits;
}
constexpr double make_double(bool sign, int16_t exponent, uint64_t fractal)
{
uint64_t data = (fractal & 0x000FFFFFFFFFFFFFULL);
assert((fractal & 0xFFF0000000000000ULL) == 0x0010000000000000ULL);
if (exponent < -1023) {
fractal >>= (-1022 - exponent);
data = fractal;
exponent = -1023;
}
else if (exponent > 1023) {
return (1-2*sign)*std::numeric_limits<double>::infinity();
}
{
data |= ((uint64_t)((uint16_t)(exponent + 1023))) << 52;
if (sign)
data |= 0x8000000000000000ULL;
return *(double*)&data;
}
}
constexpr double my_sqrt(double x)
{
if(!x || is_reserved(x))
return x;
if(x < 0)
return -std::numeric_limits<double>::quiet_NaN();
uint64_t fraction = get_fraction(x);
int16_t exponent = get_exponent(x);
//C standard says it rounds to zero
int16_t half_exponent = ((exponent-1024)/2)+512;
uint64_t test_fraction = 0x0010000000000000ULL;
double test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//just to be safe
test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//find each bit except last one, binary search for result
for (int i = 51; i > 0; --i) {
test = make_double(0, half_exponent, test_fraction | (0x01ULL<<i));
if(test*test<x) test_fraction |= (0x01ULL << i);
}
double del1 = my_abs(x - test*test);
double temp = make_double(0, half_exponent, test_fraction | 0x01ULL);
double del2 = my_abs(x - temp * temp);
//see if the whole fraction needs to round up by one
if (x > temp * temp) {
test_fraction += 2;
//rounding up by one made the fraction too large
if (test_fraction >= 0x0020000000000000ULL) {
test_fraction >>= 1;
half_exponent -= 1;
}
double temp2 = make_double(0, half_exponent, test_fraction);
double del3 = my_abs(x - temp2 * temp2);
if(del3 <del2) return temp2;
else return temp;
}
else if(del2<del1) return temp;
else return make_double(0, half_exponent, test_fraction);
}
Edit: add some comments
Edit2: add missing functions

Copy n Bits from 8-bit Array into 64-bit Integer?

I am trying to copy n bits from any position of an array of uint8_ts into a single 64 bit integer. Here is a working solution that can copy an arbitrary amount of bits into a 64 bit integer starting at the beginning of the array, but I want to be able to start at any position of the array.
For example I might want to copy bits 2 through 11 of the array:
{7, 128, 7}
In binary that would be:
00000111 1000000 00000111
And I want an integer with value:
0001111000
std::uint64_t key_reg(std::uint8_t* bytes, std::size_t n)
{
std::uint64_t reg = 0;
// The amount of bits that fit into an entire element of an array
// ex, if I'm copying 17 bits, even_bytes == 2
std::size_t even_bytes = (n - (n % 8)) / 8;
// what's left over after the even bytes
// in this case, remainder == 1
std::size_t remainder = n - even_bytes * 8;
// copy each byte into the integer
for(std::size_t i = 0; i < even_bytes; ++i)
if(remainder)
reg |= (std::uint64_t)bytes[i] << (8 * (even_bytes - i));
else
reg |= (std::uint64_t)bytes[i] << (8 * (even_bytes - i - 1));
// if there is an uneven number of bits, copy them in
if(remainder)
reg |= (std::uint64_t)bytes[even_bytes];
return reg;
}
Do you have any idea how to implement
std::uint64_t key_reg(std::uint8_t* bytes, std::size_t pos, std::size_t n);
I didn't think anyone would answer so fast, so here was a solution I came up with in the same style. I found this bitfieldmask function on stackoverflow, but I'm unable to find the question to credit the author.
template<typename R>
static constexpr R bitfieldmask(unsigned int const a, unsigned int const b)
{
return ((static_cast<R>(-1) >> (((sizeof(R) * CHAR_BIT) - 1) - (b)))
& ~((1 << (a)) - 1));
}
std::uint64_t key_reg(std::uint8_t* bytes, std::size_t pos, std::size_t n)
{
std::uint64_t reg = 0;
std::size_t starting_byte = (pos < 8) ? 0 : ((pos - (pos % 8)) / 8);
std::size_t even_bytes = (n - (n % 8)) / 8;
std::size_t remainder = n - even_bytes * 8;
for(std::size_t i = 0; i < even_bytes; ++i)
if(remainder)
reg |= (std::uint64_t)bytes[starting_byte + i] << (8 * (even_bytes - i));
else
reg |= (std::uint64_t)bytes[starting_byte + i] << (8 * (even_bytes - i - 1));
if(remainder)
reg |= (std::uint64_t)bytes[even_bytes];
// mask out anything before the first bit
if(pos % 8 != 0) {
std::size_t a = n - pos;
std::size_t b = n;
auto mask = bitfieldmask<std::uint64_t>(a, b);
reg = (reg & ~mask);
}
return reg;
}
I think it is just simpler to copy all necessary bytes and then mask extra bits:
std::uint64_t key_reg(std::uint8_t* bytes, std::size_t n)
{
std::uint64_t reg = 0;
std::reverse_copy( bytes, bytes + n / 8 + ( n % 8 != 0 ),
reinterpret_cast<char *>( &reg ) );
reg >>= n % 8;
reg &= ~( -1UL << n );
return reg;
}
using pos would be little more complex:
std::uint64_t key_reg(std::uint8_t* bytes, std::size_t pos, std::size_t n)
{
std::uint64_t reg = 0;
auto endpos = pos + n;
auto start = bytes + pos / 8;
auto end = bytes + endpos / 8 + ( endpos % 8 != 0 );
std::reverse_copy( start, end, reinterpret_cast<char *>( &reg ) );
reg >>= endpos % 8;
reg &= ~( -1UL << n );
return reg;
}
live example
Your basic approach looks sound. To handle bit offsets that aren't multiples of 8, you just need to first read in a single partial byte and then proceed with the rest:
uint64_t key_reg(const uint8_t* bytes, size_t pos, size_t n) {
const uint8_t* ptr = bytes + pos / 8;
uint64_t result = 0;
if (pos % 8 > 0) {
/* read the first partial byte, masking off unwanted bits */
result = *(ptr++) & (0xFF >> (pos % 8));
if (n <= 8 - pos % 8) {
/* we need no more bits; shift off any excess and return early */
return result >> (8 - pos % 8 - n);
} else {
/* reduce the requested bit count by the number we got from this byte */
n -= 8 - pos % 8;
}
}
/* read and shift in as many whole bytes as we need */
while (n >= 8) {
result = (result << 8) + *(ptr++);
n -= 8;
}
/* finally read and shift in the last partial byte */
if (n > 0) {
result = (result << n) + (*ptr >> (8-n));
}
return result;
}
Here's an online demo with a simple test harness, demonstrating that this code indeed works correctly in all the edge cases I could find, such as reading a full 64 bits starting from the middle of a byte or reading only part of a single byte (which is actually a non-trivial special case, handled in a separate branch with its own return statement in the code above).
(Note that I wrote the code above in plain C since, like your original code, it doesn't really make use of any C++ specific features. Feel free to "C++ify" it by adding std:: where appropriate.)
One feature that the test harness doesn't check, but which I believe this code should possess, is that it never reads more bytes from the input array than necessary. In particular, the bytes array is not accessed at all if n == 0 (although a pointer to pos / 8 bytes after the start of the array is still calculated).
I have the following
struct MyType
{
std::array<uint8_t, 892> m_rguID;
uint16_t m_bitLength;
void GetBits(uint16_t startBit, uint16_t nBits, uint64_t & bits) const
};
void MyType::GetBits(uint16_t startBit, uint16_t nBits, uint64_t & bits) const
{
if(startBit + nBits > m_bitLength)
throw std::runtime_error("Index is out of range");
uint32_t num1 = startBit % 8U;
uint32_t num2 = 8U - num1;
uint32_t num3 = nBits >= num2 ? num2 : nBits;
uint32_t num4 = startBit >> 3;
bits = (uint64_t)(((int64_t)((uint64_t)m_rguID[num4] >> (8 - num3 - num1)) & (int64_t)((1 << num3) - 1)) << (nBits - num3));
uint32_t num5 = num4 + 1U;
int num6 = nBits - num3;
if(num6 <= 0)
return;
int num7 = num6 - 8;
int num8 = 8 - num6;
do
{
if(num6 >= 8)
{
bits |= (uint64_t)m_rguID[num5] << num7;
++num5;
}
else
{
bits |= (uint64_t)m_rguID[num5] >> num8;
++num5;
}
num6 += -8;
num7 += -8;
num8 += 8;
} while(num6 > 0);
}

How to take input 128 bit unsigned integer in c++

I am new to c++. I want to take input a unsigned 128 bit integer using scanf and print it using printf. As I am new to c++ , I only know these two methods for input output. Can someone help me out?
You could use boost, but this library set must be installed yourself:
#include <boost/multiprecision/cpp_int.hpp>
#include <iostream>
int main()
{
using namespace boost::multiprecision;
uint128_t v = 0;
std::cin >> v; // read
std::cout << v << std::endl; // write
return 0;
}
If you want to get along without boost, you can store the value into two uint64_t as such:
std::string input;
std::cin >> input;
uint64_t high = 0, low = 0, tmp;
for(char c : input)
{
high *= 10;
tmp = low * 10;
if(tmp / 10 != low)
{
high += ((low >> 32) * 10 + ((low & 0xf) * 10 >> 32)) >> 32;
}
low = tmp;
tmp = low + c - '0';
high += tmp < low;
low = tmp;
}
Printing then, however, gets more ugly:
std::vector<uint64_t> v;
while(high | low)
{
uint64_t const pow10 = 100000000;
uint64_t const mod = (((uint64_t)1 << 32) % pow10) * (((uint64_t)1 << 32) % pow10) % pow10;
tmp = high % pow10;
uint64_t temp = tmp * mod % pow10 + low % pow10;
v.push_back((tmp * mod + low) % pow10);
low = low / pow10 + tmp * 184467440737 + tmp * /*0*/9551616 / pow10 + (temp >= pow10);
high /= pow10;
}
std::vector<uint64_t>::reverse_iterator i = v.rbegin();
while(i != v.rend() && *i == 0)
{
++i;
}
if(i == v.rend())
{
std::cout << 0;
}
else
{
std::cout << *i << std::setfill('0');
for(++i; i != v.rend(); ++i)
{
std::cout << std::setw(8) << *i;
}
}
Above solution works up to (including)
340282366920938463463374516198409551615
= 0x ffff ffff ffff ffff ffff ad06 1410 beff
Above, there is an error.
Note: pow10 can be varied, then some other constants need to be adjusted, e. g. pow10 = 10:
low = low / pow10 + tmp * 1844674407370955161 + tmp * 6 / pow10 + (temp >= pow10);
and
std::cout << std::setw(1) << *i; // setw also can be dropped in this case
Increasing results in reducing the maximum number for which printing still works correctly, decreasing raises the maximum. With pow10 = 10, maximum is
340282366920938463463374607431768211425
= ffff ffff ffff ffff ffff ffff ffff ffe1
I don't know where the error for the very highest numbers comes from, yet, possibly some unconsidered overflow. Any suggestions appreciated, then I'll improve the algorithm. Until then, I'd reduce pow10 to 10 and introduce a special handling for the highest 30 failing numbers:
std::string const specialValues[0] = { /*...*/ };
if(high == 0xffffffffffffffff && low > 0xffffffffffffffe1)
{
std::cout << specialValues[low - 0xffffffffffffffe2];
}
else
{
/* ... */
}
So at least, we can handle all valid 128-bit values correctly.
You can try from_string_128_bits and to_string_128_bits with 128 bits unsigned integers in C :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
__uint128_t from_string_128_bits(const char *str) {
__uint128_t res = 0;
for (; *str; res = res * 10 + *str++ - '0');
return res;
}
static char *to_string_128_bits(__uint128_t num) {
__uint128_t mask = -1;
size_t a, b, c = 1, d;
char *s = malloc(2);
strcpy(s, "0");
for (mask -= mask / 2; mask; mask >>= 1) {
for (a = (num & mask) != 0, b = c; b;) {
d = ((s[--b] - '0') << 1) + a;
s[b] = "0123456789"[d % 10];
a = d / 10;
}
for (; a; s = realloc(s, ++c + 1), memmove(s + 1, s, c), *s = "0123456789"[a % 10], a /= 10);
}
return s;
}
int main(void) {
__uint128_t n = from_string_128_bits("10000000000000000000000000000000000001");
n *= 7;
char *s = to_string_128_bits(n);
puts(s);
free(s); // string must be freed
// print 70000000000000000000000000000000000007
}

Best c++ way to choose randomly position of set bit in bitset

I have std::bitset<32> word and I want to choose randomly and index (0-31) of some bit which is 1. How can I do that without loops and counters. Is there any std::algorithm suitable for that?
If it's easier I can convert the bitset to string or int and make it on the string or int.
Here's a first stab at it:
std::bitset<32> bitset{...};
std::mt19937 prng(std::time(nullptr));
std::uniform_int_distribution<std::size_t> dist{1, bitset.count()};
std::size_t p = 0;
for(std::size_t c = dist(prng); c; ++p)
c -= bitset[p];
// (p - 1) is now the index of the chosen bit.
It works by counting the set bits, doing the random pick c in that interval, then looking for the cth set bit.
If you have 32-bit (or even 64-bit) bitset, more efficient solution would be to convert to integer and then use bitwise operations on that integer to get random set bit.
Here is how you can convert your bitset to unsigned long:
std::bitset<32> word(0x1028);
unsigned long ulWord = word.to_ulong(); // ulWord == 0x1028
Then you can use “Select the bit position“ function from the Bit Twiddling Hacks page to select random set bit efficiently:
unsigned int bitcnt = word.count();
unsigned int randomSetBitIndex = 63-selectBit(ulWord, random() % bitcnt + 1);
unsigned long randomSetBit = 1 << randomSetBitIndex;
Here is the full code:
// Select random set bit from a bitset
#include <iostream>
#include <bitset>
#include <random>
using namespace std;
unsigned int selectBit(unsigned long long v, unsigned int r) {
// Source: https://graphics.stanford.edu/~seander/bithacks.html
// v - Input: value to find position with rank r.
// r - Input: bit's desired rank [1-64].
unsigned int s; // Output: Resulting position of bit with rank r [1-64]
uint64_t a, b, c, d; // Intermediate temporaries for bit count.
unsigned int t; // Bit count temporary.
// Do a normal parallel bit count for a 64-bit integer,
// but store all intermediate steps.
a = v - ((v >> 1) & ~0UL/3);
b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
c = (b + (b >> 4)) & ~0UL/0x11;
d = (c + (c >> 8)) & ~0UL/0x101;
t = (d >> 32) + (d >> 48);
// Now do branchless select!
s = 64;
s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
t = (d >> (s - 16)) & 0xff;
s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
t = (c >> (s - 8)) & 0xf;
s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
t = (b >> (s - 4)) & 0x7;
s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
t = (a >> (s - 2)) & 0x3;
s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
t = (v >> (s - 1)) & 0x1;
s -= ((t - r) & 256) >> 8;
return 64-s;
}
int main() {
// Input
std::bitset<32> word(0x1028);
// Initialize random number generator
std::random_device randDevice;
std::mt19937 random(randDevice());
// Select random bit
unsigned long ulWord = word.to_ulong();
unsigned int bitcnt = word.count();
unsigned int randomSetBitIndex = 63-selectBit(ulWord, random() % bitcnt + 1);
unsigned long randomSetBit = 1 << randomSetBitIndex;
// Output
cout << "0x" << std::hex << randomSetBit << endl; // either 0x8, 0x20 or 0x1000
return 0;
}
Run it on Ideone.

A memory-efficient SHA1 implementation

I'm working with a very restrictive embedded processor, which only has 128 bytes of ram. I'd like to implement SHA1 on it. RFC3174 describes, in 'method 2', a way of implementing SHA1 that doesn't require allocating an array of 80 32-bit words (which, at 320 bytes, is obviously not practical), and seems like it ought to be usable on my processor. I'm unable to find any implementations of 'method 2', though, and the sample code in the RFC only implements the default method.
Is anyone aware of a memory-efficient implementation of SHA1 in C or C++?
You should be able to quickly adapt the method 1 source to method 2. The function to change is Sha1ProcessMessageBlock() in method 1. Initialize w[0:15] from message, then do a loop of 0 to 79, where you only do w[] manipulation after iteration 16, and temp calculation depends on ts value (0-19 uses one, 20-39 uses another, etc). The important thing to remember is using index%16 or index & 0x0f whenever you are addressing the w[] array.
A quick modification would be something like this (double check all accesses to w to make sure I haven't missed the t & 0x0f):
void SHA1ProcessMessageBlock(SHA1Context *context)
{
const uint32_t K[] = { /* Constants defined in SHA-1 */
0x5A827999,
0x6ED9EBA1,
0x8F1BBCDC,
0xCA62C1D6
};
int t; /* Loop counter */
uint32_t temp; /* Temporary word value */
uint32_t W[16]; /* Word sequence */
uint32_t A, B, C, D, E; /* Word buffers */
/*
* Initialize the first 16 words in the array W. You can move this to your
* context.
*/
for(t = 0; t < 16; t++)
{
W[t] = context->Message_Block[t * 4] << 24;
W[t] |= context->Message_Block[t * 4 + 1] << 16;
W[t] |= context->Message_Block[t * 4 + 2] << 8;
W[t] |= context->Message_Block[t * 4 + 3];
}
A = context->Intermediate_Hash[0];
B = context->Intermediate_Hash[1];
C = context->Intermediate_Hash[2];
D = context->Intermediate_Hash[3];
E = context->Intermediate_Hash[4];
for(t = 0; t < 80; t++) {
if (t >= 16) {
W[t&0xf] = SHA1CircularShift(1,W[(t-3)&0xf] ^ W[(t-8)&0xf] ^ W[(t-14)&0xf] ^ W[t&0xf]);
}
if (t<20) {
temp = SHA1CircularShift(5,A) +
((B & C) | ((~B) & D)) + E + W[t&0xf] + K[0];
}
else if (t<40) {
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[1];
}
else if (t < 60) {
temp = SHA1CircularShift(5,A) +
((B & C) | (B & D) | (C & D)) + E + W[t&0xf] + K[2];
}
else {
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[3];
}
E = D;
D = C;
C = SHA1CircularShift(30,B);
B = A;
A = temp;
}
context->Intermediate_Hash[0] += A;
context->Intermediate_Hash[1] += B;
context->Intermediate_Hash[2] += C;
context->Intermediate_Hash[3] += D;
context->Intermediate_Hash[4] += E;
context->Message_Block_Index = 0;
}
There are still savings to be made: get rid of W[] array on stack and put it in context pre-initialized with the data you get.
Also, you need a lot of pre-processing before calling this function. For example, if all your messages are less than 55 bytes, you can put it in W array, add padding, and process immediately. If not, you'll have to call process twice: first with your partially padded input, and again with the rest of the pad, etc. That sort of thing would be very application specific, and I doubt you'll be able to find the code to do it for you.
By the way, the code above is a straight adaptation from the type 1 source from your link. You can probably squeeze a bit more out of it if you try to optimize it further.
I couldn't think of a way to get any savings on the intermediate hash, so you will need a total of 108 bytes for this (109 if counter is also in RAM), and 24 of which is local to this function, and can be reused in other places - so long as they are also temporary. So it is very hard to do what you want to do.
EDIT: If all your messages are less than 55 bytes, you can save another 20 bytes in your context by getting rid of the intermediate_hash[] storage. Simply initialize A-E from the constants, and add the constants at the end. Finally, instead of storing them in a separate variable, overwrite your input when this function ends.
I have implemented SHA-1 for several memory-constrained environments. You can get by with
DWORD W[16] ; // instead of H[80]
DWORD H[5] ; // Intermediate hash value
DWORD BitCount[2] ; // Probably a single DWORD is enough here
plus a few bytes of housekeeping. W is updated on the fly, as a circular buffer, instead of being generated at the start of each round.
working example:
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string>
using namespace std;
unsigned CircularShift(int bits, unsigned word)
{
return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32-bits));
}
int main(void)
{
string mess;
cin >> mess;
unsigned int lm = mess.length();
unsigned int lmb = lm*8;
unsigned char *messc;
messc=(unsigned char*)malloc((sizeof(unsigned char))*64);
for (unsigned short int i =0;i<64;i++)
{
messc[i]=char(0x00);
}
for(int i=0;i<mess.length();i++)
{
messc[i]=mess[i];
}
messc[lm]=(unsigned char)128;
messc[56] = (lmb >> 24) & 0xFF;
messc[57] = (lmb >> 16) & 0xFF;
messc[58] = (lmb >> 8) & 0xFF;
// messc[59] = (lmb) & 0xFF;
messc[60] = (lmb >> 24) & 0xFF;
messc[61] = (lmb >> 16) & 0xFF;
messc[62] = (lmb >> 8) & 0xFF;
messc[63] = (lmb) & 0xFF;
for(int i =0 ;i<64;i++)
{
cout<< hex << (int)messc[i] << " ";
}
unsigned *H;
H=(unsigned*)malloc(5*sizeof(unsigned));
H[0] = 0x67452301;
H[1] = 0xEFCDAB89;
H[2] = 0x98BADCFE;
H[3] = 0x10325476;
H[4] = 0xC3D2E1F0;
const unsigned K[]={0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6};
int t;
unsigned temp;
unsigned *W;
unsigned A, B, C, D, E;
W=(unsigned*)malloc(80*sizeof(unsigned));
unsigned char *messh;
messh=(unsigned char*)malloc(64*sizeof(unsigned char));
int k;
for(t = 0; t < 16; t++)
{
W[t] = ((unsigned) messc[t * 4])<< 24; ;
W[t] |= ((unsigned) messc[t * 4 + 1])<< 16;
W[t] |= ((unsigned) messc[t * 4 + 2]) << 8;
W[t] |= ((unsigned) messc[t * 4 + 3]);
}
for(t = 16; t < 80; t++)
{
W[t] = CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
}
A = H[0];
B = H[1];
C = H[2];
D = H[3];
E = H[4];
for(t = 0; t < 20; t++)
{
temp = CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 20; t < 40; t++)
{
temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 40; t < 60; t++)
{
temp = CircularShift(5,A) +
((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 60; t < 80; t++)
{
temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
H[0] = (H[0] + A) & 0xFFFFFFFF;
H[1] = (H[1] + B) & 0xFFFFFFFF;
H[2] = (H[2] + C) & 0xFFFFFFFF;
H[3] = (H[3] + D) & 0xFFFFFFFF;
H[4] = (H[4] + E) & 0xFFFFFFFF;
cout <<"\nTHIS IS SHHHHHAAAAAAAAAAA\n";
for(int i=0;i<5;i++)
{
cout << hex << H[i] << " ";
}
//Message_Block_Index = 0;
}
All things considered, looking at your requirements, I think you are going to have to change your specs. Either a bigger chip, or a simpler algorithm. Even implementing SHA-1 (without HMAC) would be a challenge, but it should be doable.