Related
As part of a project I'm working on with pulling data from a bluetooth device, I was provided a CRC8 function from the manufacturer to use. I'm writing the application in Swift, but the function they provided is in C++.
Below is the function they've provided in C++
/* 256-entry lookup table for the byte-wise CRC-8 below: entry k is the next
   checksum value when (current checksum XOR input byte) equals k. */
unsigned char xTable_CRC8[] = {
    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
};

/*
 * Table-driven CRC-8 over a byte buffer.
 *
 * RP_ByteData  pointer to the first byte to checksum
 * Buffer_Size  number of bytes to process
 *
 * Returns the checksum, starting from an initial value of 0; a zero-length
 * buffer therefore yields 0.
 */
uint8_t CRC8(char *RP_ByteData, unsigned int Buffer_Size) {
    uint8_t checksum = 0;
    const char *const end = RP_ByteData + Buffer_Size;

    for (const char *cursor = RP_ByteData; cursor != end; ++cursor) {
        /* The cast keeps only the low 8 bits, so a negative (signed) char
           still produces a table index in 0..255. */
        checksum = xTable_CRC8[(uint8_t)(checksum ^ *cursor)];
    }
    return checksum;
}
Here is my attempt at converting it so far:
// First attempt at porting the C++ CRC8 routine to Swift.
// NOTE: this version does not compile; the two type errors are marked inline
// at the statements that trigger them.
func calCR8(buf : [UInt8]) -> UInt8 {
// Starting checksum value (UInt8.min is 0).
let initialValue : UInt8 = UInt8.min;
// 256-entry lookup table copied from the C++ xTable_CRC8 array.
let Table_CRC8 : [UInt8] = [
0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15,0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65,0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85,0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2,0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32,0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42,0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C,0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C,0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C,0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B,0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B,0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB,0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3 ];
var crc = initialValue;
// `x` is a `let` constant declared outside the loop, so it cannot be
// reassigned on every iteration.
let x: UInt8
for i in 0 ..< buf.count{
// Compile error: `buf` is the whole [UInt8] array, not a single byte; the
// loop index `i` is never used to pick an element.
x = crc ^ buf
// Compile error: the array subscript expects an Int, but `x` is a UInt8.
crc = Table_CRC8[x]
}
return crc;
}
}
I'm dealing with two type errors: Cannot convert value of type '[UInt8]' to expected argument type 'UInt8', and Cannot convert value of type 'UInt8' to expected argument type 'Int'. I feel very lost and uncomfortable here.
Basically, this function will be called in a manner as such:
var buf : [UInt8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
buf[0] = 0xAA;
buf[1] = 0x14;
buf[2] = ~0x14
buf[buf.count - 1] = self.calcCR8(buf)
Based on this byte array being used as the parameter in this example, I'm expecting 0xC6 to be returned.
You have a few small mistakes. First, you are trying to perform the xor calculation on buf - this is the entire array; You need to access a single element buf[i].
Your second issue is that you can't index a sequence with a UInt8, so you need to make an Int equivalent in order to access the Table_CRC8 array.
Finally you are declaring x as a let (constant) outside the for loop, so you can't change it. You can move the declaration of x inside the for loop to address this.
Making these small changes will give you code that compiles:
// Corrected loop body; `crc`, `buf` and `Table_CRC8` come from the enclosing function.
for i in 0 ..< buf.count {
// XOR the running checksum with one byte of the input, not the whole array.
let x = crc ^ buf[i]
// Convert the UInt8 to Int so it can be used as an array subscript.
crc = Table_CRC8[Int(x)]
}
You can make it more 'Swifty' by changing the name of the CRC table to conform to Swift idioms and use an iteration loop rather than a for in <range> loop:
/// Computes the table-driven CRC-8 checksum of `buf`.
///
/// The checksum starts at zero. For every input byte, the running checksum is
/// XOR-ed with that byte and the result is used as an index into a 256-entry
/// lookup table to obtain the next checksum value.
///
/// - Parameter buf: The bytes to checksum.
/// - Returns: The checksum of `buf`; `0` when `buf` is empty.
func calcCRC8(_ buf : [UInt8]) -> UInt8 {
    let lookup: [UInt8] = [
        0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
        0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
        0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
        0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
        0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
        0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
        0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
        0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
        0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
        0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
        0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
        0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
        0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
        0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
        0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
        0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
    ]

    // Fold the bytes into the checksum: Array subscripts take an Int, so the
    // UInt8 XOR result is widened before the table lookup.
    return buf.reduce(UInt8.min) { checksum, byte in
        lookup[Int(checksum ^ byte)]
    }
}
I am looking for a C++ function that returns the inverse sqrt of a float: rsqrt(x) = 1/sqrt(x) by using the exact method like the built-in XMM operation RSQRTSS (cf. https://www.felixcloutier.com/x86/rsqrtss). (I.e., I want the built-in approximation rather than the more precise 1/sqrtf, and I don't care about speed (a lot).)
According to this question:
Is there a fast C or C++ standard library function for double precision inverse square root?
...there is at least no "fast way with double precision" to accomplish this with a standard C++ library. But how to do it slow, non-standard and with floats?
The RSQRTSS instruction is readily accessible via the _mm_rsqrt_ss() intrinsic declared in immintrin.h. But we can also emulate the instruction in software, as is done in the my_rsqrtf() function below. By simply observing the output of RSQRTSS one easily finds that its function values are based on a (virtual) table of $2^{11}$ (= 2048) entries, each 12 bits in size.
Note the "virtual" attribute, because it is unlikely that the hardware employs a straight 24 Kbit table. My analysis of the patterns in the table entries doesn't suggest the use of bipartite tables. A much simpler compression scheme -- like the one I used in the code below -- based on a table of base values and a table of offsets may be used. My scheme requires only a narrow adder but reduces ROM storage to 13 Kbit, i.e. almost half.
The implementation below was developed on and tested against an Intel Xeon Processor E3-1270 V2 that uses the Ivy Bridge architecture. There may be some functional differences in the implementation of RSQRTSS between various Intel architectures, and such differences are likely between architectures from different x86-64 vendors.
The framework below checks that the emulation by my_rsqrtf() delivers bit-wise identical results to RSQRTSS for all four rounding modes, two DAZ (denormals are zero) modes, and two FTZ (flush to zero) modes. We find that the function results are not affected by any modes, which matches with how Intel specified RSQRTSS in the
Intel® 64 and IA-32 Architectures Software Developer’s Manual:
The RSQRTSS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an ∞ of the sign of the source value is returned. A denormal source value is treated as a 0.0 (of the same sign). When a source value is a negative value (other than −0.0), a floating-point indefinite is returned. When a source value is an SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include "immintrin.h"
/*
 * Hardware reference: evaluates the RSQRTSS instruction on `a` with the given
 * MXCSR settings applied.
 *
 *   daz  denormals-are-zero mode passed to _MM_SET_DENORMALS_ZERO_MODE
 *   ftz  flush-to-zero mode passed to _MM_SET_FLUSH_ZERO_MODE
 *   rnd  rounding mode passed to _MM_SET_ROUNDING_MODE
 *
 * The MXCSR value observed on entry is restored before returning.
 */
float sse_rsqrtf (float a, uint32_t daz, uint32_t ftz, uint32_t rnd)
{
    const uint32_t saved_mxcsr = _mm_getcsr();

    _MM_SET_DENORMALS_ZERO_MODE (daz);
    _MM_SET_FLUSH_ZERO_MODE (ftz);
    _MM_SET_ROUNDING_MODE (rnd);

    /* Load `a` into the low lane, take the approximate reciprocal square
       root of that lane, and read the low lane back as a scalar. */
    const float approx = _mm_cvtss_f32 (_mm_rsqrt_ss (_mm_set_ss (a)));

    _mm_setcsr (saved_mxcsr);
    return approx;
}
/* Returns the object representation of the float `a` as a 32-bit unsigned
   integer (byte copy, no numeric conversion). */
inline uint32_t float_as_uint32 (float a)
{
    uint32_t bits;
    memcpy (&bits, &a, sizeof bits);
    return bits;
}
/* Returns the float whose object representation equals the 32-bit pattern
   `a` (byte copy, no numeric conversion). */
inline float uint32_as_float (uint32_t a)
{
    float value;
    memcpy (&value, &a, sizeof value);
    return value;
}
#define LOG2_NBR_TAB_ENTRIES (11)
#define NBR_TAB_ENTRIES (1 << LOG2_NBR_TAB_ENTRIES)
#define TAB_ENTRY_BITS (12)
#define BASE_TAB_ENTRY_BITS (9)
/* 128 9-bit entries = 1152 bits */
const uint16_t base_tab[128] = {
0x0ce, 0x0c9, 0x0c3, 0x0be, 0x0b9, 0x0b4, 0x0af, 0x0aa,
0x0a6, 0x0a1, 0x09d, 0x098, 0x094, 0x090, 0x08b, 0x087,
0x083, 0x07f, 0x07c, 0x078, 0x074, 0x070, 0x06d, 0x069,
0x066, 0x062, 0x05f, 0x05c, 0x058, 0x055, 0x052, 0x04f,
0x04c, 0x049, 0x046, 0x043, 0x040, 0x03d, 0x03a, 0x038,
0x035, 0x032, 0x030, 0x02d, 0x02a, 0x028, 0x025, 0x023,
0x021, 0x01e, 0x01c, 0x019, 0x017, 0x015, 0x013, 0x010,
0x00e, 0x00c, 0x00a, 0x008, 0x006, 0x004, 0x002, 0x000,
0x1f8, 0x1f0, 0x1e9, 0x1e1, 0x1da, 0x1d3, 0x1cc, 0x1c5,
0x1bf, 0x1b8, 0x1b2, 0x1ab, 0x1a5, 0x19f, 0x199, 0x194,
0x18e, 0x188, 0x183, 0x17e, 0x178, 0x173, 0x16e, 0x169,
0x164, 0x15f, 0x15a, 0x156, 0x151, 0x14d, 0x148, 0x144,
0x13f, 0x13b, 0x137, 0x133, 0x12f, 0x12b, 0x127, 0x123,
0x11f, 0x11b, 0x118, 0x114, 0x110, 0x10d, 0x109, 0x106,
0x102, 0x0ff, 0x0fc, 0x0f8, 0x0f5, 0x0f2, 0x0ef, 0x0eb,
0x0e8, 0x0e5, 0x0e2, 0x0df, 0x0dc, 0x0d9, 0x0d7, 0x0d4,
};
/* 2048 6-bit entries = 12288 bits */
const uint8_t ofs_tab[2048] = {
0x2f, 0x2c, 0x2a, 0x27, 0x24, 0x21, 0x1e, 0x1c, 0x19, 0x16, 0x13, 0x10, 0x0e, 0x0b, 0x08, 0x05,
0x2b, 0x28, 0x25, 0x22, 0x1f, 0x1d, 0x1a, 0x17, 0x15, 0x12, 0x0f, 0x0c, 0x0a, 0x07, 0x04, 0x02,
0x2f, 0x2c, 0x29, 0x27, 0x24, 0x21, 0x1f, 0x1c, 0x19, 0x17, 0x14, 0x11, 0x0f, 0x0c, 0x09, 0x07,
0x2c, 0x29, 0x27, 0x24, 0x22, 0x1f, 0x1c, 0x1a, 0x17, 0x15, 0x12, 0x0f, 0x0d, 0x0a, 0x08, 0x05,
0x2a, 0x28, 0x25, 0x23, 0x20, 0x1e, 0x1b, 0x18, 0x16, 0x13, 0x11, 0x0e, 0x0c, 0x09, 0x07, 0x04,
0x2a, 0x27, 0x24, 0x22, 0x1f, 0x1d, 0x1a, 0x18, 0x15, 0x13, 0x10, 0x0e, 0x0b, 0x09, 0x07, 0x04,
0x2a, 0x27, 0x25, 0x22, 0x20, 0x1d, 0x1b, 0x18, 0x16, 0x13, 0x11, 0x0f, 0x0c, 0x0a, 0x07, 0x05,
0x2a, 0x28, 0x26, 0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x17, 0x15, 0x12, 0x10, 0x0e, 0x0b, 0x09, 0x07,
0x24, 0x22, 0x1f, 0x1d, 0x1b, 0x18, 0x16, 0x14, 0x11, 0x0f, 0x0d, 0x0a, 0x08, 0x06, 0x03, 0x01,
0x27, 0x24, 0x22, 0x20, 0x1d, 0x1b, 0x19, 0x16, 0x14, 0x12, 0x10, 0x0d, 0x0b, 0x09, 0x06, 0x04,
0x22, 0x20, 0x1d, 0x1b, 0x19, 0x17, 0x14, 0x12, 0x10, 0x0e, 0x0b, 0x09, 0x07, 0x05, 0x02, 0x00,
0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0b, 0x09, 0x07, 0x05,
0x23, 0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x06, 0x04, 0x02,
0x20, 0x1e, 0x1c, 0x1a, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x06, 0x04, 0x02, 0x00,
0x26, 0x24, 0x22, 0x20, 0x1e, 0x1c, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07,
0x25, 0x23, 0x21, 0x1f, 0x1d, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06,
0x24, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06,
0x24, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06,
0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, 0x00,
0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01,
0x1f, 0x1d, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03,
0x21, 0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06,
0x1c, 0x1a, 0x19, 0x17, 0x15, 0x13, 0x11, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x01,
0x1f, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x15, 0x13, 0x11, 0x0f, 0x0e, 0x0c, 0x0a, 0x08, 0x07, 0x05,
0x1b, 0x19, 0x18, 0x16, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x04, 0x03, 0x01,
0x1f, 0x1e, 0x1c, 0x1a, 0x18, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0e, 0x0c, 0x0b, 0x09, 0x07, 0x06,
0x1c, 0x1a, 0x19, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0e, 0x0d, 0x0b, 0x09, 0x08, 0x06, 0x04, 0x03,
0x19, 0x17, 0x16, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x02, 0x00,
0x1f, 0x1d, 0x1b, 0x1a, 0x18, 0x16, 0x15, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x09, 0x08, 0x06,
0x1d, 0x1b, 0x19, 0x18, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0c, 0x0b, 0x09, 0x08, 0x06, 0x04,
0x1b, 0x19, 0x18, 0x16, 0x15, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x05, 0x03,
0x1a, 0x18, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x04, 0x02,
0x19, 0x17, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x02,
0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0b, 0x09, 0x08, 0x06, 0x05, 0x03, 0x02,
0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x07, 0x05, 0x04, 0x02,
0x19, 0x17, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x04, 0x03,
0x19, 0x18, 0x16, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x05, 0x04,
0x1a, 0x19, 0x18, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x09, 0x08, 0x07, 0x05,
0x1c, 0x1a, 0x19, 0x18, 0x16, 0x15, 0x13, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x08, 0x07,
0x15, 0x14, 0x13, 0x11, 0x10, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x08, 0x06, 0x05, 0x04, 0x02, 0x01,
0x17, 0x16, 0x15, 0x13, 0x12, 0x11, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x04, 0x03,
0x1a, 0x18, 0x17, 0x16, 0x14, 0x13, 0x12, 0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06,
0x14, 0x13, 0x12, 0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x05, 0x03, 0x02, 0x01,
0x17, 0x16, 0x15, 0x13, 0x12, 0x11, 0x0f, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x08, 0x06, 0x05, 0x04,
0x1b, 0x19, 0x18, 0x17, 0x15, 0x14, 0x13, 0x12, 0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x09, 0x07,
0x16, 0x15, 0x13, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0b, 0x09, 0x08, 0x07, 0x06, 0x04, 0x03,
0x1a, 0x19, 0x17, 0x16, 0x15, 0x14, 0x12, 0x11, 0x10, 0x0f, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07,
0x16, 0x15, 0x13, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x05, 0x03,
0x12, 0x11, 0x10, 0x0f, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x05, 0x04, 0x02, 0x01, 0x00,
0x17, 0x16, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x05,
0x14, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x05, 0x04, 0x03, 0x02,
0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07,
0x16, 0x15, 0x14, 0x13, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x09, 0x08, 0x07, 0x06, 0x05,
0x14, 0x13, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07,
0x16, 0x15, 0x14, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x06, 0x05,
0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x04, 0x03, 0x02, 0x01,
0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x3e, 0x3a, 0x36, 0x32, 0x2e, 0x2a, 0x26, 0x22, 0x1e, 0x1a, 0x16, 0x12, 0x0e, 0x0b, 0x07, 0x03,
0x3f, 0x3b, 0x37, 0x33, 0x2f, 0x2b, 0x27, 0x24, 0x20, 0x1c, 0x18, 0x14, 0x10, 0x0c, 0x09, 0x05,
0x39, 0x35, 0x31, 0x2e, 0x2a, 0x26, 0x22, 0x1e, 0x1b, 0x17, 0x13, 0x0f, 0x0c, 0x08, 0x04, 0x00,
0x3d, 0x39, 0x35, 0x31, 0x2e, 0x2a, 0x26, 0x23, 0x1f, 0x1b, 0x18, 0x14, 0x10, 0x0d, 0x09, 0x05,
0x3a, 0x36, 0x32, 0x2f, 0x2b, 0x27, 0x24, 0x20, 0x1d, 0x19, 0x15, 0x12, 0x0e, 0x0b, 0x07, 0x03,
0x38, 0x34, 0x31, 0x2d, 0x2a, 0x26, 0x22, 0x1f, 0x1b, 0x18, 0x14, 0x11, 0x0d, 0x0a, 0x06, 0x03,
0x37, 0x34, 0x30, 0x2d, 0x29, 0x26, 0x22, 0x1f, 0x1b, 0x18, 0x15, 0x11, 0x0e, 0x0a, 0x07, 0x03,
0x38, 0x35, 0x31, 0x2e, 0x2a, 0x27, 0x24, 0x20, 0x1d, 0x19, 0x16, 0x13, 0x0f, 0x0c, 0x09, 0x05,
0x32, 0x2e, 0x2b, 0x28, 0x24, 0x21, 0x1e, 0x1a, 0x17, 0x14, 0x11, 0x0d, 0x0a, 0x07, 0x03, 0x00,
0x35, 0x31, 0x2e, 0x2b, 0x28, 0x24, 0x21, 0x1e, 0x1b, 0x17, 0x14, 0x11, 0x0e, 0x0a, 0x07, 0x04,
0x31, 0x2e, 0x2a, 0x27, 0x24, 0x21, 0x1e, 0x1a, 0x17, 0x14, 0x11, 0x0e, 0x0b, 0x07, 0x04, 0x01,
0x36, 0x33, 0x30, 0x2c, 0x29, 0x26, 0x23, 0x20, 0x1d, 0x1a, 0x17, 0x13, 0x10, 0x0d, 0x0a, 0x07,
0x34, 0x31, 0x2e, 0x2b, 0x28, 0x25, 0x21, 0x1e, 0x1b, 0x18, 0x15, 0x12, 0x0f, 0x0c, 0x09, 0x06,
0x33, 0x30, 0x2d, 0x2a, 0x27, 0x24, 0x21, 0x1e, 0x1b, 0x18, 0x15, 0x12, 0x0f, 0x0c, 0x09, 0x06,
0x33, 0x30, 0x2d, 0x2a, 0x27, 0x24, 0x21, 0x1e, 0x1b, 0x18, 0x15, 0x13, 0x10, 0x0d, 0x0a, 0x07,
0x2c, 0x29, 0x26, 0x23, 0x20, 0x1d, 0x1a, 0x18, 0x15, 0x12, 0x0f, 0x0c, 0x09, 0x06, 0x03, 0x01,
0x2e, 0x2b, 0x28, 0x25, 0x22, 0x1f, 0x1d, 0x1a, 0x17, 0x14, 0x11, 0x0e, 0x0c, 0x09, 0x06, 0x03,
0x30, 0x2e, 0x2b, 0x28, 0x25, 0x22, 0x20, 0x1d, 0x1a, 0x17, 0x14, 0x12, 0x0f, 0x0c, 0x09, 0x07,
0x2c, 0x29, 0x26, 0x24, 0x21, 0x1e, 0x1b, 0x19, 0x16, 0x13, 0x10, 0x0e, 0x0b, 0x08, 0x06, 0x03,
0x28, 0x25, 0x23, 0x20, 0x1d, 0x1b, 0x18, 0x15, 0x13, 0x10, 0x0d, 0x0b, 0x08, 0x05, 0x03, 0x00,
0x2d, 0x2b, 0x28, 0x25, 0x23, 0x20, 0x1d, 0x1b, 0x18, 0x15, 0x13, 0x10, 0x0e, 0x0b, 0x08, 0x06,
0x2b, 0x28, 0x26, 0x23, 0x21, 0x1e, 0x1b, 0x19, 0x16, 0x14, 0x11, 0x0f, 0x0c, 0x09, 0x07, 0x04,
0x2a, 0x27, 0x25, 0x22, 0x1f, 0x1d, 0x1a, 0x18, 0x15, 0x13, 0x10, 0x0e, 0x0b, 0x09, 0x06, 0x03,
0x29, 0x26, 0x24, 0x21, 0x1f, 0x1c, 0x1a, 0x17, 0x15, 0x12, 0x10, 0x0d, 0x0b, 0x08, 0x06, 0x03,
0x29, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1a, 0x18, 0x15, 0x13, 0x10, 0x0e, 0x0b, 0x09, 0x06, 0x04,
0x2a, 0x27, 0x25, 0x22, 0x20, 0x1d, 0x1b, 0x19, 0x16, 0x14, 0x11, 0x0f, 0x0d, 0x0a, 0x08, 0x05,
0x2b, 0x29, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1a, 0x18, 0x15, 0x13, 0x11, 0x0e, 0x0c, 0x0a, 0x07,
0x25, 0x23, 0x20, 0x1e, 0x1b, 0x19, 0x17, 0x14, 0x12, 0x10, 0x0d, 0x0b, 0x09, 0x06, 0x04, 0x02,
0x27, 0x25, 0x23, 0x20, 0x1e, 0x1c, 0x1a, 0x17, 0x15, 0x13, 0x10, 0x0e, 0x0c, 0x09, 0x07, 0x05,
0x23, 0x20, 0x1e, 0x1c, 0x1a, 0x17, 0x15, 0x13, 0x10, 0x0e, 0x0c, 0x0a, 0x07, 0x05, 0x03, 0x01,
0x26, 0x24, 0x22, 0x20, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x12, 0x10, 0x0e, 0x0c, 0x09, 0x07, 0x05,
0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x08, 0x06, 0x04, 0x02,
0x28, 0x25, 0x23, 0x21, 0x1f, 0x1d, 0x1b, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x09, 0x07,
0x25, 0x23, 0x21, 0x1f, 0x1d, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x09, 0x07, 0x05,
0x23, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04,
0x22, 0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03,
0x21, 0x1f, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02,
0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02,
0x20, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02,
0x20, 0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03,
0x21, 0x1f, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x07, 0x05,
0x23, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 0x06,
0x1c, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x11, 0x0f, 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x01,
0x1f, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x14, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x09, 0x07, 0x05, 0x03,
0x21, 0x1f, 0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x15, 0x13, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x08, 0x06,
0x1c, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x09, 0x07, 0x05, 0x03, 0x02,
0x20, 0x1e, 0x1c, 0x1a, 0x19, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0e, 0x0c, 0x0b, 0x09, 0x07, 0x05,
0x1c, 0x1a, 0x18, 0x16, 0x15, 0x13, 0x11, 0x0f, 0x0e, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x01,
0x20, 0x1e, 0x1c, 0x1b, 0x19, 0x17, 0x15, 0x14, 0x12, 0x10, 0x0f, 0x0d, 0x0b, 0x09, 0x08, 0x06,
0x1c, 0x1b, 0x19, 0x17, 0x15, 0x14, 0x12, 0x10, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x06, 0x05, 0x03,
0x19, 0x17, 0x16, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x02, 0x00,
0x1e, 0x1d, 0x1b, 0x19, 0x18, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0c, 0x0b, 0x09, 0x07, 0x06,
0x1c, 0x1a, 0x19, 0x17, 0x16, 0x14, 0x12, 0x11, 0x0f, 0x0d, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x04,
0x1a, 0x18, 0x17, 0x15, 0x14, 0x12, 0x10, 0x0f, 0x0d, 0x0b, 0x0a, 0x08, 0x07, 0x05, 0x03, 0x02,
0x18, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0f, 0x0d, 0x0c, 0x0a, 0x08, 0x07, 0x05, 0x04, 0x02, 0x00,
0x1f, 0x1d, 0x1c, 0x1a, 0x19, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0a, 0x09, 0x07,
0x1e, 0x1c, 0x1b, 0x19, 0x18, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x07,
0x1d, 0x1c, 0x1a, 0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0b, 0x09, 0x08, 0x06,
0x1d, 0x1b, 0x1a, 0x18, 0x17, 0x15, 0x13, 0x12, 0x10, 0x0f, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06,
0x1c, 0x1b, 0x19, 0x18, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f, 0x0d, 0x0c, 0x0b, 0x09, 0x08, 0x06,
0x1d, 0x1b, 0x1a, 0x18, 0x17, 0x15, 0x14, 0x12, 0x11, 0x0f, 0x0e, 0x0c, 0x0b, 0x09, 0x08, 0x06,
0x1d, 0x1c, 0x1a, 0x19, 0x17, 0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x0a, 0x09, 0x07,
0x16, 0x14, 0x13, 0x11, 0x10, 0x0e, 0x0d, 0x0c, 0x0a, 0x09, 0x07, 0x06, 0x04, 0x03, 0x02, 0x00,
0x17, 0x15, 0x14, 0x12, 0x11, 0x10, 0x0e, 0x0d, 0x0b, 0x0a, 0x08, 0x07, 0x06, 0x04, 0x03, 0x01,
};
#define IEEE_BINARY32_EXPO_BIAS (127)
#define IEEE_BINARY32_MANT_BITS (23)
#define IEEE_BINARY32_EXPO_BITS (8)
#define IEEE_BINARY32_EXPO_MASK (0x7f800000)
#define IEEE_BINARY32_NAN_INDEF (0xffc00000)
#define IEEE_BINARY32_POS_INF (0x7f800000)
#define IEEE_BINARY32_POS_ZERO (0x00000000)
#define IEEE_BINARY32_MIN_NORM (0x00800000)
#define IEEE_BINARY32_SIGN_BIT (0x80000000)
/*
 * Software emulation of the RSQRTSS instruction.
 *
 * Special operands are handled first:
 *   - zeros and subnormals -> infinity carrying the sign of the argument
 *   - NaNs                 -> x + x (converts an SNaN to a QNaN)
 *   - negative values      -> the indefinite NaN (0xffc00000)
 *   - +infinity            -> +0
 * Positive normal operands are looked up in base_tab / ofs_tab.
 */
float my_rsqrtf (float x)
{
    const uint32_t bits = float_as_uint32 (x);
    const uint32_t sign = bits & IEEE_BINARY32_SIGN_BIT;
    const uint32_t magnitude = bits & ~IEEE_BINARY32_SIGN_BIT;

    /* zeros and subnormals */
    if (magnitude < IEEE_BINARY32_MIN_NORM) {
        return uint32_as_float (IEEE_BINARY32_POS_INF | sign);
    }
    /* NaNs */
    if (magnitude > IEEE_BINARY32_POS_INF) {
        return x + x; // convert SNaN to QNaN
    }
    /* negative arguments */
    if (sign) {
        return uint32_as_float (IEEE_BINARY32_NAN_INDEF);
    }
    /* positive infinity */
    if (bits == IEEE_BINARY32_POS_INF) {
        return uint32_as_float (IEEE_BINARY32_POS_ZERO);
    }

    /* positive normals: the table index is formed from the exponent lsb and
       the leading mantissa bits of the argument */
    const uint32_t in_expo = (bits & IEEE_BINARY32_EXPO_MASK) >> IEEE_BINARY32_MANT_BITS;
    const uint32_t tab_idx = (bits >> (IEEE_BINARY32_MANT_BITS - LOG2_NBR_TAB_ENTRIES + 1))
                             & (NBR_TAB_ENTRIES - 1);

    /* exponent of the result */
    const uint32_t out_expo = (3 * IEEE_BINARY32_EXPO_BIAS + ~in_expo) >> 1;

    /* mantissa of the result: one base value is shared by 16 consecutive
       indices; the per-index offset is added to it, and the sum is then
       shifted into the top of the mantissa field */
    const uint32_t tab_entry = (base_tab [tab_idx >> 4] << (TAB_ENTRY_BITS - BASE_TAB_ENTRY_BITS))
                               + ofs_tab [tab_idx];
    const uint32_t out_mant = tab_entry << (IEEE_BINARY32_MANT_BITS - TAB_ENTRY_BITS);

    /* combine exponent and mantissa bits into the final result */
    return uint32_as_float ((out_expo << IEEE_BINARY32_MANT_BITS) | out_mant);
}
#define NBR_RND_MODES (4)
#define NBR_DAZ_MODES (2)
#define NBR_FTZ_MODES (2)
/*
 * Exhaustive test driver: for every combination of rounding, flush-to-zero
 * and denormals-are-zero mode, compares my_rsqrtf() bit-for-bit against
 * sse_rsqrtf() over all 2^32 argument encodings. It also tracks the maximum
 * relative error of the hardware result against a double-precision reference
 * for normal arguments. Exits with EXIT_FAILURE on the first mismatch.
 */
int main (void)
{
    const uint32_t rnd_mode [NBR_RND_MODES] =
    {
        _MM_ROUND_NEAREST,
        _MM_ROUND_TOWARD_ZERO,
        _MM_ROUND_DOWN,
        _MM_ROUND_UP
    };
    const uint32_t ftz_mode [NBR_FTZ_MODES] =
    {
        _MM_FLUSH_ZERO_OFF,
        _MM_FLUSH_ZERO_ON
    };
    const uint32_t daz_mode [NBR_DAZ_MODES] =
    {
        _MM_DENORMALS_ZERO_OFF,
        _MM_DENORMALS_ZERO_ON
    };

    for (int rnd = 0; rnd < NBR_RND_MODES; rnd++) {
        printf ("rnd=%d\n", rnd);
        for (int ftz = 0; ftz < NBR_FTZ_MODES; ftz++) {
            printf (" ftz=%d\n", ftz);
            for (int daz = 0; daz < NBR_DAZ_MODES; daz++) {
                printf (" daz=%d\n", daz); fflush(stdout);

                double worst_relerr = 0;
                uint32_t pattern = 0;
                do {
                    const float arg = uint32_as_float (pattern);
                    const float ref = sse_rsqrtf (arg, daz_mode[daz], ftz_mode[ftz], rnd_mode[rnd]);
                    const float res = my_rsqrtf (arg);

                    if ((arg >= 1.17549435e-38f) && (arg < 3.40282347e+38f)) { /* normals only */
                        const double exact = sqrt(1.0/(double)arg);
                        const double relerr = fabs ((ref - exact) / exact);
                        if (relerr > worst_relerr) worst_relerr = relerr;
                    }

                    const uint32_t iref = float_as_uint32 (ref);
                    const uint32_t ires = float_as_uint32 (res);
                    if (ires != iref) {
                        printf ("!!!! rnd=%d ftz=%d daz=%d arg=%08x res=%08x ref=%08x\n",
                                rnd, ftz, daz, pattern, ires, iref);
                        return EXIT_FAILURE;
                    }
                    /* unsigned wrap-around to 0 ends the sweep after 0xffffffff */
                } while (++pattern != 0);

                printf (" maxrelerr = %15.8e\n", worst_relerr);
            }
        }
    }
    printf ("RSQRTSS emulation test passed\n");
    return EXIT_SUCCESS;
}
There is no function in the standard library that does this, but your compiler might optimize the expression 1 / sqrt(value) such that it does emit the RSQRTSS instruction. For example, with the compiler flags -ffast-math -march=native, GCC will emit that instruction, see: https://godbolt.org/z/cL6seG
For what it's worth, I ended up implementing it in plain assembly within C++, as #François Andrieux suggested (more precisely I used ASMJIT).
This works well, though it comes with the drawback of losing portability (less than with plain asm, though). But this is somewhat inherent to my question since I WANT to use a very specific x86 function.
Here's my code:
// Builds a small function at run time with asmjit that applies RSQRTSS to a
// value and returns the result.
// Signature of the generated code: no arguments, returns a float.
typedef float(*JITFunc)();
JITFunc func;
asmjit::JitRuntime jit_runtime;
asmjit::CodeHolder code;
code.init(jit_runtime.getCodeInfo());
asmjit::X86Compiler cc(&code);
// Begin a function matching JITFunc (float return, zero arguments).
cc.addFunc(asmjit::FuncSignature0<float>());
float value = 2.71; // Some example value.
asmjit::X86Xmm x = cc.newXmm();
// NOTE(review): setXmmVar is not shown here; presumably it emits code that
// loads `value` into `x` -- confirm against its definition.
setXmmVar(cc, x, value);
cc.rsqrtss(x, x); // THE asm function.
cc.ret(x);
cc.endFunc();
cc.finalize();
// Store the entry point of the generated code in `func`.
jit_runtime.add(&func, &code);
return func(); // Or something to that effect. func() is the result, anyway.
Assume in C there is following code:
const unsigned char C[12][64] = {
{
0xB1, 0x08, 0x5B, 0xDA, 0x1E, 0xCA, 0xDA, 0xE9, 0xEB, 0xCB, 0x2F, 0x81, 0xC0, 0x65, 0x7C, 0x1F,
0x2F, 0x6A, 0x76, 0x43, 0x2E, 0x45, 0xD0, 0x16, 0x71, 0x4E, 0xB8, 0x8D, 0x75, 0x85, 0xC4, 0xFC,
0x4B, 0x7C, 0xE0, 0x91, 0x92, 0x67, 0x69, 0x01, 0xA2, 0x42, 0x2A, 0x08, 0xA4, 0x60, 0xD3, 0x15,
0x05, 0x76, 0x74, 0x36, 0xCC, 0x74, 0x4D, 0x23, 0xDD, 0x80, 0x65, 0x59, 0xF2, 0xA6, 0x45, 0x07
},
{
0x6F, 0xA3, 0xB5, 0x8A, 0xA9, 0x9D, 0x2F, 0x1A, 0x4F, 0xE3, 0x9D, 0x46, 0x0F, 0x70, 0xB5, 0xD7,
0xF3, 0xFE, 0xEA, 0x72, 0x0A, 0x23, 0x2B, 0x98, 0x61, 0xD5, 0x5E, 0x0F, 0x16, 0xB5, 0x01, 0x31,
0x9A, 0xB5, 0x17, 0x6B, 0x12, 0xD6, 0x99, 0x58, 0x5C, 0xB5, 0x61, 0xC2, 0xDB, 0x0A, 0xA7, 0xCA,
0x55, 0xDD, 0xA2, 0x1B, 0xD7, 0xCB, 0xCD, 0x56, 0xE6, 0x79, 0x04, 0x70, 0x21, 0xB1, 0x9B, 0xB7
},
and so on
How do I convert it into std::vector so that the vector has been initialized all the necessary values?
Under vector I assume something like vector< vector<uint8_t> >
Or maybe I'd better use boost::multi_array?
Starting in C++11 you can initialize a vector with an initializer list. You can simply change
const unsigned char C[12][64] = ...
To
std::vector<std::vector<unsigned char>> C = ...
Live Example
I have a weird issue while creating a substitution for AES encryption.
I use a lookup table, named SBox, to substitute the bytes in the string state (technically a vector, but it's irrelevant)
const vector<byte> SBox =
{
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
// AES SubBytes step: replaces every byte of the 4x4 state with its entry in SBox.
// `state` is indexed as state[0..3][0..3], so it must hold at least 4 strings of
// at least 4 characters each. The substitution is done in place.
void subBytes(vector<string>& state)
{
    for (int i=0; i<4; ++i)
    {
        for (int j=0; j<4; ++j)
        {
            // Plain char may be signed: a byte >= 0x80 would then be promoted to a
            // negative value and index outside SBox. Converting to unsigned char
            // first keeps the index in the range 0..255.
            state[i][j] = SBox[static_cast<unsigned char>(state[i][j])];
        }
    }
}
this does not work but if I use
state[i][j] = SBox[state[i][j]&0xff];
then everything magically works again. I thought a char was byte-sized, so AND'ing with 0xff would have no effect. FYI, I am running on a 64-bit machine. This is a little bit annoying, as I have to use a lot of lines like that in other functions. Can anybody explain why? Is there another solution?
state[i][j] may return a signed char, so if a character code does not fit in 7 bits (i.e. it is 0x80 or above), its value is negative and you are using a negative index in SBox[state[i][j]].
Another thing:
If SBox and state have fixed sizes, why not use std::array instead of vector?
std::array<byte,256> for SBox
and std::array<std::array<char,4>,4> for State.
Also, you may use the size member of your vectors or arrays instead of using magic numbers in your loops.
I am using Crypto++ for the first time, and I am having some trouble. Why does this work in C# (with the same keys), but not with Crypto++? I will show my C# and C++ code below.
C# code (this works!):
byte[] Modulus = new byte[] { 0xA3, 0x1D, 0x6C, 0xE5, 0xFA, 0x95, 0xFD, 0xE8, 0x90, 0x21, 0xFA, 0xD1, 0x0C, 0x64, 0x19, 0x2B, 0x86, 0x58, 0x9B, 0x17, 0x2B, 0x10, 0x05, 0xB8, 0xD1, 0xF8, 0x4C, 0xEF, 0x53, 0x4C, 0xD5, 0x4E, 0x5C, 0xAE, 0x86, 0xEF, 0x92, 0x7B, 0x90, 0xD1, 0xE0, 0x62, 0xFD, 0x7C, 0x54, 0x55, 0x9E, 0xE0, 0xE7, 0xBE, 0xFA, 0x3F, 0x9E, 0x15, 0x6F, 0x6C, 0x38, 0x4E, 0xAF, 0x07, 0x0C, 0x61, 0xAB, 0x51, 0x5E, 0x23, 0x53, 0x14, 0x18, 0x88, 0xCB, 0x6F, 0xCB, 0xC5, 0xD6, 0x30, 0xF4, 0x06, 0xED, 0x24, 0x23, 0xEF, 0x25, 0x6D, 0x00, 0x91, 0x77, 0x24, 0x9B, 0xE5, 0xA3, 0xC0, 0x27, 0x90, 0xC2, 0x97, 0xF7, 0x74, 0x9D, 0x6F, 0x17, 0x83, 0x7E, 0xB5, 0x37, 0xDE, 0x51, 0xE8, 0xD7, 0x1C, 0xE1, 0x56, 0xD9, 0x56, 0xC8, 0xC3, 0xC3, 0x20, 0x9D, 0x64, 0xC3, 0x2F, 0x8C, 0x91, 0x92, 0x30, 0x6F, 0xDB };
byte[] Exponent = new byte[] { 0x00, 0x01, 0x00, 0x01 };
byte[] P = new byte[] { 0xCC, 0xE7, 0x5D, 0xFE, 0x72, 0xB6, 0xFD, 0xE7, 0x1D, 0xE3, 0x1A, 0x0E, 0xAC, 0x33, 0x7A, 0xB9, 0x21, 0xE8, 0x8A, 0x84, 0x9B, 0xDA, 0x9F, 0x1E, 0x58, 0x34, 0x68, 0x7A, 0xB1, 0x1D, 0x7E, 0x1C, 0x18, 0x52, 0x65, 0x7B, 0x97, 0x8E, 0xA7, 0x6A, 0x9D, 0xEE, 0x5A, 0x77, 0x52, 0x3B, 0x71, 0x8F, 0x33, 0xD0, 0x49, 0x5E, 0xC3, 0x30, 0x39, 0x72, 0x36, 0xBF, 0x1D, 0xD9, 0xF2, 0x24, 0xE8, 0x71 };
byte[] Q = new byte[] { 0xCB, 0xCA, 0x58, 0x74, 0xD4, 0x03, 0x62, 0x93, 0x06, 0x50, 0x1F, 0x42, 0xF6, 0xAA, 0x59, 0x36, 0xA7, 0xA1, 0xF3, 0x97, 0x5C, 0x9A, 0xC8, 0x6A, 0x27, 0xCF, 0x85, 0x05, 0x2A, 0x66, 0x41, 0x6A, 0x7F, 0x2F, 0x84, 0xC8, 0x18, 0x13, 0xC6, 0x1D, 0x8D, 0xC7, 0x32, 0x2F, 0x72, 0x19, 0x3F, 0xA4, 0xED, 0x71, 0xE7, 0x61, 0xC0, 0xCF, 0x61, 0xAE, 0x8B, 0xA0, 0x68, 0xA7, 0x7D, 0x83, 0x23, 0x0B };
byte[] DP = new byte[] { 0x4C, 0xCA, 0x74, 0xE6, 0x74, 0x35, 0x72, 0x48, 0x58, 0x62, 0x11, 0x14, 0xE8, 0xA2, 0x4E, 0x5E, 0xED, 0x7F, 0x49, 0xD2, 0x52, 0xDA, 0x87, 0x01, 0x87, 0x4A, 0xF4, 0xD0, 0xEE, 0x69, 0xC0, 0x26, 0x65, 0x53, 0x13, 0xE7, 0x52, 0xB0, 0x4A, 0xBB, 0xE1, 0x3E, 0x3F, 0xB7, 0x32, 0x21, 0x46, 0xF8, 0xC5, 0x11, 0x4D, 0x3D, 0xEF, 0x66, 0xB6, 0x50, 0xC0, 0x85, 0xB5, 0x79, 0x45, 0x8F, 0x61, 0x71 };
byte[] InverseQ = new byte[] { 0x28, 0x6A, 0xBB, 0xD1, 0x93, 0x95, 0x94, 0x1A, 0x6E, 0xED, 0xD7, 0x0E, 0xC0, 0x61, 0x2B, 0xC2, 0xEF, 0xE1, 0x86, 0x3D, 0x34, 0x12, 0x88, 0x6F, 0x94, 0xA4, 0x48, 0x6E, 0xC9, 0x87, 0x1E, 0x46, 0x00, 0x46, 0x00, 0x52, 0x8E, 0x9F, 0x47, 0xC0, 0x8C, 0xAB, 0xBC, 0x49, 0xAC, 0x5B, 0x13, 0xF2, 0xEC, 0x27, 0x8D, 0x1B, 0x6E, 0x51, 0x06, 0xA6, 0xF1, 0x62, 0x1A, 0xEB, 0x78, 0x2E, 0x88, 0x48 };
byte[] D = new byte[] { 0x9B, 0xF9, 0xDE, 0xC2, 0x45, 0x93, 0x4C, 0x4C, 0xAC, 0x48, 0x2B, 0xA8, 0x4D, 0xFC, 0xD7, 0xED, 0xB2, 0xFB, 0x72, 0xE9, 0xEA, 0xC1, 0x88, 0x39, 0x07, 0x2A, 0x6F, 0x34, 0x07, 0x81, 0x97, 0x7E, 0xCD, 0xFA, 0x21, 0x02, 0xF5, 0xDD, 0x30, 0xDD, 0x22, 0x4A, 0xB3, 0x41, 0xE5, 0x89, 0x80, 0x73, 0xC4, 0xAF, 0x90, 0x9E, 0x2B, 0x50, 0x8A, 0x0A, 0xD4, 0x6E, 0xBD, 0x0F, 0x15, 0x79, 0x37, 0x95, 0xE8, 0x3D, 0xCF, 0x4C, 0x6D, 0xFF, 0x51, 0x65, 0xE7, 0x90, 0xC1, 0xAC, 0x2D, 0xC6, 0xEB, 0x47, 0x19, 0x2D, 0xD0, 0x58, 0x74, 0x79, 0xAC, 0x08, 0x1C, 0xA3, 0x1D, 0xD0, 0xCE, 0x39, 0x2E, 0xC3, 0xFA, 0x66, 0xEF, 0xC7, 0x8E, 0x10, 0x2F, 0xE4, 0xA1, 0xE7, 0x4E, 0xA8, 0x42, 0xF0, 0xF4, 0xFD, 0x10, 0xA6, 0x67, 0x64, 0xCB, 0x3A, 0x6D, 0x4D, 0x51, 0xEC, 0x1F, 0x9D, 0x56, 0x26, 0xC2, 0xFC };
byte[] DQ = new byte[] { 0xAF, 0xDC, 0x46, 0xE7, 0x52, 0x8A, 0x35, 0x47, 0xA1, 0x1C, 0x05, 0x4E, 0x39, 0x24, 0x99, 0xE6, 0x43, 0x54, 0xCB, 0xAB, 0xE3, 0xDB, 0x22, 0x76, 0x11, 0x32, 0xD0, 0x9C, 0xBB, 0x91, 0x10, 0x84, 0x81, 0x8B, 0x15, 0x2F, 0xC3, 0x2F, 0x55, 0x38, 0xED, 0xBF, 0x67, 0x3C, 0x70, 0x5E, 0xFF, 0x80, 0x28, 0xF3, 0xB1, 0x73, 0xB6, 0xFA, 0x7F, 0x56, 0x2B, 0xE1, 0xDA, 0x4E, 0x27, 0x4E, 0xC2, 0x2F };
// Collect the raw key components into one RSAParameters value.
var rsaParams = new RSAParameters
{
    Modulus = Modulus,
    Exponent = Exponent,
    P = P,
    Q = Q,
    DP = DP,
    InverseQ = InverseQ,
    D = D,
    DQ = DQ
};

// Import the key into the RSA provider.
var crypt = new RSACryptoServiceProvider();
crypt.ImportParameters(rsaParams);

// PKCS#1 signature formatter bound to that key; the hash algorithm is named "SHA1".
var formatter = new RSAPKCS1SignatureFormatter(crypt);
formatter.SetHashAlgorithm("SHA1");

// The 20 input bytes handed to CreateSignature.
byte[] dataFile = { 0x6F, 0x9F, 0x07, 0x04, 0xE2, 0x1A, 0xF7, 0xB8, 0xB2, 0x4F, 0x8D, 0x66, 0x49, 0xA1, 0x09, 0xA7, 0xB2, 0x22, 0x3C, 0xF9 };
byte[] signature = formatter.CreateSignature(dataFile);
Now, my C++ code, which doesn't work:
const char ModulusCON[0x80] = { 0xA3, 0x1D, 0x6C, 0xE5, 0xFA, 0x95, 0xFD, 0xE8, 0x90, 0x21, 0xFA, 0xD1, 0x0C, 0x64, 0x19, 0x2B, 0x86, 0x58, 0x9B, 0x17, 0x2B, 0x10, 0x05, 0xB8, 0xD1, 0xF8, 0x4C, 0xEF, 0x53, 0x4C, 0xD5, 0x4E, 0x5C, 0xAE, 0x86, 0xEF, 0x92, 0x7B, 0x90, 0xD1, 0xE0, 0x62, 0xFD, 0x7C, 0x54, 0x55, 0x9E, 0xE0, 0xE7, 0xBE, 0xFA, 0x3F, 0x9E, 0x15, 0x6F, 0x6C, 0x38, 0x4E, 0xAF, 0x07, 0x0C, 0x61, 0xAB, 0x51, 0x5E, 0x23, 0x53, 0x14, 0x18, 0x88, 0xCB, 0x6F, 0xCB, 0xC5, 0xD6, 0x30, 0xF4, 0x06, 0xED, 0x24, 0x23, 0xEF, 0x25, 0x6D, 0x00, 0x91, 0x77, 0x24, 0x9B, 0xE5, 0xA3, 0xC0, 0x27, 0x90, 0xC2, 0x97, 0xF7, 0x74, 0x9D, 0x6F, 0x17, 0x83, 0x7E, 0xB5, 0x37, 0xDE, 0x51, 0xE8, 0xD7, 0x1C, 0xE1, 0x56, 0xD9, 0x56, 0xC8, 0xC3, 0xC3, 0x20, 0x9D, 0x64, 0xC3, 0x2F, 0x8C, 0x91, 0x92, 0x30, 0x6F, 0xDB };
const char ExponentCON[0x4] = { 0x00, 0x01, 0x00, 0x01 };
const char PCON[0x40] = { 0xCC, 0xE7, 0x5D, 0xFE, 0x72, 0xB6, 0xFD, 0xE7, 0x1D, 0xE3, 0x1A, 0x0E, 0xAC, 0x33, 0x7A, 0xB9, 0x21, 0xE8, 0x8A, 0x84, 0x9B, 0xDA, 0x9F, 0x1E, 0x58, 0x34, 0x68, 0x7A, 0xB1, 0x1D, 0x7E, 0x1C, 0x18, 0x52, 0x65, 0x7B, 0x97, 0x8E, 0xA7, 0x6A, 0x9D, 0xEE, 0x5A, 0x77, 0x52, 0x3B, 0x71, 0x8F, 0x33, 0xD0, 0x49, 0x5E, 0xC3, 0x30, 0x39, 0x72, 0x36, 0xBF, 0x1D, 0xD9, 0xF2, 0x24, 0xE8, 0x71 };
const char QCON[0x40] = { 0xCB, 0xCA, 0x58, 0x74, 0xD4, 0x03, 0x62, 0x93, 0x06, 0x50, 0x1F, 0x42, 0xF6, 0xAA, 0x59, 0x36, 0xA7, 0xA1, 0xF3, 0x97, 0x5C, 0x9A, 0xC8, 0x6A, 0x27, 0xCF, 0x85, 0x05, 0x2A, 0x66, 0x41, 0x6A, 0x7F, 0x2F, 0x84, 0xC8, 0x18, 0x13, 0xC6, 0x1D, 0x8D, 0xC7, 0x32, 0x2F, 0x72, 0x19, 0x3F, 0xA4, 0xED, 0x71, 0xE7, 0x61, 0xC0, 0xCF, 0x61, 0xAE, 0x8B, 0xA0, 0x68, 0xA7, 0x7D, 0x83, 0x23, 0x0B };
const char DPCON[0x40] = { 0x4C, 0xCA, 0x74, 0xE6, 0x74, 0x35, 0x72, 0x48, 0x58, 0x62, 0x11, 0x14, 0xE8, 0xA2, 0x4E, 0x5E, 0xED, 0x7F, 0x49, 0xD2, 0x52, 0xDA, 0x87, 0x01, 0x87, 0x4A, 0xF4, 0xD0, 0xEE, 0x69, 0xC0, 0x26, 0x65, 0x53, 0x13, 0xE7, 0x52, 0xB0, 0x4A, 0xBB, 0xE1, 0x3E, 0x3F, 0xB7, 0x32, 0x21, 0x46, 0xF8, 0xC5, 0x11, 0x4D, 0x3D, 0xEF, 0x66, 0xB6, 0x50, 0xC0, 0x85, 0xB5, 0x79, 0x45, 0x8F, 0x61, 0x71 };
const char InverseQCON[0x40] = { 0x28, 0x6A, 0xBB, 0xD1, 0x93, 0x95, 0x94, 0x1A, 0x6E, 0xED, 0xD7, 0x0E, 0xC0, 0x61, 0x2B, 0xC2, 0xEF, 0xE1, 0x86, 0x3D, 0x34, 0x12, 0x88, 0x6F, 0x94, 0xA4, 0x48, 0x6E, 0xC9, 0x87, 0x1E, 0x46, 0x00, 0x46, 0x00, 0x52, 0x8E, 0x9F, 0x47, 0xC0, 0x8C, 0xAB, 0xBC, 0x49, 0xAC, 0x5B, 0x13, 0xF2, 0xEC, 0x27, 0x8D, 0x1B, 0x6E, 0x51, 0x06, 0xA6, 0xF1, 0x62, 0x1A, 0xEB, 0x78, 0x2E, 0x88, 0x48 };
const char DCON[0x80] = { 0x9B, 0xF9, 0xDE, 0xC2, 0x45, 0x93, 0x4C, 0x4C, 0xAC, 0x48, 0x2B, 0xA8, 0x4D, 0xFC, 0xD7, 0xED, 0xB2, 0xFB, 0x72, 0xE9, 0xEA, 0xC1, 0x88, 0x39, 0x07, 0x2A, 0x6F, 0x34, 0x07, 0x81, 0x97, 0x7E, 0xCD, 0xFA, 0x21, 0x02, 0xF5, 0xDD, 0x30, 0xDD, 0x22, 0x4A, 0xB3, 0x41, 0xE5, 0x89, 0x80, 0x73, 0xC4, 0xAF, 0x90, 0x9E, 0x2B, 0x50, 0x8A, 0x0A, 0xD4, 0x6E, 0xBD, 0x0F, 0x15, 0x79, 0x37, 0x95, 0xE8, 0x3D, 0xCF, 0x4C, 0x6D, 0xFF, 0x51, 0x65, 0xE7, 0x90, 0xC1, 0xAC, 0x2D, 0xC6, 0xEB, 0x47, 0x19, 0x2D, 0xD0, 0x58, 0x74, 0x79, 0xAC, 0x08, 0x1C, 0xA3, 0x1D, 0xD0, 0xCE, 0x39, 0x2E, 0xC3, 0xFA, 0x66, 0xEF, 0xC7, 0x8E, 0x10, 0x2F, 0xE4, 0xA1, 0xE7, 0x4E, 0xA8, 0x42, 0xF0, 0xF4, 0xFD, 0x10, 0xA6, 0x67, 0x64, 0xCB, 0x3A, 0x6D, 0x4D, 0x51, 0xEC, 0x1F, 0x9D, 0x56, 0x26, 0xC2, 0xFC };
const char DQCON[0x40] = { 0xAF, 0xDC, 0x46, 0xE7, 0x52, 0x8A, 0x35, 0x47, 0xA1, 0x1C, 0x05, 0x4E, 0x39, 0x24, 0x99, 0xE6, 0x43, 0x54, 0xCB, 0xAB, 0xE3, 0xDB, 0x22, 0x76, 0x11, 0x32, 0xD0, 0x9C, 0xBB, 0x91, 0x10, 0x84, 0x81, 0x8B, 0x15, 0x2F, 0xC3, 0x2F, 0x55, 0x38, 0xED, 0xBF, 0x67, 0x3C, 0x70, 0x5E, 0xFF, 0x80, 0x28, 0xF3, 0xB1, 0x73, 0xB6, 0xFA, 0x7F, 0x56, 0x2B, 0xE1, 0xDA, 0x4E, 0x27, 0x4E, 0xC2, 0x2F };
// set the params
CryptoPP::AutoSeededRandomPool rng;
InvertibleRSAFunction params;
// Each *CON array holds the raw bytes of one key component, so every Integer is
// built with the (const byte*, size_t) constructor. Passing the char array on its
// own selects the constructor that parses a text string, which produces a
// different (wrong) number. The cast is needed because the arrays are declared
// as char rather than byte.
Integer integ(reinterpret_cast<const byte*>(ModulusCON), sizeof(ModulusCON));
params.SetModulus(integ);
Integer integ1(reinterpret_cast<const byte*>(ExponentCON), sizeof(ExponentCON));
params.SetPublicExponent(integ1);
Integer integ2(reinterpret_cast<const byte*>(PCON), sizeof(PCON));
params.SetPrime1(integ2);
Integer integ3(reinterpret_cast<const byte*>(QCON), sizeof(QCON));
params.SetPrime2(integ3);
Integer integ4(reinterpret_cast<const byte*>(DPCON), sizeof(DPCON));
params.SetModPrime1PrivateExponent(integ4);
Integer integ5(reinterpret_cast<const byte*>(InverseQCON), sizeof(InverseQCON));
params.SetMultiplicativeInverseOfPrime2ModPrime1(integ5);
Integer integ6(reinterpret_cast<const byte*>(DCON), sizeof(DCON));
params.SetPrivateExponent(integ6);
Integer integ7(reinterpret_cast<const byte*>(DQCON), sizeof(DQCON));
params.SetModPrime2PrivateExponent(integ7);
// create the keys
RSA::PrivateKey privateKey(params);
RSA::PublicKey publicKey(params);
CryptoPP::RSASSA_PKCS1v15_SHA_Signer signer(privateKey);
unsigned char data[20] = { 0x6F, 0x9F, 0x07, 0x04, 0xE2, 0x1A, 0xF7, 0xB8, 0xB2, 0x4F, 0x8D, 0x66, 0x49, 0xA1, 0x09, 0xA7, 0xB2, 0x22, 0x3C, 0xF9 };
BYTE *signature = new BYTE[0x80];
signer.SignMessage(rng, data, 20, signature);
Based on what I know, Crypto++'s 'RSASSA_PKCS1v15_SHA_Signer' is the equivalent of C#'s 'RSAPKCS1SignatureFormatter' with the hash algorithm set to SHA1.
The error it throws is:
Unhandled exception at at 0x7646B9BC in proj.exe: Microsoft C++ exception: > CryptoPP::PK_SignatureScheme::KeyTooShort at memory location 0x0040EF18.
Thanks for any help, Hetelek.
I can't answer as to why this works in C#, but as for Crypto++, there are a couple of issues.
Firstly, you're probably invoking the wrong constructor of Integer. I guess you want this one which converts from big-endian byte array:
Integer (const byte *encodedInteger, size_t byteCount, Signedness s=UNSIGNED)
but are using this one, which converts from a string in base 2, 8, 10, or 16 (in your case base 10).
So, using ExponentCON as an example, you should do:
// Number of bytes in the encoded exponent below.
const int ExponentSize(0x4);
// The exponent as a raw byte array (byte, not char).
const byte ExponentCON[ExponentSize] = { 0x00, 0x01, 0x00, 0x01 };
// Two-argument form: pointer to the encoded bytes plus their count.
Integer integ1(ExponentCON, ExponentSize);
or possibly better still:
// String form of the same exponent: the bytes 00 01 00 01 are 0x00010001,
// i.e. 10001h (65537). "101h" would be 257.
std::string ExponentCON("10001h"); // <-- Note trailing 'h' indicating hex encoding
Integer integ1(ExponentCON.c_str());
Next, the members of InvertibleRSAFunction must satisfy certain conditions in order to qualify as a valid key. These can be seen by stepping through the InvertibleRSAFunction::Validate function and I assume check for the conditions described here.
params.Validate(rng, 3); // Returns false for your input data
There are some helper functions which are designed to avoid having to set everything explicitly as you're doing. I don't know if these are suitable for you, but I'm referring to InvertibleRSAFunction::GenerateRandomWithKeySize and the overloaded InvertibleRSAFunction::Initialize.
The Crypto++ Wiki describes the use of these functions with example code.
TLDR: your private exponent, d, is wrong in your sample program. You have other, more obvious problems, too. But d is causing the mathematical problems.
And it begs the question, how did that work in C#? The math does not change across computer languages and runtimes.
The first problem is the declarations using chars:
...
const char ExponentCON[0x4] = { 0x00, 0x01, 0x00, 0x01 };
...
This causes you to use the constructors for ASCII strings, which is the wrong Integer constructor. It results in at least two issues:
// Added for testing
// integ was built from the modulus and integ2/integ3 from the two primes, so this
// reports when the modulus is not the product of the primes.
if (integ != integ2 * integ3)
{
cerr << "error: n != p*q" << endl;
}
// Added for testing
// Validates the key parameters and throws if they are invalid.
params.ThrowIfInvalid(prng, 3);
When running the program, ThrowIfInvalid validates the parameters, and it's the source of the "CryptoMaterial: ..." message:
error: n != p*q
CryptoMaterial: this object contains invalid values
The const char declarations need to be changed to const byte. Then:
Integer integ(ModulusCON, sizeof(ModulusCON));
params.SetModulus(integ);
Integer integ1(ExponentCON, sizeof(ExponentCON));
params.SetPublicExponent(integ1);
Integer integ2(PCON, sizeof(PCON));
params.SetPrime1(integ2);
Integer integ3(QCON, sizeof(QCON));
params.SetPrime2(integ3);
...
With that change in place, this error message from above disappears: error: n != p*q. But the CryptoMaterial: this object contains invalid values remains.
The second problem appears to be that some of your private key parameters are not correct. You can see what tests InvertibleRSAFunction::Validate is performing by looking at the source code in rsa.cpp near line 250.
You can partially sidestep it by using the Initialize overload that takes {n,e,d}, and letting Initialize factor and solve:
// Build n, e and d from their byte arrays (pointer + length constructor).
Integer n(ModulusCON, sizeof(ModulusCON));
Integer e(ExponentCON, sizeof(ExponentCON));
Integer d(DCON, sizeof(DCON));
// Initialize from {n, e, d} only; p, q and the other values are not passed in.
params.Initialize(n,e,d);
// Throws if the resulting key fails validation.
params.ThrowIfInvalid(prng, 3);
However, that results in:
InvertibleRSAFunction: input is not a valid RSA private key
Next, check p and q. #include <nbtheory.h>, and then:
// Build p and q from their byte arrays.
Integer p(PCON, sizeof(PCON));
Integer q(QCON, sizeof(QCON));
// Print "yes"/"no" depending on whether each value passes VerifyPrime.
cout << "P is prime: " << (VerifyPrime(prng, p, 10) ? "yes" : "no") << endl;
cout << "Q is prime: " << (VerifyPrime(prng, q, 10) ? "yes" : "no") << endl;
It results in:
$ ./test.exe
P is prime: yes
Q is prime: yes
At this point, it appears the problem is with one of the exponents.
Next, assume e is relatively prime to phi, #include <modarith.h>, and then:
// Build n, p, q and e from their byte arrays; DCON is deliberately not used here.
Integer n(ModulusCON, sizeof(ModulusCON));
Integer p(PCON, sizeof(PCON));
Integer q(QCON, sizeof(QCON));
Integer e(ExponentCON, sizeof(ExponentCON));
// phi = (p-1)(q-1)
Integer phi = (p-1)*(q-1);
// Recompute d as the multiplicative inverse of e modulo phi.
ModularArithmetic ma(phi);
Integer d = ma.MultiplicativeInverse(e);
params.Initialize(n,e,d);
params.ThrowIfInvalid(prng, 3);
// Reached only when ThrowIfInvalid did not throw.
cout << "Validated parameters" << endl;
It results in:
$ ./test.exe
Validated parameters
So, the problem is with your d exponent. It is easy enough to print with cout << std::hex << d << endl:
$ ./test.exe
Validated parameters
174bcc91cc08400b470a9357e7fd23db2384e4219af4dedc56a0afdc3e796abd965f16c680954549
b4526f01a2c9d7b727620f3ba6c848f19bd9210650ae62593249d7a4e0522ad48aea559d4f6a1f3f
6ae9953bed7c947e273d455a4982523dd90640b0a25572ae8c5e064e39807ddf778af96afd492999
acc40919a4d4f601h
As you can see, the first octet of the calculated d is 0x17, but the first octet of the d you provided is 0x9B.
Also, you might want to do something like the following:
unsigned char data[20] = { 0x6F, 0x9F, 0x07, 0x04, 0xE2, 0x1A, 0xF7, 0xB8, 0xB2, 0x4F,
0x8D, 0x66, 0x49, 0xA1, 0x09, 0xA7, 0xB2, 0x22, 0x3C, 0xF9 };
const size_t req = signer.MaxSignatureLength(sizeof(data));
byte signature[req];
const size_t used = signer.SignMessage(prng, data, 20, signature);
// Trim 'signature' to 'used'