getline with unsigned char string - c++

So, I did some searching for why my compiler gives an error saying:
49 ~\C++\SHA-1\main.cpp invalid conversion from `unsigned char*' to `char*'
And I found out that you cannot convert between unsigned char to char because they are completely different types. So this lead me to the problem of needing a getline function for and unsigned char string in my code.
#include <iostream>
#include <stdint.h>
using namespace std;
uint32_t rotl( uint32_t value, int shift)
{
if ((shift &= sizeof(value)*8 - 1) == 0) return value;
return (value << shift) | (value >> (sizeof(value)*8 - shift));
}
uint32_t rotr( uint32_t value, int shift)
{
if ((shift &= sizeof(value)*8 - 1) == 0) return value;
return (value >> shift) | (value << (sizeof(value)*8 - shift));
}
int textInput();
int hexInput();
int binInput();
unsigned char message[64];
int SHA_1();
int main()
{
int selection;
cout<<"Select Input type:\n\n\t1. Text String\n\t2. Hex String\n\t3. Binary String\n";
cin>>selection;
cin.ignore();
switch(selection)
{
case 1: textInput(); break;
case 2: hexInput(); break;
case 3: binInput(); break;
}
SHA_1();
cout<<"\ndone";
cin.get();
return 0;
}
int textInput()
{
unsigned char input[63] = {0};
cout<<"Enter a text string to be hashed\n\n";
cin.getline(input, 62, '\n');
cin.ignore();
for(int x = 0; x <= 63; x++)
{
//cout<<x<<"\n";
if (input[x] == 0x00)
{
message[x] = 0x00000080;
message[63] = x; //This might be wrong.
//cout<<std::hex<<message;
break;
}
else message[x] = input[x];
}
return 0;
}
int hexInput()
{
return 0;
}
int binInput()
{
return 0;
}
int SHA_1()
{
uint32_t h0 = 0x67452301;
uint32_t h1 = 0xEFCDAB89;
uint32_t h2 = 0x98BADCFE;
uint32_t h3 = 0x10325476;
uint32_t h4 = 0xC3D2E1F0;
uint32_t a;
uint32_t b;
uint32_t c;
uint32_t d;
uint32_t e;
uint32_t f;
uint32_t k;
uint32_t temp;
uint32_t w[80];
/*for( int m = 0; m <= 63; m++)
{
cout<<"message["<<m<<"]="<<std::hex<<int(message[m])<<std::dec<<"\n";
}*/
for( int i = 0; i <= 15; i++)
{
w[i] = ((message[(i*4)] << 24) | (message[(i*4) + 1] << 16) | (message[(i*4) + 2] << 8) | (message[(i*4) + 3]));
//cout<<"W["<<i<<"]="<<std::hex<<w[i]<<std::dec<<"\n";
}
for( int i = 16; i <= 79; i++)
{
w[i] = rotl((w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]), 1);
}
a = h0;
b = h1;
c = h2;
d = h3;
e = h4;
for(int iteration = 0; iteration <= 79; iteration++)
{
if((0 <= iteration) && (iteration <= 19))
{
f = ((b & c) | ((~b) & d));
k = 0x5A827999;
}
else if((20 <= iteration) && (iteration <= 39))
{
f = (b ^ c ^ d);
k = 0x6ED9EBA1;
}
else if((40 <= iteration) && (iteration <= 59))
{
f = ((b & c) | (b & d) | (c & d));
k = 0x8F1BBCDC;
}
else if((60 <= iteration) && (iteration <= 79))
{
f = (b ^ c ^ d);
k = 0xCA62C1D6;
}
temp = (rotl( a, 5) + f + e + k + w[iteration]);
e = d;
d = c;
c = rotl( b, 30);
b = a;
a = temp;
}
h0 = h0 + a;
h1 = h1 + b;
h2 = h2 + c;
h3 = h3 + d;
h4 = h4 + e;
cout<<hex<<h0<<" "<<h1<<" "<<h2<<" "<<h3<<" "<<h4;
return 0;
}
If anyone could give me some suggestions that would be helpful.
Thanks

This is related to strict-aliasing constraints: you're not permitted to reference an unsigned char array via a char pointer.
#include <iostream>
using namespace std;
void aliasing_fun(char* arr) {
arr[0] = 42;
}
int main() {
unsigned char arr[10] = {0};
aliasing_fun(arr); // Not allowed
return 0;
}
http://ideone.com/r4OVZi
You might cast your array to char* in order to do that.

Cast your array to (char *) when calling getline:
getline((char *)input, ...);

Related

& operator for bitwise programming returns wrong value (CPP)

I have this piece of code in cpp on Visual Studio
((handrule1 - maskRule1[0]) & test)
All of the variables are unsigned int.
Their values are respectively
66848250
50138096
0x80808080.
I keep getting value zero as the outcome for this line, which should not be possible.
How does this come?
I already tried working with long unsigned variables instead.
I am guessing that maybe I am doing something else wrong when choosing the data types.
Underneath you can find my full code.
Some of the variables are not defined but that's because they are already defined in another cpp-file we are not supposed to use.
void init(void) {
int aa, ab, l, x = 0;
for (int i = 4; i <= 13; i++) {
aa = 13 - i;
for (int j = (aa + 2) / 3; j <= i && j <= aa; j++) {
ab = aa - j;
for (int k = (ab + 1) / 2; k <= j && k <= ab; k++) {
adj[x] = i + j - 8;
l = ab - k; code[x++] = (((i - 4) * 7) + j) * 5 + k;
// printf("%d %d %d %d: %d\n", i, j, k, l, (((i-4)*7)+j)*5+k);
}
}
}
return;
}
char countSetBits(long long unsigned n)
{
if (n == 0)
return 0;
else
return 1 + countSetBits(n & (n - 1));
}
void init_set(void) {
long long unsigned hand = 0;
char honorPoints = 0;
char nrOfSpades = 0;
char nrOfHearts = 0;
char nrOfDiamonds = 0;
char nrOfClubs = 0;
long long unsigned maskAces = 0x8004002001;
long long unsigned maskKings = 0x10008004002;
long long unsigned maskQueens = 0x20010008004;
long long unsigned maskJacks = 0x40020010008;
long long unsigned maskSpades = 0x1FFF;
long long unsigned maskHearts = 0x3FFE000;
long long unsigned maskDiamonds = 0x7FFC000000;
long long unsigned maskClubs = 0xFFF8000000000;
char upperbound[RU][4];
char lowerbound[RU][4];
unsigned int handrule1 = 0;
unsigned int handrule2 = 0;
unsigned int handrule3 = 0;
unsigned int maskRule1[RU];
unsigned int maskRule2[RU];
unsigned int maskRule3[RU];
unsigned int maskInverse = 0x00FF00FF;
unsigned int test = 0x80808080;
unsigned int result1 = 0;
bool applicableRule = false;
unsigned int fuck = 0xFF936636;
result1 = fuck & test;
for (int r = 0; r < nrr; r++)
{
for (int i = 0; i < 4; i++)
{
upperbound[r][i] = 13;
lowerbound[r][i] = 0;
}
}
for (int r = 0; r < nrr; r++)
{
if (res[r] != 0)
{
for (int i = 0; i < res[r]; i++)
{
upperbound[r][color[r][i]] = (char) nru[r][i];
lowerbound[r][color[r][i]] = (char) nrl[r][i];
}
}
maskRule1[r] = (((char) distl[r] << 24) | ((char) distu[r] << 16) | ((char) ahpl[r] << 8) | (char) ahpu[r]) ^ maskInverse;
maskRule2[r] = ((lowerbound[r][0] << 24) | (upperbound[r][0] << 16) | (lowerbound[r][1] << 8) | upperbound[r][1]) ^ maskInverse;
maskRule3[r] = ((lowerbound[r][2] << 24) | (upperbound[r][2] << 16) | (lowerbound[r][3] << 8) | upperbound[r][3]) ^ maskInverse;
}
int x[52], y, a;
for (int i = 0; i < 52; i++) x[i] = i;
srand(1);
for (int i = 0; i < CRD; i++) {
for (int j = 52; --j > 1;) {
y = rand() % j;
a = x[y]; x[y] = x[j]; x[j] = a;
}
for (int j = 0; j < 4; j++)
{
for (int k = 13 * j; k < 13 * (j + 1); k++)
{
hand |= 1LLU << x[k];
}
//Counting honorpoints
honorPoints = (countSetBits(hand & maskAces) * 4) + (countSetBits(hand & maskKings) * 3) + (countSetBits(hand & maskQueens) * 2) + (countSetBits(hand & maskJacks) * 1);
hp[i][j] = (char) honorPoints;
honorPoints = 0;
//Counting distributions
nrOfSpades = countSetBits(hand & maskSpades);
nrOfHearts = countSetBits(hand & maskHearts);
nrOfDiamonds = countSetBits(hand & maskDiamonds);
nrOfClubs = countSetBits(hand & maskClubs);
std::array<char, 4> arrayTest = { nrOfSpades, nrOfHearts, nrOfDiamonds, nrOfClubs };
std::sort(arrayTest.begin(), arrayTest.end());
char p = arrayTest[3];
char o = arrayTest[2];
char m = arrayTest[1];
int test = (((p - 4) * 7) + o) * 5 + m;
for (int x = 0; x < 39; x++)
{
if (code[x] == test)
{
dis[i][j] = (char) x;
}
}
//Counting opening bids
ahp[i][j] = hp[i][j] + adj[dis[i][j]];
if (ahp[i][j] < 0) ahp[i][j] = 0;
handrule1 = ((dis[i][j] << 24) | (dis[i][j] << 16) | (ahp[i][j] << 8) | ahp[i][j]) ^ maskInverse;
handrule2 = ((nrOfSpades << 24) | (nrOfSpades << 16) | (nrOfHearts << 8) | nrOfHearts) ^ maskInverse;
handrule3 = ((nrOfDiamonds << 24) | (nrOfDiamonds << 16) | (nrOfClubs << 8) | nrOfClubs) ^ maskInverse;
printf("%u \n", handrule1);
printf("%u \n", maskRule1[0]);
for (int r = 0; r < nrr; r++)
{
if ((((handrule1 - maskRule1[r]) & test) == 0) && (((handrule2 - maskRule2[r]) & test) == 0) && (((handrule3 - maskRule3[r]) & test) == 0))
{
cnt[bid[r]][j]++;
applicableRule = true;
break;
}
}
if (applicableRule == false)
{
cnt[0][j]++;
}
applicableRule = false;
handrule1 = 0;
handrule2 = 0;
handrule3 = 0;
nrOfSpades = 0;
nrOfHearts = 0;
nrOfDiamonds = 0;
nrOfClubs = 0;
hand = 0;
}
}
return;
}

Implicit conversion or cast?

I have a function that interleaves the bits of 32 bit words and returns a 64 bit result. For this simple test case, the bottom 3 bytes are correct, and the contents of the top 5 bytes are incorrect. intToBin_32 and intToBin_64 are convenience functions to see the binary representation of the arguments and return val. I've placed casts from the 32 bit type to the 64 bit type everywhere I think they are needed, but I'm still seeing this unexpected (to me, at least) behavior. Is there an implicit conversion going on here, or is there some other reason this doesn't work correctly?
#include <stdint.h>
#include <stdio.h>
struct intString_32 {char bstr [32 + 1 + 8];};
struct intString_64 { char bstr [64 + 1 + 8];};
intString_32 intToBin_32(int a)
{
intString_32 b;
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 5; j++)
{
if (j != 4)
{
b.bstr[5*i + j] = * ((a & (1 << (31 - (4*i + j)))) ? "1" : "0");
}
else
{
b.bstr[5*i + j] = 0x20;
}
}
}
b.bstr[40] = * ( "\0" );
return b;
}
intString_64 intToBin_64(long long a)
{
intString_64 b;
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 9; j++)
{
if (j != 8)
{
b.bstr[9*i + j] = * ((a & (1 << (63 - (8*i + j)))) ? "1" : "0");
}
else
{
b.bstr[9*i + j] = 0x20;
}
}
}
b.bstr[72] = * ( "\0" );
return b;
}
uint64_t interleaveBits(unsigned int a, unsigned int b)
{
uint64_t retVal = 0;
for (unsigned int i = 0; i < 32; i++)
{
retVal |= (uint64_t)((uint64_t)((a >> i) & 0x1)) << (2*i);
retVal |= (uint64_t)((uint64_t)((b >> i) & 0x1)) << (2*i + 1);
}
return retVal;
}
int main(int arc, char* argv)
{
unsigned int foo = 0x0004EDC7;
unsigned int bar = 0x5A5A00FF;
uint64_t bat = interleaveBits(foo, bar);
printf("foo: %s \n", intToBin_32(foo).bstr);
printf("bar: %s \n", intToBin_32(bar).bstr);
printf("bat: %s \n\n", intToBin_64(bat).bstr);
}
Through debugging I noticed it's your intToBin_64 which is wrong, to be specific, in this line:
b.bstr[9*i + j] = * ((a & (1 << (63 - (8*i + j)))) ? "1" : "0");
take a closer look on the shift:
(1 << (63 - (8*i + j)))
The literal 1 is a integer, however, shifting a integer by more than 31 bits is undefined behavior. Shift a longlong instead:
b.bstr[9*i + j] = * ((a & (1ll << (63 - (8*i + j)))) ? "1" : "0");

Convert string of hexadecimal to decimal in c

I am writing an operating system in C and assembly, and in implementing the EXT2 file system I have encountered a problem. I need to convert FOUR bytes of hexadecimal to decimal in c. An example would be to convert 00 00 01(10000) to 65536.I need to convert to decimal,because parsing the super block requires all values to be in decimal. Most specifically the ext2 fs I'm working on is here:
#include "ext2.h"
#include <stdlib.h>
long hex2dec(unsigned const char *hex){
long ret = 0;
int i = 0;
while(hex[i] != 0){
//if(hex[i] >= 0x00 && hex[i] <= 0x09)
// ret+=(10 * i) * hex[i];
}
//kprintf("\n");
return ret;
}
char *strsep(char *buf,int offset,int num){
char *ret = malloc(1024);
int j = 0;
int i = offset;
int end = (offset + num);
int i1 = 0;
while(i1 < num){
///kstrcat(ret,&buf[i]);
ret[i1] = buf[i];
i++;
i1++;
}
return ret;
}
int get_partition(partnum){
if(partnum > 4)
return -1;
//int i = (12 * partnum);
int i = 0;
if(partnum == 1)
i = 190;
else if(partnum == 2)
i = 206;
else if(partnum == 3)
i = 222;
else
i = 190;
int ret = 0;
char *buf = malloc(1024);
ata_read_master(buf,1,0x00);
ret = buf[(i + 2)];
return ret;
}
int _intlen(int i){
int ret = 0;
while(i){
ret++;
i/=10;
}
return ret;
}
int _hex2int(char c){
if(c == '0')
return 0;
else if(c == '1')
return 1;
else if(c == '2')
return 2;
else if(c == '3')
return 3;
else if(c == '4')
return 4;
else if(c == '5')
return 5;
else if(c == '6')
return 6;
else if(c == '7')
return 7;
else if(c == '8')
return 8;
else if(c == '9')
return 9;
else if(c == 'A')
return 10;
else if(c == 'B')
return 11;
else if(c == 'C')
return 12;
else if(c == 'D')
return 13;
else if(c == 'E')
return 14;
else if(c == 'F')
return 15;
}
int hex2int(char c){
int i = c;
}
int comb(const char *str,int n){
int i = 0;
int ret = 0;
while(i < n){
//if(str[i] == 0x01)
// kprintf("(:");
/*int j = str[i];
int k = 0;
int m = 0;
if(j < 10)
j*=10;
else
while(j > 0){
k+=(10 ^ (_intlen(j) - m)) * j % 10;
m++;
j/=10;
}
//kprintf("%d",j);
//if(j == 1)
// kprintf("(:");*/
i++;
}
//ret = (char)ret;
ret = (char)str
int ret = 0;
int i = 0;
char *s = malloc(1024);
/*while(i < n){
//kstrcat(s,&((char*)buf[i]));
n++;
}*/
return ret;
//kprintf("\n");
//return ret;
}
struct ext2_superblock *parse_sblk(int partnum){
int i = get_partition(partnum);
if(i > 0)
kprintf("[EXT2_SUPERBLOCK]Found partition!\n");
else
i = 0;
struct ext2_superblock *ret;
struct ext2_superblock retnp;
char *buf = malloc(1024);
int i1 = 0;
//char *tmpbuf = malloc(4);
/*if(i != 0)
ata_read_master(buf,((i * 4)/256),0x00);
else{
kprintf("[WRN]: Looking for superblock at offset 1024\n");
ata_read_master(buf,4,0x00);
}*/
ata_read_master(buf,2,0x00);
const char *cmp = strsep(buf,0,4);
retnp.ninode = comb(strsep(buf,0,4),4);
retnp.nblock = comb(strsep(buf,4,4),4);
retnp.nsblock = comb(strsep(buf,8,4),4);
retnp.nunallocb = comb(strsep(buf,12,4),4);
retnp.nunalloci = comb(strsep(buf,16,4),4);
retnp.supernum = comb(strsep(buf,20,4),4);
retnp.leftshiftbs = comb(strsep(buf,24,4),4);
retnp.leftshiftfs = comb(strsep(buf,28,4),4);
retnp.numofblockpg= comb(strsep(buf,32,4),4);
// retnp.numofffpbg= comb(strsep(buf,36,4));
retnp.numoffpbg = comb(strsep(buf,36,4),4);
retnp.numofinpbg = comb(strsep(buf,40,4),4);
retnp.lastmount = comb(strsep(buf,44,4),4);
retnp.lastwrite = comb(strsep(buf,48,4),4);
retnp.fsckpass = comb(strsep(buf,52,2),2);
retnp.fsckallow = comb(strsep(buf,54,2),2);
retnp.sig = comb(strsep(buf,56,2),2);
retnp.state = comb(strsep(buf,58,2),2);
retnp.erroropp = comb(strsep(buf,60,2),2);
retnp.minorpor = comb(strsep(buf,52,2),2);
retnp.ptimefsck = comb(strsep(buf,64,4),4);
retnp.inter = comb(strsep(buf,68,4),4);
retnp.osid = comb(strsep(buf,72,4),4);
retnp.mpv = comb(strsep(buf,76,4),4);
retnp.uid = comb(strsep(buf,80,2),2);
retnp.gid = comb(strsep(buf,82,2),2);
ret = &retnp;
return ret;
i1 = 0;
}
If there is anyway of avoiding conversion and successfully implementing ext2 I would be glad to hear it. I would prefer it to be in c,but assembly is also okay.
If you have this:
const uint8_t bytes[] = { 0, 0, 1 };
and you want to consider that the bytes of a (24-bit) unsigned integer in little-endian order, you can convert to the actual integer using:
const uint32_t value = ((uint32_t) bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
This will set value equal to 65536.
You can use std::istringstream or sscanf instead of writing your own.
char const * hex_text[] = "0x100";
const std::string hex_str(hex_text);
std::istringstream text_stream(hex_str);
unsigned int value;
text_stream >> std::ios::hex >> value;
std::cout << "Decimal value of 0x100: " << value << "\n";
Or using sscanf:
sscanf(hex_text, "0x%X", &value);
std::cout << "Decimal value of 0x100: " << value << "\n";
A good idea is to search your C++ reference for existing functions or search the internet, before writing your own.
To roll your own:
unsigned int hex2dec(const std::string& hex_text)
{
unsigned int value = 0U;
const unsigned int length = hex_text.length();
for (unsigned int i = 0; i < length; ++i)
{
const char c = hex_text[i];
if ((c >= '0') && (c <= '9'))
{
value = value * 16 + (c - '0');
}
else
{
c = toupper(c);
if ((c >= 'A') && (c <= 'Z'))
{
value = value * 16 + (c - 'A') + 10;
}
}
}
return value;
}
To convert to use C-style character strings, change the parameter type and use strlen for the length.

CUDA not returning result

I am trying to make a fraction calculator that calculates on a cuda devise, below is first the sequential version and then my try for a parallel version.
It runs without error, but for some reason do it not give the result back, I have been trying to get this to work for 2 weeks now, but can’t find the error!
Serilized version
int f(int x, int c, int n);
int gcd(unsigned int u, unsigned int v);
int main ()
{
clock_t start = clock();
srand ( time(NULL) );
int x = 1;
int y = 2;
int d = 1;
int c = rand() % 100;
int n = 323;
if(n % y == 0)
d = y;
while(d == 1)
{
x = f(x, c, n);
y = f(f(y, c, n), c, n);
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
if(d == n)
{
printf("\nd == n");
c = 0;
while(c == 0 || c == -2)
c = rand() % 100;
x = 2;
y = 2;
}
}
int d2 = n/d;
printf("\nTime elapsed: %f", ((double)clock() - start) / CLOCKS_PER_SEC);
printf("\nResult: %d", d);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
int f(int x, int c, int n)
{
return (int)(pow((float)x, 2) + c) % n;
}
int gcd(unsigned int u, unsigned int v){
int shift;
/ * GCD(0,x) := x * /
if (u == 0 || v == 0)
return u | v;
/ * Let shift := lg K, where K is the greatest power of 2
dividing both u and v. * /
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
/ * From here on, u is always odd. * /
do {
while ((v & 1) == 0) / * Loop X * /
v >>= 1;
/ * Now u and v are both odd, so diff(u, v) is even.
Let u = min(u, v), v = diff(u, v)/2. * /
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
parallel version
#define threads 512
#define MaxBlocks 65535
#define RunningTheads (512*100)
__device__ int gcd(unsigned int u, unsigned int v)
{
int shift;
if (u == 0 || v == 0)
return u | v;
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
do {
while ((v & 1) == 0)
v >>= 1;
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
__device__ bool cuda_found;
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found)
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
}
if(d != 1 && !cuda_found)
{
cuda_found = true;
outr = &d;
}
}
int main ()
{
int n = 323;
int cArray[RunningTheads];
cArray[0] = 1;
for(int i = 1; i < RunningTheads-1; i++)
{
cArray[i] = i+2;
}
int dresult = 0;
int *dev_cArray;
int *dev_result;
HANDLE_ERROR(cudaMalloc((void**)&dev_cArray, RunningTheads*sizeof(int)));
HANDLE_ERROR(cudaMalloc((void**)&dev_result, sizeof(int)));
HANDLE_ERROR(cudaMemcpy(dev_cArray, cArray, RunningTheads*sizeof(int), cudaMemcpyHostToDevice));
int TotalBlocks = ceil((float)RunningTheads/(float)threads);
if(TotalBlocks > MaxBlocks)
TotalBlocks = MaxBlocks;
printf("Blocks: %d\n", TotalBlocks);
printf("Threads: %d\n\n", threads);
cudaKernal<<<TotalBlocks,threads>>>(dev_cArray, n, dev_result);
HANDLE_ERROR(cudaMemcpy(&dresult, dev_result, sizeof(int), cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaFree(dev_cArray));
HANDLE_ERROR(cudaFree(dev_result));
if(dresult == 0)
dresult = 1;
int d2 = n/dresult;
printf("\nResult: %d", dresult);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
Lets have a look at your kernel code:
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found) // always false because d is always 4
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n); // never writes to d because the loop won't
// be executed
}
if(d != 1 && !cuda_found) // maybe true if cuda_found was initalized
// with false
{
cuda_found = true; // Memory race here.
outr = &d; // you are changing the adresse where outr
// points to; the host code does not see this
// change. your cudaMemcpy dev -> host will copy
// the exact values back from device that have
// been uploaded by cudaMemcpy host -> dev
// if you want to set outr to 4 than write:
// *outr = d;
}
}
One of the problems is you don't return the result. In your code you just change outr which has local scope in your kernel function (i.e. changes are not seen outside this function). You should write *outr = d; to change the value of memory you're pointing with outr.
and I'm not sure if CUDA initializes global variables with zero. I mean are you sure cuda_found is always initialized with false?

A memory-efficient SHA1 implementation

I'm working with a very restrictive embedded processor, which only has 128 bytes of ram. I'd like to implement SHA1 on it. RFC3174 describes, in 'method 2', a way of implementing SHA1 that doesn't require allocating an array of 80 32-bit words (which, at 320 bytes, is obviously not practical), and seems like it ought to be usable on my processor. I'm unable to find any implementations of 'method 2', though, and the sample code in the RFC only implements the default method.
Is anyone aware of a memory-efficient implementation of SHA1 in C or C++?
You should be able to quickly adapt the method 1 source to method 2. The function to change is Sha1ProcessMessageBlock() in method 1. Initialize w[0:15] from message, then do a loop of 0 to 79, where you only do w[] manipulation after iteration 16, and temp calculation depends on ts value (0-19 uses one, 20-39 uses another, etc). The important thing to remember is using index%16 or index & 0x0f whenever you are addressing the w[] array.
A quick modification would be something like this (double check all accesses to w to make sure I haven't missed the t & 0x0f):
void SHA1ProcessMessageBlock(SHA1Context *context)
{
const uint32_t K[] = { /* Constants defined in SHA-1 */
0x5A827999,
0x6ED9EBA1,
0x8F1BBCDC,
0xCA62C1D6
};
int t; /* Loop counter */
uint32_t temp; /* Temporary word value */
uint32_t W[16]; /* Word sequence */
uint32_t A, B, C, D, E; /* Word buffers */
/*
* Initialize the first 16 words in the array W. You can move this to your
* context.
*/
for(t = 0; t < 16; t++)
{
W[t] = context->Message_Block[t * 4] << 24;
W[t] |= context->Message_Block[t * 4 + 1] << 16;
W[t] |= context->Message_Block[t * 4 + 2] << 8;
W[t] |= context->Message_Block[t * 4 + 3];
}
A = context->Intermediate_Hash[0];
B = context->Intermediate_Hash[1];
C = context->Intermediate_Hash[2];
D = context->Intermediate_Hash[3];
E = context->Intermediate_Hash[4];
for(t = 0; t < 80; t++) {
if (t >= 16) {
W[t&0xf] = SHA1CircularShift(1,W[(t-3)&0xf] ^ W[(t-8)&0xf] ^ W[(t-14)&0xf] ^ W[t&0xf]);
}
if (t<20) {
temp = SHA1CircularShift(5,A) +
((B & C) | ((~B) & D)) + E + W[t&0xf] + K[0];
}
else if (t<40) {
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[1];
}
else if (t < 60) {
temp = SHA1CircularShift(5,A) +
((B & C) | (B & D) | (C & D)) + E + W[t&0xf] + K[2];
}
else {
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t&0xf] + K[3];
}
E = D;
D = C;
C = SHA1CircularShift(30,B);
B = A;
A = temp;
}
context->Intermediate_Hash[0] += A;
context->Intermediate_Hash[1] += B;
context->Intermediate_Hash[2] += C;
context->Intermediate_Hash[3] += D;
context->Intermediate_Hash[4] += E;
context->Message_Block_Index = 0;
}
There are still savings to be made: get rid of W[] array on stack and put it in context pre-initialized with the data you get.
Also, you need a lot of pre-processing before calling this function. For example, if all your messages are less than 55 bytes, you can put it in W array, add padding, and process immediately. If not, you'll have to call process twice: first with your partially padded input, and again with the rest of the pad, etc. That sort of thing would be very application specific, and I doubt you'll be able to find the code to do it for you.
By the way, the code above is a straight adaptation from the type 1 source from your link. You can probably squeeze a bit more out of it if you try to optimize it further.
I couldn't think of a way to get any savings on the intermediate hash, so you will need a total of 108 bytes for this (109 if counter is also in RAM), and 24 of which is local to this function, and can be reused in other places - so long as they are also temporary. So it is very hard to do what you want to do.
EDIT: If all your messages are less than 55 bytes, you can save another 20 bytes in your context by getting rid of the intermediate_hash[] storage. Simply initialize A-E from the constants, and add the constants at the end. Finally, instead of storing them in a separate variable, overwrite your input when this function ends.
I have implemented SHA-1 for several memory-constrained environments. You can get by with
DWORD W[16] ; // instead of H[80]
DWORD H[5] ; // Intermediate hash value
DWORD BitCount[2] ; // Probably a single DWORD is enough here
plus a few bytes of housekeeping. W is updated on the fly, as a circular buffer, instead of being generated at the start of each round.
working example:
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string>
using namespace std;
unsigned CircularShift(int bits, unsigned word)
{
return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32-bits));
}
int main(void)
{
string mess;
cin >> mess;
unsigned int lm = mess.length();
unsigned int lmb = lm*8;
unsigned char *messc;
messc=(unsigned char*)malloc((sizeof(unsigned char))*64);
for (unsigned short int i =0;i<64;i++)
{
messc[i]=char(0x00);
}
for(int i=0;i<mess.length();i++)
{
messc[i]=mess[i];
}
messc[lm]=(unsigned char)128;
messc[56] = (lmb >> 24) & 0xFF;
messc[57] = (lmb >> 16) & 0xFF;
messc[58] = (lmb >> 8) & 0xFF;
// messc[59] = (lmb) & 0xFF;
messc[60] = (lmb >> 24) & 0xFF;
messc[61] = (lmb >> 16) & 0xFF;
messc[62] = (lmb >> 8) & 0xFF;
messc[63] = (lmb) & 0xFF;
for(int i =0 ;i<64;i++)
{
cout<< hex << (int)messc[i] << " ";
}
unsigned *H;
H=(unsigned*)malloc(5*sizeof(unsigned));
H[0] = 0x67452301;
H[1] = 0xEFCDAB89;
H[2] = 0x98BADCFE;
H[3] = 0x10325476;
H[4] = 0xC3D2E1F0;
const unsigned K[]={0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6};
int t;
unsigned temp;
unsigned *W;
unsigned A, B, C, D, E;
W=(unsigned*)malloc(80*sizeof(unsigned));
unsigned char *messh;
messh=(unsigned char*)malloc(64*sizeof(unsigned char));
int k;
for(t = 0; t < 16; t++)
{
W[t] = ((unsigned) messc[t * 4])<< 24; ;
W[t] |= ((unsigned) messc[t * 4 + 1])<< 16;
W[t] |= ((unsigned) messc[t * 4 + 2]) << 8;
W[t] |= ((unsigned) messc[t * 4 + 3]);
}
for(t = 16; t < 80; t++)
{
W[t] = CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
}
A = H[0];
B = H[1];
C = H[2];
D = H[3];
E = H[4];
for(t = 0; t < 20; t++)
{
temp = CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 20; t < 40; t++)
{
temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 40; t < 60; t++)
{
temp = CircularShift(5,A) +
((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
for(t = 60; t < 80; t++)
{
temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
temp &= 0xFFFFFFFF;
E = D;
D = C;
C = CircularShift(30,B);
B = A;
A = temp;
}
H[0] = (H[0] + A) & 0xFFFFFFFF;
H[1] = (H[1] + B) & 0xFFFFFFFF;
H[2] = (H[2] + C) & 0xFFFFFFFF;
H[3] = (H[3] + D) & 0xFFFFFFFF;
H[4] = (H[4] + E) & 0xFFFFFFFF;
cout <<"\nTHIS IS SHHHHHAAAAAAAAAAA\n";
for(int i=0;i<5;i++)
{
cout << hex << H[i] << " ";
}
//Message_Block_Index = 0;
}
All things considered, looking at your requirements, I think you are going to have to change your specs. Either a bigger chip, or a simpler algorithm. Even implementing SHA-1 (without HMAC) would be a challenge, but it should be doable.