high performance calculations and saving of the threads identificators - c++

I write grid-stride loop to have High Performance Calculations, where large N, for example long long N 1<<36, or even more. From total grid I need only some indexes, which have to satisfy the define condition.
__global__ void Indexes(int *array, int N) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
while( index<N)
{
if (condition)
{....//do something to save index in array}
index += blockDim.x * gridDim.x;
}
}
Of course, it is possible use the Thrust, which allows to have both host and device arrays. But in this case obviously the calculation will be extremely ineffective, because need firstly to create a lot of non-needed elements, then to delete these.
What is the most effective way to save the indexes directly in array in device to pass in CPU?

If your output is relatively dense (i.e. a lot of indices and relatively few zeros), then the stream compaction approach suggested in comments is a good solution. There are a lot of ready-to-go stream compaction implementations which you can probably adapt to your purposes.
If your output is sparse, so you need to save relatively few indices for a lot of inputs, then stream compaction isn't such a great solution because it will waste a lot of GPU memory. In that case (and you can roughly estimate an upper bound of the number of output indices) something like this:
template <typename T>
struct Array
{
T* p;
int Nmax;
int* next;
Array() = default;
__host__ __device__
Array(T* _p, int _Nmax, int* _next) : p(_p), Nmax(_Nmax), next(_next) {};
__device__
int append(T& val)
{
int pos = atomicAdd(next, 1);
if (pos > Nmax) {
atomicExch(next, Nmax);
return -1;
} else {
p[pos] = val;
return pos;
}
};
};
is probably more appropriate. Here, the idea is to use an atomically incremented position in the output array to keep track of where a thread should store its index. The code will signal if you fill the index array, and there will be information from which you can work out a restart strategy to stop the current kernel and then start from the last known index which you were able to store.
A complete example:
$ cat append.cu
#include <iostream>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/copy.h>
namespace AppendArray
{
template <typename T>
struct Array
{
T* p;
int Nmax;
int* next;
Array() = default;
__host__ __device__
Array(T* _p, int _Nmax, int* _next) : p(_p), Nmax(_Nmax), next(_next) {};
__device__
int append(T& val)
{
int pos = atomicAdd(next, 1);
if (pos > Nmax) {
atomicExch(next, Nmax);
return -1;
} else {
p[pos] = val;
return pos;
}
};
};
}
__global__
void kernelfind(int* input, int N, AppendArray::Array<int> indices)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
for(; idx < N; idx += gridDim.x*blockDim.x) {
if (input[idx] % 10000 == 0) {
if (indices.append(idx) < 0) return;
}
}
}
int main()
{
const int Ninputs = 1 << 20;
thrust::device_vector<int> inputs(Ninputs);
thrust::counting_iterator<int> vals(1);
thrust::copy(vals, vals + Ninputs, inputs.begin());
int* d_input = thrust::raw_pointer_cast(inputs.data());
int Nindices = Ninputs >> 12;
thrust::device_vector<int> indices(Nindices);
int* d_indices = thrust::raw_pointer_cast(indices.data());
int* pos; cudaMallocManaged(&pos, sizeof(int)); *pos = 0;
AppendArray::Array<int> index(d_indices, Nindices-1, pos);
int gridsize, blocksize;
cudaOccupancyMaxPotentialBlockSize(&gridsize, &blocksize, kernelfind, 0, 0);
kernelfind<<<gridsize, blocksize>>>(d_input, Ninputs, index);
cudaDeviceSynchronize();
for(int i = 0; i < *pos; ++i) {
int idx = indices[i];
std::cout << i << " " << idx << " " << inputs[idx] << std::endl;
}
return 0;
}
$ nvcc -std=c++11 -arch=sm_52 -o append append.cu
$ ./append
0 9999 10000
1 19999 20000
2 29999 30000
3 39999 40000
4 49999 50000
5 69999 70000
6 79999 80000
7 59999 60000
8 89999 90000
9 109999 110000
10 99999 100000
11 119999 120000
12 139999 140000
13 129999 130000
14 149999 150000
15 159999 160000
16 169999 170000
17 189999 190000
18 179999 180000
19 199999 200000
20 209999 210000
21 219999 220000
22 239999 240000
23 249999 250000
24 229999 230000
25 279999 280000
26 269999 270000
27 259999 260000
28 319999 320000
29 329999 330000
30 289999 290000
31 299999 300000
32 339999 340000
33 349999 350000
34 309999 310000
35 359999 360000
36 379999 380000
37 399999 400000
38 409999 410000
39 369999 370000
40 429999 430000
41 419999 420000
42 389999 390000
43 439999 440000
44 459999 460000
45 489999 490000
46 479999 480000
47 449999 450000
48 509999 510000
49 539999 540000
50 469999 470000
51 499999 500000
52 569999 570000
53 549999 550000
54 519999 520000
55 589999 590000
56 529999 530000
57 559999 560000
58 619999 620000
59 579999 580000
60 629999 630000
61 669999 670000
62 599999 600000
63 609999 610000
64 699999 700000
65 639999 640000
66 649999 650000
67 719999 720000
68 659999 660000
69 679999 680000
70 749999 750000
71 709999 710000
72 689999 690000
73 729999 730000
74 779999 780000
75 799999 800000
76 809999 810000
77 739999 740000
78 849999 850000
79 759999 760000
80 829999 830000
81 789999 790000
82 769999 770000
83 859999 860000
84 889999 890000
85 879999 880000
86 819999 820000
87 929999 930000
88 869999 870000
89 839999 840000
90 909999 910000
91 939999 940000
92 969999 970000
93 899999 900000
94 979999 980000
95 959999 960000
96 949999 950000
97 1019999 1020000
98 1009999 1010000
99 989999 990000
100 1029999 1030000
101 919999 920000
102 1039999 1040000
103 999999 1000000

Related

How to extract data from Minecraft .mca files

I would like to generate a map for my own world, so I wrote a program try to analyze data from files in C++.
According to this page, the region file begins with a 4KB head which tells the positions of each chunk.
I wrote a program, but it outputs the wrong stuff.
This is my program
#include<bits/stdc++.h>
#include<fstream>
using namespace std;
char DataRaw[4100];
uint8_t DataUnsigned8[4100];
struct F4K{int pos,sz,dfn;} _chunk[40][40];
int main()
{
ifstream file("first4K.sample",ios::binary|ios::in|ios::ate);
ifstream::pos_type n=file.tellg();
freopen("offset.out","w",stdout);
file.seekg(0);
file.read((char*)(&DataRaw),n);
DataRaw[n]='\0';
file.close();
for (int i=0;i<n;i++)
DataUnsigned8[i+1]=uint8_t(DataRaw[i]);
for (int i=0;i<32;i++)
{
for (int j=0;j<32;j++)
{
int id=4*((i&31)+(j&31)*32);
_chunk[i][j].pos=(DataUnsigned8[id+1]<<16)|(DataUnsigned8[id+2]<<8)|(DataUnsigned8[id+3]);
_chunk[i][j].sz=DataUnsigned8[id+4];
}
}
cout<<" X Z offset sz\n-------------------"<<endl;
for (int i=0;i<32;i++)
{
for (int j=0;j<32;j++)
cout<<setw(2)<<i<<setw(3)<<j<<setw(9)<<_chunk[i][j].pos<<setw(4)<<_chunk[i][j].sz<<endl;
}
return 0;
}
And it outputs this
X Z offset sz
-------------------
0 0 2098751 32
0 1 2098252 2
0 2 139296 73
0 3 2113312 32
0 4 286978 32
0 5 2099058 3
0 6 2098275 2
0 7 139271 65
...
31 25 7602464 32
31 26 2556192 1
31 27 2105407 32
31 28 6546821 168
31 29 10927590 179
31 30 15109023 35
31 31 15315359 230
I expect that the offset is sorted and begins with 8192,but it was totally wrong! Some addressed (for example X:31,Y:31) is even bigger than the file size (The size is only 8,048,640 Bytes)
May anyone tell me why?

Periodic Latency Spikes in Writing to Shared Memory on Linux

I have the following code:
#pragma pack(4)
struct RECORD_HEADER {
uint64_t msgType;
uint64_t rdtsc;
};
struct BODY {
char content[488];
};
#pragma pack()
class SerializedRDTSC {
public:
typedef unsigned long long timeunit_t;
static timeunit_t start(void) {
unsigned cycles_high, cycles_low;
__asm__ __volatile__ ( "CPUID\n\t"
"RDTSC\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t": "=r" (cycles_high), "=r" (cycles_low)::
"%rax", "%rbx", "%rcx", "%rdx");
return ( (unsigned long long)cycles_low)|( ((unsigned long long)cycles_high)<<32 );
}
static timeunit_t end(void) {
unsigned cycles_high, cycles_low;
__asm__ __volatile__( "RDTSCP\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
"CPUID\n\t": "=r" (cycles_high), "=r" (cycles_low):: "%rax",
"%rbx", "%rcx", "%rdx");
return ( (unsigned long long)cycles_low)|( ((unsigned long long)cycles_high)<<32 );
}
};
char* createSHM() noexcept {
const auto sharedMemHandle = shm_open("testing", O_RDWR | O_CREAT, 0666);
if (-1 == sharedMemHandle) {
std::cout << "failed to open named shared memory: " << std::endl;
return nullptr;
}
constexpr int32_t size = (1 << 26);
ftruncate(sharedMemHandle, size);
char* ptr = (char*) mmap(nullptr, size, PROT_READ | PROT_WRITE,
MAP_SHARED, sharedMemHandle, 0);
if (MAP_FAILED == ptr) {
std::cout << errno << std::endl;
return nullptr;
}
const auto rc = fchmod(sharedMemHandle, 0666);
if (rc == -1) {
fprintf(stderr,
"Can't change permissions to 0666 on shared mem segment: %m\n");
fflush(stderr);
}
return ptr;
}
int main() {
BODY update;
srand(time(nullptr));
char* ptr = createSHM();
constexpr uint64_t n = 700;
constexpr uint64_t n2 = 10;
uint64_t m_data[n * n2];
memset(m_data, 0, sizeof(m_data));
uint64_t r = 0;
for (uint64_t i = 0; i < n; i++) {
for (uint64_t k = 0; k < n2; k++) {
// populate the header
const auto msgType = rand();
const auto rdtsc = rand();
// populate the struct randomly
uint32_t* tmp = reinterpret_cast<uint32_t*>(&update);
for (uint32_t j = 0; j < sizeof(BODY) / sizeof(uint32_t); j++) {
const uint32_t v = rand() % 32767;
tmp[j] = v;
}
// write the struct
const auto s = SerializedRDTSC::start();
memcpy(ptr, (char*)&msgType, sizeof(uint64_t));
ptr+= sizeof(uint64_t);
memcpy(ptr, (char*)&rdtsc, sizeof(uint64_t));
ptr+= sizeof(uint64_t);
memcpy(ptr, &update, sizeof(BODY));
ptr+= sizeof(BODY);
const auto e = SerializedRDTSC::end();
m_data[r++] = e - s;
}
usleep(249998);
}
for (uint32_t i = 0; i < r; i++) {
std::cout << i << "," << m_data[i] << std::endl;
}
}
And for some reason, there are periodic latency spike according to the output:
0 9408
1 210
2 162
3 176
4 172
5 164
6 172
7 8338
8 174
9 186
10 612
11 380
12 380
13 374
14 358
15 13610
16 190
17 186
18 164
19 168
20 246
21 196
22 170
23 5066
24 176
25 176
26 168
27 174
28 166
29 440
30 232
31 214
32 5128
33 180
34 178
35 172
36 174
37 184
38 170
39 162
40 5964
41 182
42 174
43 164
44 180
45 180
46 162
47 172
I already isolated the core and double-checked with htop to make sure no other processes were using the core.
My machine has an i7 CPU (nothing fancy).
And then I tried with an Xeon CPU. The pattern is about the same -- every 7-11 write, there was a spike.
With i7 CPU, I compiled with GCC 7.2 with c++17 and ran it on CentOS 7.3.
With Xeon CPU, I compiled with GCC 4.6 with c++0x and ran it on CentOS 6.5.
My questions are:
1. Why there were periodic latency spikes? (I checked with strace. And I don't see weird system call involved)
2. Any suggestion on how to investigate/understand the spike? More for my learning.
Thanks in advance!
P.S. Yes, some people object to use rdtsc to measure latency because temperature affects TSC. Tho, I don't see any better option as I don't have PTP, and clock_gettime() sometimes will have latency spikes too. If you have any suggestion, it is more than welcome :)
A memory page is 4K bytes. Every time you start writing on a new page, that page needs mapped into the process address space. Since the data you're writing every loop is 8 + 8 + 488 = 504 bytes, you'll get a spike every 8 or 9 time thru the loop.
Since the CPU can speculatively prefetch data from memory, the page fault for the 2nd page (which should occur on the 8th loop) occurs one loop earlier than expected, when the hardware prefetcher tries to access the page.

Decimate vector in eigen

I have a float array Eigen::ArrayXf which I need to decimate (i.e. pick 1 out of f.i. 8 samples).
Eigen::ArrayXf decimatedSignal = Eigen::Map<Eigen::ArrayXf, 0, Eigen::InnerStride<8> >(signal.data(), length, 1).eval();
which works, with a caveat: I need to know how long length is, and it can be specified too long, leading to runtime errors.
Q: is there a way to decimate all that is possible, so that resultant length is == signal.size() / 8 ?
Two things. You are using the c'tor for mapping a matrix:
Map (
PointerArgType dataPtr,
Index nbRows,
Index nbCols,
const StrideType & a_stride = StrideType()
)
Constructor in the dynamic-size matrix case.
Parameters
dataPtr pointer to the array to map
nbRows the number of rows of the matrix expression
nbCols the number of columns of the matrix expression
a_stride optional Stride object, passing the strides.
I think you want the c'tor for a vector:
Map ( PointerArgType dataPtr,
Index a_size,
const StrideType & a_stride = StrideType()
)
Constructor in the dynamic-size vector case.
Parameters
dataPtr pointer to the array to map
a_size the size of the vector expression
a_stride optional Stride object, passing the strides.
The second thing is that you want length == signal.size())/8. Is that always a whole integer, or are you rounding up? If the data is 16 in length and you want the positions [0] and [8], then use 1+(signal.size()-1)/8 as the length parameter:
Eigen::ArrayXf decimatedSignal = Eigen::Map<Eigen::ArrayXf, 0, Eigen::InnerStride<8> >(signal.data(), 1+((signal.size()-1)/8) ).eval();
For example:
#include <Eigen/Core>
#include <iostream>
using std::cout;
using std::endl;
int main(int argc, char *argv[])
{
Eigen::VectorXf signal;
signal.setLinSpaced(64, 0.0, 63.);
cout << "Original signal:" << endl << signal.transpose() << endl;
Eigen::ArrayXf decimatedSignal = Eigen::Map<Eigen::ArrayXf, 0,
Eigen::InnerStride<8> >(signal.data(), 1+((signal.size()-1)/8)).eval();
cout << endl << "Decimated:" << endl << decimatedSignal.transpose() << endl;
return 0;
}
outputs
Original signal:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
Decimated:
0 8 16 24 32 40 48 56
which I think is exactly what you want.

Running a hello world HElib program

After going over this tutorial
http://tommd.github.io/
which uses the HElib library:
https://github.com/shaih/HElib
I get the following output:
The output is getting corrupted. Given that the example has Level 16, there should be plenty of room to perform these operations.
Is there a problem with the parameters ?
Code:
#include "FHE.h"
#include "EncryptedArray.h"
#include <NTL/lzz_pXFactoring.h>
#include <fstream>
#include <sstream>
#include <sys/time.h>
using namespace std;
/**
*
*/
int main(int argc, char** argv) {
/* On our trusted system we generate a new key
* (or read one in) and encrypt the secret data set.
*/
long m=0, p=2, r=1; // Native plaintext space
// Computations will be 'modulo p'
long L=16; // Levels
long c=3; // Columns in key switching matrix
long w=64; // Hamming weight of secret key
long d=0;
long security = 128;
ZZX G;
m = FindM(security,L,c,p, d, 0, 0);
FHEcontext context(m, p, r);
// initialize context
buildModChain(context, L, c);
// modify the context, adding primes to the modulus chain
FHESecKey secretKey(context);
// construct a secret key structure
const FHEPubKey& publicKey = secretKey;
// an "upcast": FHESecKey is a subclass of FHEPubKey
//if(0 == d)
G = context.alMod.getFactorsOverZZ()[0];
secretKey.GenSecKey(w);
// actually generate a secret key with Hamming weight w
addSome1DMatrices(secretKey);
cout << "Generated key" << endl;
EncryptedArray ea(context, G);
// constuct an Encrypted array object ea that is
// associated with the given context and the polynomial G
long nslots = ea.size();
vector<long> v1;
for(int i = 0 ; i < nslots; i++) {
v1.push_back(i*2);
}
Ctxt ct1(publicKey);
ea.encrypt(ct1, publicKey, v1);
vector<long> v2;
Ctxt ct2(publicKey);
for(int i = 0 ; i < nslots; i++) {
v2.push_back(i*3);
}
ea.encrypt(ct2, publicKey, v2);
// On the public (untrusted) system we
// can now perform our computation
Ctxt ctSum = ct1;
Ctxt ctProd = ct1;
ctSum += ct2;
ctProd *= ct2;
vector<long> res;
ea.decrypt(ctSum, secretKey, res);
cout << "All computations are modulo " << p << "." << endl;
for(int i = 0; i < res.size(); i ++) {
cout << v1[i] << " + " << v2[i] << " = " << res[i] << endl;
}
ea.decrypt(ctProd, secretKey, res);
for(int i = 0; i < res.size(); i ++) {
cout << v1[i] << " * " << v2[i] << " = " << res[i] << endl;
}
return 0;
}
Generated key
All computations are modulo 2.
0 + 0 = 0
2 + 3 = 1
4 + 6 = 0
6 + 9 = 1
8 + 12 = 0
10 + 15 = 1
12 + 18 = 0
14 + 21 = 1
16 + 24 = 0
18 + 27 = 1
20 + 30 = 0
22 + 33 = 1
24 + 36 = 0
26 + 39 = 1
28 + 42 = 0
30 + 45 = 1
32 + 48 = 0
34 + 51 = 1
36 + 54 = 0
38 + 57 = 1
40 + 60 = 0
42 + 63 = 1
44 + 66 = 0
46 + 69 = 1
48 + 72 = 0
50 + 75 = 1
52 + 78 = 0
54 + 81 = 1
56 + 84 = 0
58 + 87 = 1
60 + 90 = 0
... Some sum output omitted
0 * 0 = 0
2 * 3 = 0
4 * 6 = 0
6 * 9 = 0
8 * 12 = 0
10 * 15 = 0
12 * 18 = 0
14 * 21 = 0
16 * 24 = 0
18 * 27 = 0
20 * 30 = 0
22 * 33 = 0
24 * 36 = 0
26 * 39 = 0
28 * 42 = 0
30 * 45 = 0
32 * 48 = 0
34 * 51 = 0
36 * 54 = 0
38 * 57 = 0
40 * 60 = 0
42 * 63 = 0
44 * 66 = 0
46 * 69 = 0
48 * 72 = 0
50 * 75 = 0
52 * 78 = 0
54 * 81 = 0
56 * 84 = 0
58 * 87 = 0
60 * 90 = 0
62 * 93 = 0
64 * 96 = 0
66 * 99 = 0
68 * 102 = 0
70 * 105 = 0
72 * 108 = 0
74 * 111 = 0
76 * 114 = 0
78 * 117 = 0
80 * 120 = 0
82 * 123 = 0
84 * 126 = 0
86 * 129 = 0
....
Ah, so this is a misunderstanding of the operations being performed. Notice the constant p=2. I have the text All computations are modulo 2.. Perhaps also stating All inputs are modulo 2 would help hammer the point home. Lets look at some of our computations:
0 + 0 mod 2 = 0
2 + 3 mod 2 = 1
4 + 6 mod 2 = 0
6 + 9 mod 2 = 1
All looks good - addition ring 2 is just exclusive OR. How about multiplication? In ring 2 (binary) that's just AND:
0 * 0 = 0
2 * 3 = 6 mod 2 = 0
4 * 6 = 24 mod 2 = 0
6 * 9 = 54 mod 2 = 0
So that all checks out as well. Finally, look back at the blog and see that I called this out again and give you a way to operate on something you might consider more pleasing:
In this case, I am building for GF(2) - so my homormorphic addition
is XOR and multiplication is AND. Changing this is as easy as changing
the value of p. Folks wanting to see 2+2=4 should set p to something
that matches their desired domain, such as 257 to obtain 8 bit Ints.
However, HELib has regressed in this aspect - setting p equal to anything larger than 2 did not work last time I tried it. Shai confirmed this is a known regression.

How to convert a HexBytes Array to a String in C/C++ on Arduino?

I realized converting a String into a hexarray now need to convert the new array into a new string,because the function Sha256.write needs a char, which would be the way?
char hexstring[] = "020000009ecb752aac3e6d2101163b36f3e6bd67d0c95be402918f2f00000000000000001036e4ee6f31bc9a690053320286d84fbfe4e5ee0594b4ab72339366b3ff1734270061536c89001900000000";
int i;
int n;
uint8_t bytearray[80];
Serial.println("Starting...");
char tmp[3];
tmp[2] = '\0';
int j = 0;
//GET ARRAY
for(i=0;i<strlen(hexstring);i+=2) {
tmp[0] = hexstring[i];
tmp[1] = hexstring[i+1];
bytearray[j] = strtol(tmp,0,16);
j+=1;
}
for(i=0;i<80;i+=1) {
Serial.println( bytearray[i]);
}
int _batchSize;
unsigned char hash[32];
SHA256_CTX ctx;
int idx;
Serial.println("SHA256...");
for(_batchSize = 100000; _batchSize > 0; _batchSize--){
bytearray[76] = nonce;
// Sha256.write(bytearray);
sha256_init(&ctx);
sha256_update(&ctx,bytearray,80);
sha256_final(&ctx,hash); //
sha256_init(&ctx);
sha256_update(&ctx,hash,32);
sha256_final(&ctx,hash); //are this corrent? i'm need in bytes too
// print_hash(hash);
int zeroBytes = 0;
for (int i = 31; i >= 28; i--, zeroBytes++)
if(hash[i] > 0)
break;
if(zeroBytes == 4){ // SOLUTION TRUE, NOW I'M NEED THIS IN STRING
printf("0x");
for (n = 0; n < 32; n++)
Serial.println(printf("%02x", hash[n])); //ERROR :(
}
//increase
if(++nonce == 4294967295)
nonce = 0;
}
}
}
output array on Serial port:
2
0
0
0
158
203
117
42
172
62
109
33
1
22
59
54
243
230
189
103
208
201
91
228
2
145
143
47
0
0
0
0
0
0
0
0
16
54
228
238
111
49
188
154
105
0
83
50
2
134
216
79
191
228
229
238
5
148
180
171
114
51
147
102
179
255
23
52
39
0
97
83
108
137
0
25
0
0
0
0
how to convert this to a hexstring char back?
UPDATED
this solutions works for me, thanks all!
void printHash(uint8_t* hash) {
int id;
for (id=0; id<32; id++) {
Serial.print("0123456789abcdef"[hash[id]>>4]);
Serial.print("0123456789abcdef"[hash[id]&0xf]);
}
Serial.println();
}
Skip to the section Addressing your code... at bottom for most relevant content
(this stuff up here is barely useful blither)
The purpose of your function:
Sha256.write((char *)bytearray);
I believe is to write more data to the running hash. (from this)
Therefore, I am not sure in the context of your question how to convert this to a hex-string char back? how this relates to the way you are using it.
Let me offer another approach for the sake of illustrating how you might go about returning the array of ints back into the form of a "hexadecimal string":
From Here
Here is a code fragment that will calculate the digest for the string "abc"
SHA256_CTX ctx;
u_int8_t results[SHA256_DIGEST_LENGTH];
char *buf;
int n;
buf = "abc";
n = strlen(buf);
SHA256_Init(&ctx);
SHA256_Update(&ctx, (u_int8_t *)buf, n);
SHA256_Final(results, &ctx);
/* Print the digest as one long hex value */
printf("0x");
for (n = 0; n < SHA256_DIGEST_LENGTH; n++)
printf("%02x", results[n]);
putchar('\n');
resulting in:
"0xba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad".
In this example The array I believe you want, is contained in u_int8_t results
There is not enough description in your post to be sure this will help, let me know in the comments, and I will try to address further questions.
Added after your edit:
Continuing from the example above, to put the array contents of results back into a string, you can do something like this:
char *newString;
newString = malloc(sizeof(char)*SHA256_DIGEST_LENGTH*2);
memset(newString, 0, sizeof(char)*SHA256_DIGEST_LENGTH*2);
strcat(newString, "0x");
for(i=0;i<SHA256_DIGEST_LENGTH;i++)
{
sprintf(newString, "%s%02x\n", newString, results[i]);
}
//use newString for stuff...
free(newString);
Addressing your code, and your question directly:
Your code block:
for(_batchSize = 100000; _batchSize > 0; _batchSize--){
bytearray[76] = _batchSize;
Sha256.write((char *)bytearray); //here are the error
}
is not necessary if all you want to do is to convert an array of int into a "hexadecimal string"
Your int array, defined as:
int bytearray[80];
Already contains all the necessary values at this point, as you illustrated with your latest edit. If you want to return this data to a "hexadecimal string" form, then this will do that for you: (replacing result with your bytearray)
char *newString;
newString = malloc(sizeof(char)*SHA256_DIGEST_LENGTH*2);//if these defines are not in your environment
memset(newString, 0, sizeof(char)*SHA256_DIGEST_LENGTH*2);//then replace them with appropriate value for length
strcat(newString, "0x");
for(i=0;i<sizeof(bytearray)/sizeof(bytearray[0]);i++)
{
sprintf(newString, "%s%02x\n", newString, bytearray[i]);
}
//use newString for stuff...
free(newString);