Does anyone have a concise answer for this below? I saw this on career cup. http://www.careercup.com/question?id=4860021380743168
Given a binary representation of an integer say 15 as 1111, find the maximum longest continuous sequence of 0s. The twist is it needs to be done in log N.
For example. 10000101
the answer should be 4, because there are 4 continuous zeroes.
If you have an answer in c++ that would be best for me
Pretty trivial, just go through the binary notation, one linear pass. The binary notation has length log(N), so it will take log(N) time.
Seems like this has been asked before.
However, when I feel the need for bit-twiddling, I reach for my copy of the incomparable Hacker's Delight. As it turns out, it contains a discussion on finding the longest string of 1 bits, including a "logarithmic" implementation that can be used here on the bit-flipped (bitwise NOT) input:
// Finds the longest run of consecutive 0 bits in x.
//
// The input is inverted first, so the body actually searches for the longest
// run of 1 bits in ~x.
//
// Returns the length of that run. Special cases handled explicitly below:
//   - x has no 0 bits at all -> returns 0 and stores 32 in *apos
//   - x is 0 (all bits are 0) -> returns 32 and stores 0 in *apos
// Otherwise *apos receives nlz(y) for the final mask y.
// NOTE(review): nlz() is defined elsewhere; presumably it counts leading zero
// bits, which would make *apos the run's offset from the most significant
// bit - confirm. The 0xFFFFFFFF comparison and the literal 32 assume a
// 32-bit unsigned.
int fmaxstr0(unsigned x, int *apos) {
// Invert, so that runs of 0 bits in the argument become runs of 1 bits.
x = ~x;
unsigned x2, x4, x8, x16, y, t;
int s;
// No 1 bits after inversion: the argument contained no 0 bits.
if (x == 0) {*apos = 32; return 0;}
// Phase 1: xK has a bit set wherever a run of at least K ones ends (at the
// run's most significant side). Stop at the first K for which no such run
// exists; s is then a run length known to exist and y the matching mask.
x2 = x & (x << 1);
if (x2 == 0) {s = 1; y = x; goto L1;}
x4 = x2 & (x2 << 2);
if (x4 == 0) {s = 2; y = x2; goto L2;}
x8 = x4 & (x4 << 4);
if (x8 == 0) {s = 4; y = x4; goto L4;}
x16 = x8 & (x8 << 8);
if (x16 == 0) {s = 8; y = x8; goto L8;}
// All 32 bits set after inversion: the argument was 0.
if (x == 0xFFFFFFFF) {*apos = 0; return 32;}
s = 16; y = x16;
// Phase 2: try to extend the known run length s by 8, 4, 2 and 1 in turn.
// Each step keeps the extension only if some run of the longer length exists
// (t != 0). Label L16 is reached by fall-through only; no goto targets it.
L16: t = y & (x8 << s);
if (t != 0) {s = s + 8; y = t;}
L8: t = y & (x4 << s);
if (t != 0) {s = s + 4; y = t;}
L4: t = y & (x2 << s);
if (t != 0) {s = s + 2; y = t;}
L2: t = y & (x << s);
if (t != 0) {s = s + 1; y = t;}
// y is non-zero here and marks where run(s) of length s end.
L1: *apos = nlz(y);
return s;
}
Have fun!
Related
While optimizing a ray tracer I was trying to improve data locality for the intersection datastructure using a Morton space filling curve, except that my 3D space is not cubic (eg. 512x512x256). All sides are a power of two, but not all sides are the same length.
I have been unable to find any examples for non square Morton curves where the sides are a power of two. If it matters I can guarantee that the x/y axis are the same size with only the z axis being a different length.
Note how the width is 2x height, but it could also be 3x or 4x or any other. I have been unable to find a way how to do this.
Ideally the solution would be fast as the Morton code has to be calculated a lot. So my question is: How do I generate a space filling morton curve for non-cubic spaces? This is specifically for the GPU (Cuda).
The conditions on the dimensions are:
x, y, z are a power of two
x == y
x, y >= z
Or if easier
x, y > z
It would probably break for, say, nz=11, but for half of the size of the XY square it seems to work for me
#include <cstdint>
#include <iostream>
/// Spreads the low 10 bits of `x` so that input bit k ends up at output
/// bit 3*k, leaving two zero bits between consecutive input bits.
/// Bits of `x` above bit 9 are discarded by the masks.
static inline uint32_t spread(uint32_t x)
{
    // Each stage moves the upper part of every remaining group further left
    // and masks away the copies that landed in the wrong place.
    static const struct { unsigned shift; uint32_t keep; } stages[] = {
        { 10u, 0x000F801F },
        {  4u, 0x00E181C3 },
        {  2u, 0x03248649 },
        {  2u, 0x09249249 },
    };
    for (const auto& stage : stages)
    {
        x |= x << stage.shift;
        x &= stage.keep;
    }
    return x;
}
/// Builds a 3D Morton code by interleaving the spread bits of the three
/// coordinates: x occupies bit offsets 0, y offsets 1 and z offsets 2
/// within every group of three output bits.
static inline uint32_t morton(const uint32_t x, const uint32_t y, const uint32_t z)
{
    const uint32_t xs = spread(x);
    const uint32_t ys = spread(y) << 1;
    const uint32_t zs = spread(z) << 2;
    return xs | ys | zs;
}
// Demo driver: prints the Morton code of every cell of a 32x32x16 grid,
// one code per line, iterating x fastest and z slowest.
auto main() -> int {
    const int nx = 32;
    const int ny = 32;
    const int nz = 16;

    for (int iz = 0; iz < nz; ++iz)
        for (int iy = 0; iy < ny; ++iy)
            for (int ix = 0; ix < nx; ++ix)
                std::cout << morton(ix, iy, iz) << '\n';

    return 0;
}
UPDATE
How to make Morton code work for, say, 256x256x64 (8bit*8bit*6bit): you have to spread X and Y non-equidistantly, taking into account number of bits in Z. Basically, for cube you spread evenly: each bit at position
0, 3, 6, 9, 12, 15, 18, 21, 24,
leaving space for other two bits from orthogonal axes.
So there is equidistant spread for a cube. But for the case when you have only 6 bits from Z to insert, you have to have 6 distances of 3, but no Z bits for last gap, thus last gap for X and Y spread should be only 1 bit wide. Thus, non-equidistant spread in X and Y.
Something along the line: if Nx=Ny are number of bits in XY plane, and Nz!=Nx or Ny is number of bits along Z axis,
spread gap should be 2 bits for Nz bits and gap of 1 bit for what is left. So two spread routines - one for X&Y with non-equidistant spread which now depends on Nz, and existing spread function for Z axis.
Ok, here is a working version, seems to be doing right thing
#include <cstdint>
#include <iostream>
#define func auto
// Two-bit-gap spread: moves bit k of the low 10 bits of `v` to bit 3*k.
// Higher input bits are dropped by the masks.
func spreadZ(uint32_t v) -> uint32_t {
    static const struct { unsigned shift; uint32_t keep; } stages[] = {
        { 10u, 0x000F801F },
        {  4u, 0x00E181C3 },
        {  2u, 0x03248649 },
        {  2u, 0x09249249 },
    };
    for (const auto& stage : stages) {
        v |= v << stage.shift;
        v &= stage.keep;
    }
    return v;
}
// Non-equidistant spread for the X and Y coordinates.
// The low `bitsZ` bits of `v` are spread with a two-bit gap (room for a Y/X
// bit and a Z bit); the remaining high bits are spread with a one-bit gap
// (no Z bit left to interleave) and placed above the 3*bitsZ low result bits.
func spreadXY(const uint32_t v, const uint32_t bitsZ) -> uint32_t {
    // Part of the value that shares its bit positions with Z.
    const uint32_t low_mask = (1U << bitsZ) - 1U;
    const uint32_t low_spread = spreadZ(v & low_mask);

    // Part of the value above the Z range: classic one-bit-gap spread.
    static const struct { unsigned shift; uint32_t keep; } stages[] = {
        { 8u, 0x00ff00ffU },
        { 4u, 0x0f0f0f0fU },
        { 2u, 0x33333333U },
        { 1u, 0x55555555U },
    };
    uint32_t high = v >> bitsZ;
    for (const auto& stage : stages) {
        high = (high ^ (high << stage.shift)) & stage.keep;
    }

    return low_spread + (high << 3*bitsZ);
}
// Morton code for a grid whose Z extent has `bitsZ` bits while X and Y may
// have more: X and Y use the non-equidistant spread, Z the plain 2-bit-gap one.
func morton(const uint32_t x, const uint32_t y, const uint32_t z, const uint32_t bitsZ) -> uint32_t {
    const uint32_t xs = spreadXY(x, bitsZ);
    const uint32_t ys = spreadXY(y, bitsZ) << 1;
    const uint32_t zs = spreadZ(z) << 2;
    return xs | ys | zs;
}
// True when `n` has exactly one bit set (zero is not a power of two).
func ispowerof2(const uint32_t n) -> bool {
    if (n == 0u) {
        return false;
    }
    // Clearing the lowest set bit leaves nothing only for a single-bit value.
    return (n & (n - 1u)) == 0u;
}
// Returns the 1-based position of the single set bit of `n`
// (1 for n == 1, 2 for n == 2, 3 for n == 4, ...).
// Throws the int value -1 when `n` is not a power of two.
func bit_pos(const uint32_t n) -> uint32_t {
    if (!ispowerof2(n))
        throw -1;

    uint32_t pos = 1u;
    // Walk a one-bit probe upwards until it meets the set bit of n.
    for (uint32_t probe = 1u; (probe & n) == 0u; probe <<= 1)
        ++pos;
    return pos;
}
// Demo driver: prints the Morton code of every cell of an nx*ny*nz grid,
// one code per line, iterating x fastest and z slowest.
func main() -> int {
    const int nx = 256;
    const int ny = 256;
    const int nz = 256; // 256...128...64...32...16...8...4...2...1 all work

    // Number of bits needed for a Z index; bit_pos throws if nz is not a
    // power of two (not caught here).
    const int bitsZ = bit_pos(nz) - 1;

    for (int iz = 0; iz < nz; ++iz)
        for (int iy = 0; iy < ny; ++iy)
            for (int ix = 0; ix < nx; ++ix)
                std::cout << morton(ix, iy, iz, bitsZ) << '\n';

    return 0;
}
I am struggling since a couple of months to re-write Matlab code into C++
The bigger part of the work has already been done (the fft, ifft, xcorr and binFreq functions have already been written in C++); now I am stuck on one function. The function is supposed to estimate the delay between two signals S1 and S2. It relies on FFT-based cross-correlation.
I am stuck in this line " e = [e, eIter];" where e is a vector with variable length ( e was declared earlier as : "e =[]";)
original function in Matlab:
% ****************************************************************
% iterDelayEst: estimates delay and scaling factor between two
% equally long signals using FFT-based cross-correlation and an
% iteratively narrowed three-point search window.
% ****************************************************************
function [delay_samples, coeff] = iterDelayEst(s1, s2)
% Inputs:
%   s1, s2 - signals with the same number of elements (asserted below).
%            NOTE(review): the inner products "fd1 * fd1'" suggest row
%            vectors are expected - confirm against callers.
% Outputs:
%   delay_samples - centre point x2 of the final search window
%   coeff         - fd2Tau * fd1' ./ (fd1 * fd1'), using the last
%                   phase-shifted spectrum of s2
% Relies on binFreq(), which is defined elsewhere.
n = numel(s1);
halfN = floor(n/2);
assert(numel(s2) == n, 'signals must have same length');
% ****************************************************************
% constants
% ****************************************************************
% exit if uncertainty below threshold
thr_samples = 1e-7;
% exit after fixed number of iterations
nIter = 25;
% frequency domain representation of signals
fd1 = fft(s1);
fd2 = fft(s2);
% first round: No delay was applied
% (an empty tau marks the startup iteration, see isempty(tau) below)
tau = []; %(1,0) ;
fd2Tau = fd2; % delayed s2 in freq. domain
% frequency corresponding to each FFT bin -0.5..0.5
f = binFreq(n);
% uncertainty plot data
% (grows by one element per iteration; not returned by this function)
e = [];
% normalization factor
nf = sqrt((fd1 * fd1') * (fd2 * fd2')) / n; % normalizes to 1
%nf = sqrt((fd1 .* conj(fd1)) * (fd2 .* conj(fd2))') / n;
% search window:
% known maximum and two surrounding points
% (x1 < x2 < x3 are delays, y1..y3 the cross-correlation values there;
% -1 is only a placeholder until the startup iteration fills them in)
x1 = -1;
x2 = -1;
x3 = -1;
y1 = -1;
y2 = -1;
y3 = -1;
% ****************************************************************
% iteration loop
% ****************************************************************
for count = 1:nIter
% ****************************************************************
% crosscorrelation with time-shifted signal
% ****************************************************************
xcorr = abs(ifft(fd2Tau .* conj(fd1)))/ nf;
% ****************************************************************
% detect peak
% ****************************************************************
if isempty(tau)
% ****************************************************************
% startup
% initialize with three adjacent bins around peak
% ****************************************************************
ix = find(xcorr == max(xcorr));
ix = ix(1); % use any, if multiple bitwise identical peaks
% indices of three bins around peak
% (1-based indices with wrap-around at the array ends)
ixLow = mod(ix-1-1, n) + 1; % one below
ixMid = ix;
ixHigh = mod(ix-1+1, n) + 1; % one above
% delay corresponding to the three bins
% (maps bin index to a signed delay in the range -halfN .. n-halfN-1)
tauLow = mod(ixLow -1 + halfN, n) - halfN;
tauMid = mod(ixMid -1 + halfN, n) - halfN;
tauHigh = mod(ixHigh -1 + halfN, n) - halfN;
% crosscorrelation value for the three bins
xcLow = xcorr(ixLow);
xcMid = xcorr(ixMid);
xcHigh = xcorr(ixHigh);
x1 = tauLow;
x2 = tauMid;
x3 = tauHigh;
y1 = xcLow;
y2 = xcMid;
y3 = xcHigh;
else
% ****************************************************************
% only main peak at first bin is of interest
% (s2 was already shifted by tau, so the candidate sits at bin 1)
% ****************************************************************
tauMid = tau;
xcMid = xcorr(1);
if xcMid > y2
% ****************************************************************
% improve midpoint
% ****************************************************************
if tauMid > x2
% midpoint becomes lower point
x1 = x2;
y1 = y2;
else
% midpoint becomes upper point
x3 = x2;
y3 = y2;
end
x2 = tauMid;
y2 = xcMid;
elseif tauMid < x2
% ****************************************************************
% improve low point
% ****************************************************************
assert(tauMid >= x1); % bitwise identical is OK
assert(tauMid > x1 || xcMid > y1); % expect improvement
x1 = tauMid;
y1 = xcMid;
elseif tauMid > x2
% ****************************************************************
% improve high point
% ****************************************************************
assert(tauMid <= x3); % bitwise identical is OK
assert((tauMid < x3) || (xcMid > y3)); % expect improvement
x3 = tauMid;
y3 = xcMid;
else
assert(false, '?? evaluated for existing tau ??');
end
end
% ****************************************************************
% calculate uncertainty (window width)
% ****************************************************************
eIter = abs(x3 - x1);
% append this iteration's window width to the history vector
e = [e, eIter];
if eIter < thr_samples
% disp('threshold reached, exiting');
break;
end
if y1 == y2 || y2 == y3
% reached limit of numerical accuracy on one side
usePoly = 0;
else
% ****************************************************************
% fit 2nd order polynomial and find maximum
% (tau = num/denom is the vertex of the parabola through the
% three window points)
% ****************************************************************
num = (x2^2-x1^2)*y3+(x1^2-x3^2)*y2+(x3^2-x2^2)*y1;
denom = (2*x2-2*x1)*y3+(2*x1-2*x3)*y2+(2*x3-2*x2)*y1;
if denom ~= 0
tau = (num / denom);
% is the point within [x1, x3]?
usePoly = ((tau > x1) && (tau < x3));
else
usePoly = 0;
end
end
if ~usePoly
% revert to linear interpolation on the side with the
% less-accurate outer sample
% Note: There is no guarantee that the side with the more accurate
% outer sample is the right one, as the samples aren't
% placed on a regular grid!
% Therefore, iterate to improve the "worse" side, which will
% eventually become the "better side", and iteration converges.
tauLow = (x1 + x2) / 2;
tauHigh = (x2 + x3) / 2;
% o lists the two candidate midpoints in order of preference
if y1 < y3
o = [tauLow, tauHigh];
else
o = [tauHigh, tauLow];
end
% don't choose point that is identical to one that is already known
tau = o(1);
if tau == x1 || tau == x2 || tau == x3
tau = o(2);
if tau == x1 || tau == x2 || tau == x3
% both candidates coincide with known points: stop iterating
break;
end
end
end
% ****************************************************************
% advance 2nd signal according to location of maximum
% phase shift in frequency domain - delay in time domain
% ****************************************************************
fd2Tau = fd2 .* exp(2i * pi * f * tau);
end % for
% ****************************************************************
% the delay estimate is the final location of the delay that
% maximized crosscorrelation (center of window).
% ****************************************************************
delay_samples = x2;
% ****************************************************************
% Coefficient: Turn signal 1 into signal 2
% ****************************************************************
coeff = fd2Tau * fd1' ./ (fd1 * fd1');
end
C++ equivalent code I have written so far :
//iterDelayEst.cpp
#include <cstddef>
#include <utility>
#include <numeric>
#include <vector>
/// Returns the indices of all elements of `y` whose real part equals the
/// largest real part found in `y`, in ascending index order.
///
/// The result is empty only when `y` itself is empty. The first element
/// seeds the running maximum, so inputs whose real parts are all negative
/// are handled correctly (a zero-initialised maximum would report nothing
/// for them).
std::vector<std::size_t> max_indices(CArray const& y) {
    std::vector<std::size_t> max_idcs;
    double max_value = 0.0;   // only meaningful once max_idcs is non-empty
    std::size_t idx = 0;

    for (auto it = std::begin(y); it != std::end(y); ++it, ++idx) {
        double const value = it->real();
        if (max_idcs.empty() || value > max_value) {
            // New strict maximum: restart the list with this index.
            max_value = value;
            max_idcs.assign(1, idx);
        } else if (!(value < max_value)) {
            // Tie with the current maximum: remember this index as well.
            max_idcs.push_back(idx);
        }
    }
    return max_idcs;
}
/****************************************************************************/
/// Prints the maximum of `y` (by real part, as found by max_indices) together
/// with every index at which it occurs, e.g. "The max is (3,0) at indices [1, 4]".
/// When no maximum is reported (for instance for an empty array) a short
/// notice is printed instead of dereferencing an empty index list.
void tet(CArray const& y) {
    auto const max_idcs = max_indices(y);
    if (max_idcs.empty()) {
        // front() on an empty vector would be undefined behaviour.
        std::cout << "The max is undefined (no elements)\n";
        return;
    }

    std::cout << "The max is " << y[max_idcs.front()] << " at indices [";
    char const* separator = "";
    for (auto const idx : max_idcs) {
        std::cout << separator << idx;
        separator = ", ";
    }
    std::cout << "]\n";
}
/****************xcorr function**************************************************************/
/// Replaces `x` in place by the inverse transform of x * conj(x) taken in the
/// frequency domain, using the fft()/ifft() helpers defined elsewhere, and
/// prints up to the first 32 resulting values for inspection.
void xcorr( CArray & x){
    const int n = 32;   // maximum number of values to print
    fft(x);
    x *= x.apply(std::conj);
    ifft(x);
    // Never index past the end of the array when it holds fewer than n values.
    for (int i = 0; i < n && static_cast<std::size_t>(i) < x.size(); i++) {
        cout << "x[" << i <<"] =" << x[i] << endl;
    }
}
// Unfinished C++ port of the MATLAB iterDelayEst routine: so far only the
// setup and the body of a single iteration are present (the for loop itself
// is commented out).
// NOTE(review): as written this function does not compile; the individual
// problems are flagged inline below. It is declared to return double, but no
// return statement and no closing brace for the function body are visible.
double iterDelayEst(int n,CArray& x, CArray& y)
{
/***************************constants************************************************
/************************************************************************************/
//exit if uncertainty below threshold
int halfN=std::floor(n/2);
double thr_samples = 1e-7;
Complex eIter;
// NOTE(review): 'e' is declared again further down as std::vector<double>;
// two declarations of the same name in one scope are an error.
Complex e;
//exit after fixed number of iterations
double nIter = 25;
// x and y are transformed in place; the caller's arrays are modified.
fft(x);
fft(y);
//frequency domain representation of signals
// NOTE(review): this makes 'tau' a type name, not a variable, yet it is used
// as an object below (tau.empty(), tauMid =tau).
typedef std::vector < std::complex < double > > tau;
auto f = binFreq(n);
std::vector<double> e;
// Normalisation factor: sums the element-wise product
// (x*conj(x)) * (y*conj(y)) over all n bins.
// NOTE(review): the MATLAB original multiplies the two sums
// (sum|fd1|^2 * sum|fd2|^2) rather than summing the products - confirm.
// nf1 does not depend on j and is recomputed on every pass of the loop.
Complex nf3(0.0,0.0);
int j;
for ( j = 0 ; j < n ; j++ )
{
auto nf1 = ((x * x.apply(std::conj)) * (y * y.apply(std::conj)));
nf3 += nf1[j];
}
auto nf2 =std::sqrt(nf3);
auto nf =nf2/(double)n;
cout << "nf3" << nf3 <<endl;
cout << "nf2" << nf2 <<endl;
cout << "nf" << nf <<endl;
// Search window: delays x1..x3 and correlation values y1..y3, initialised to
// the placeholder -1 as in the MATLAB code.
Complex x1(-1,0);
Complex x2(-1,0);
Complex x3(-1,0);
Complex y1(-1,0);
Complex y2(-1,0);
Complex y3(-1,0);
int i;
/***************************************iteration loop**********************************************
**************************************************************************************************/
//for(i=0; i<nIter; i++)
// Cross-correlation: y = |ifft(y .* conj(x))| / nf, computed in place.
x = x.apply(std::conj);
y *= x;
ifft(y);
y =std::abs(y);
y=y/nf;
for ( i = 0 ; i < n ; i++ ){
cout << "y[" << i <<"] =" << y[i] << endl;
}
if (tau.empty())
{
tet(y);
// NOTE(review): the MATLAB code uses the INDEX of the maximum here; this
// takes the VALUE of the first element instead.
Complex ix=y[0]; //use any, if multiple bitwise identical peaks
/***********indices of three bins around peak***********/
// NOTE(review): the two fmod lines below are missing their terminating
// semicolons, and std::fmod is applied to Complex operands.
Complex ixLow= std::fmod(ix-1-1, n) +1 //one below
Complex ixMid=ix;
Complex ixHigh= std::fmod(ix-1+1, n) +1 //one above
/**********delay corresponding to the three bins*********/
Complex tauLow = std::fmod(ixLow -1 + halfN, n) - halfN;
Complex tauMid = std::fmod(ixMid -1 + halfN, n) - halfN;
Complex tauHigh = std::fmod(ixHigh -1 + halfN, n) - halfN;
/**********crosscorrelation value for the three bins******/
// NOTE(review): in MATLAB 'xcorr' is the correlation array being indexed;
// here xcorr names the void function declared above, and the correlation
// values live in y.
Complex xcLow = xcorr(ixLow);
Complex xcMid = xcorr(ixMid);
Complex xcHigh = xcorr(ixHigh);
x1 = tauLow;
x2 = tauMid;
x3 = tauHigh;
y1 = xcLow;
y2 = xcMid;
y3 = xcHigh;
}
else
{
/**********only main peak at first bin is of interest****/
// NOTE(review): tauMid and xcMid were declared inside the if-branch above
// and are not in scope here. MATLAB's xcorr(1) is the first element, which
// would be index 0 in C++.
tauMid =tau;
xcMid = xcorr(1);
if (xcMid > y2){
/**********improve midpoint***************************/
if(tauMid > x2){
//midpoint becomes lower point
x1 = x2;
y1 = y2;
}
else{
//midpoint becomes upper point
x3 = x2;
y3 = y2;
}
x2 = tauMid;
y2 = xcMid;
// NOTE(review): the closing brace of "if (xcMid > y2){" is missing before
// the else-if below. Ordering comparisons (<, >) are also not defined for
// std::complex operands.
else if (tauMid < x2){
//improve low point
assert(tauMid >= x1); // bitwise identical is OK
assert(tauMid > x1 || xcMid > y1); //expect improvement
x1 = tauMid;
y1 = xcMid;
}
else if (tauMid > x2){
//improve high point
assert(tauMid <= x3); // bitwise identical is OK
assert((tauMid < x3) || (xcMid > y3)); // expect improvement
x3 = tauMid;
y3 = xcMid;
}
else{
assert(("?? evaluated for existing tau ??", false));
}
}
/*************Calculate uncertainty (Window Width)********************/
// Width of the current search window; the MATLAB code appends this value to
// the history vector e at this point.
eIter =abs(x3-x1);
}
The entire Matlab program can be found here :
https://www.dsprelated.com/showcode/288.php
I will be very grateful for any help
Well, my Matlab training is really far away in time, but assuming that e = [e eIter] means that you append a value to e, then you need use a vector
define your e vector somewhere like this: you already done that in your code. Maybe you copied/pasted some code, I don't know, it looks rather neat.
std::vector<double> e;
However I doubt that will compile since you also have a Complex e declaration in the same scope...
// then in your loop populate it like this
eIter = abs(x3 - x1); // (this is your code)
e.push_back(eIter); // push_back was the method you need
Then you can access elements of e just like a standard C-array:
e[i]
To perform a loop, I use an old-style C++ iteration. I know there is better but my C++ is a bit rusty since 2006. Comments/edits are welcome to improve this:
// replace const_iterator by iterator to be able to modify the values
std::vector<double>::const_iterator it;
for (it = e.begin(); it != e.end(); it++)
{
const double &value = *it; // (remove const to be able to change the value)
...
I am trying to implement my own square root function which gives square root's integral part only e.g. square root of 3 = 1.
I saw the method here and tried to implement the method
// Integer square root: returns floor(sqrt(x)) for x > 0 and 0 for x <= 0.
//
// Uses Newton's iteration on integers. Starting from a guess that is not
// below the true root, the estimates decrease strictly until they reach
// floor(sqrt(x)); the first estimate that fails to decrease signals that the
// previous one is the answer. (Running a single step from a truncated
// floating-point guess, as before, returned 3 for x == 8.)
int mySqrt(int x)
{
    if (x <= 0)
        return 0;

    // 64-bit intermediates keep (guess + n / guess) free of overflow.
    const long long n = x;
    long long guess = n;                 // n >= sqrt(n) for n >= 1
    long long next = (guess + 1) / 2;    // Newton step from guess == n
    while (next < guess)
    {
        guess = next;
        next = (guess + n / guess) / 2;
    }
    return static_cast<int>(guess);
}
The above method fails for input 8. Also, I don't get why it should work.
Also, I tried the method here
// Integer square root: returns floor(sqrt(x)) for x > 0 and 0 for x <= 0.
//
// Newton's iteration on integers can end in a two-value cycle
// (floor(sqrt(x)) <-> floor(sqrt(x)) + 1, e.g. 1 <-> 2 for x == 3), so
// "stop when the estimate no longer changes" may never trigger. Instead the
// iteration starts at or above the root, where the estimates decrease
// monotonically, and stops at the first estimate that does not decrease.
int mySqrt(int x)
{
    if (x <= 0) return 0;

    // 64-bit intermediates keep (x0 + value / x0) free of overflow.
    const long long value = x;
    long long x0 = value / 2 + 1;        // always >= sqrt(value)
    for (;;)
    {
        const long long improved = (x0 + value / x0) / 2;
        if (improved >= x0)
            break;                       // x0 is floor(sqrt(value))
        x0 = improved;
    }
    return static_cast<int>(x0);
}
In this second way, for input 3 the loop continues ... indefinitely (x0 toggles between 1 and 2).
I am aware that both are essentially versions of Newton's method, but I can't figure out why they fail in certain cases or how I could make them work for all cases. I believe the logic of my implementation is correct. I debugged my code, but I still can't find a way to make it work.
This one works for me:
// Returns floor(sqrt(x)) using Newton's iteration on unsigned integers.
//
// Three consecutive estimates are tracked (previous, current, next). When
// the exact root is not an integer the estimates can alternate between the
// two neighbouring integers, so the loop stops as soon as the next estimate
// equals the previous one rather than waiting for a fixed point.
uintmax_t zsqrt(uintmax_t x)
{
    if (x == 0) return 0;

    uintmax_t next = x;   // the 'next' estimate
    uintmax_t cur = 0;    // the result
    uintmax_t prev;       // the previous estimate
    do {
        prev = cur;
        cur = next;
        const uintmax_t quot = x / cur;
        // floor((cur + quot) / 2) computed without forming the sum, which
        // would wrap around for x == UINTMAX_MAX and lead to a division by
        // zero on the following pass.
        next = (cur >> 1) + (quot >> 1) + (cur & quot & 1u);
    } while (next != prev);
    return cur;
}
returns floor(sqrt(x))
Instead of testing for 0 with a single estimate, test with 2 estimates.
When I was writing this, I noticed the result estimate would sometimes oscillate. This is because, if the exact result is a fraction, the algorithm could only jump between the two nearest values. So, terminating when the next estimate is the same as the previous will prevent an infinite loop.
Try this
// Linear search for the integer part of sqrt(n).
// n is the input number; it must be assigned (e.g. read from input) before
// the loop runs.
int n,i;
i=0;
while(i<=n)
{
    // Square in 64 bits: i*i in int overflows once i exceeds 46340.
    const long long square = static_cast<long long>(i)*i;
    if(square==n)
    {
        cout<<"The number has exact root : "<<i<<endl;
        break;   // answer found, stop instead of reporting further values
    }
    else if(square>n)
    {
        cout<<"The integer part is "<<(i-1)<<endl;
        break;   // first i whose square exceeds n: i-1 is the integer part
    }
    i++;
}
Hope this helps.
You can try these C sqrt implementations:
// return the number that was multiplied by itself to reach N.
// Returns floor(sqrt(num)) using the digit-by-digit (base 4) method:
// only shifts, additions and comparisons, no multiplication or division.
unsigned square_root_1(const unsigned num) {
    unsigned root = 0;      // result being built
    unsigned rem = num;     // what is left of num after subtracting squares
    unsigned bits = 1;      // bit length of num (1 for num == 0 or 1)
    unsigned bit, t;

    for (t = num; t >>= 1; )
        ++bits;

    // Start at the largest power of four not above the top bit of num.
    // Deriving it from (bits - 1) keeps the shift count below the width of
    // unsigned even when the most significant bit of num is set.
    for (bit = 1u << ((bits - 1) & ~1u); bit; bit >>= 2) {
        const unsigned trial = root + bit;
        root >>= 1;
        if (rem >= trial) {
            rem -= trial;
            root += bit;
        }
    }
    return root;
}
// return the number that was multiplied by itself to reach N.
// Returns floor(sqrt(n)) using Newton's iteration on integers.
unsigned square_root_2(unsigned n){
    unsigned cur, next;

    // Small inputs are answered directly: 0 -> 0, 1..3 -> 1.
    if (n <= 3)
        return n > 0;

    // n/2 is not below sqrt(n) for n >= 4; iterate while the estimate
    // still decreases.
    cur = n >> 1;
    next = (cur + n / cur) >> 1;
    while (next < cur) {
        cur = next;
        next = (cur + n / cur) >> 1;
    }
    return cur;
}
Example of usage :
#include <assert.h>
/* Usage example: checks a few known integer square roots. */
int main(void){
    unsigned res;

    res = square_root_1(1847902954);
    assert(res == 42987);

    res = square_root_2(2);
    assert(res == 1);

    res = square_root_2(0);
    assert(res == 0);
}
Source
I need to convert a whole number that should be considered a float into its hexadecimal equivalent.
For example:
Float To Hex
1 = 0x3f800000
2 = 0x40000000
12345 = 0x4640e400
It will always be whole numbers, never fractions such as 0.5.
This could be done with a memory assignment or a formatting function, but in the situation where it is being used there is no memory access and there are no API calls at all.
I tried this idea, but it doesn't work at all
http://bytes.com/topic/c/answers/219928-how-convert-float-hex#post886069
Function floatAsUint_s() below re-interprets a 32-bit IEEE-754 float as an unsigned int for any input x for which |x| is in [1, 2^128), or zero. The information is extracted from the float one bit at a time and the resulting unsigned int is built from those bits one bit at a time. Provided both the input and output reside in processor registers rather than memory, no additional memory is required during the re-interpretation process.
/* re-interpret IEEE-754 float x, |x| in [1, 2**128) or 0, as unsigned int */
/* Re-interprets IEEE-754 float x, |x| in [1, 2**128) or 0, as unsigned int,
   using only float comparisons/arithmetic and integer bit operations. */
unsigned int floatAsUint_s (float x)
{
    /* Powers of two 2**64, 2**32, ..., 2**1 and the exponent-field bit each
       one contributes when it can be divided out of x. */
    static const float scale[7] = {
        1.84467441e19f, 4.29496730e9f, 65536.0f, 256.0f, 16.0f, 4.0f, 2.0f
    };
    static const int scale_bit[7] = { 29, 28, 27, 26, 25, 24, 23 };

    unsigned int bits;
    int negative;
    int k;

    /* Sign: for a zero input 1/x is +inf or -inf, which exposes the sign of
       the zero; any other input is compared with zero directly. */
    negative = ((x == 0.0f) ? (1.0f / x) : x) < 0.0f;
    bits = negative ? 0x80000000 : 0x00000000;
    if (negative) {
        x = -x;
    }

    /* Exponent, which is non-negative per specification: divide out the
       powers of two from the largest down, recording each in the result. */
    for (k = 0; k < 7; k++) {
        if (x >= scale[k]) {
            x /= scale[k];
            bits |= 1 << scale_bit[k];
        }
    }
    bits += (x == 0.0f) ? 0 : (127 << 23); /* IEEE-754 exponent bias */

    /* Mantissa: drop the hidden bit, then shift the fraction out one bit at
       a time, most significant stored bit (22) first. */
    x = x - 1.0f;
    for (k = 22; k >= 0; k--) {
        x = x + x;
        if (x >= 1.0f) {
            x -= 1.0f;
            bits |= 1 << k;
        }
    }
    return bits;
}
/* reference implementation */
/* Reference implementation: copies the object representation of `a` into an
   unsigned int, one byte at a time. */
unsigned int floatAsUint (float a)
{
    unsigned int i;
    const unsigned char *src = (const unsigned char *)&a;
    unsigned char *dst = (unsigned char *)&i;
    unsigned int k;

    for (k = 0; k < sizeof (i); k++) {
        dst[k] = src[k];
    }
    return i;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Exhaustive check: compares floatAsUint_s against the reference for zero
   and for every float from 1.0 up to (but excluding) the largest finite
   float, first with negative and then with positive sign. */
int main (void)
{
    float s;

    for (s = -1.0f; s < 2.0f; s += 2.0f) {
        float x;
        /* x visits 0, then 1, then each next representable float. */
        for (x = 0.0f;
             x < 3.40282346e38f;
             x = (x == 0.0f) ? 1.0f : nextafterf (x, 3.40282346e38f)) {
            const unsigned int ref = floatAsUint (s * x);
            const unsigned int res = floatAsUint_s (s * x);
            if (ref != res) {
                printf ("error # % 15.8e: res= %08x ref=%08x\n", x, res, ref);
                exit (EXIT_FAILURE);
            }
        }
    }
    return EXIT_SUCCESS;
}
An alternative interpretation of the specification in the question is as follows: Given an int x, |x| in [0, 2^24], produce the IEEE-754 single-precision encoding of the value of x, stored in a uint32_t. Use only integer operations for the transformation.
The bit pattern of a positive non-zero integer <= 2^24 is identical to the bit pattern of the mantissa (with hidden bit restored) of the IEEE-754 float it is converted to, only appropriately shifted. We therefore need to normalize by left shifting the integer until its most significant 1-bit is in the position of the hidden mantissa bit, which is bit 23. The number of shifts needed to normalize tells us the magnitude of the integer in powers of two, and thus determines the exponent of the floating-point number. We need to remember to add the exponent bias prescribed by IEEE-754, then combine the sign, exponent, and mantissa portions for the final result.
The function make_float_s() in the code below implements the algorithm described above.
#include <stdint.h>
/* For |a| in [0,2**24], generate IEEE-754 float encoding with same value */
/* For |a| in [0,2**24], generates the IEEE-754 single-precision encoding of
   the value a using integer operations only. */
uint32_t make_float_s (int a)
{
    const uint32_t sign = (a < 0) ? 0x80000000 : 0x00000000;
    int magnitude;
    int shifts = 0;
    int biased;

    if (a == 0) {
        return sign;                      /* zero encodes as all-zero bits */
    }

    /* Normalize: double the magnitude until bit 23 (the hidden mantissa bit)
       or a higher bit is set, counting the doublings. */
    magnitude = (a < 0) ? -a : a;
    for (; magnitude < 0x00800000; magnitude += magnitude) {
        shifts++;
    }

    /* The hidden bit is deliberately left in the mantissa: adding it into
       the exponent field bumps the biased exponent by one, hence 22 here. */
    biased = 127 + (22 - shifts);
    return sign + ((biased << 23) + magnitude);
}
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
/* Returns the object representation of the float `a` as a uint32_t. */
uint32_t float_as_uint (float a)
{
    uint32_t bits;
    memcpy (&bits, &a, sizeof bits);
    return bits;
}
/* reference function */
/* Reference function: lets the compiler convert `a` to float and returns
   the encoding of the result. */
uint32_t make_float (int a)
{
    const float converted = (float)a;
    return float_as_uint (converted);
}
/* Prints the encodings of the three examples from the question, then checks
   make_float_s against the reference for every a with |a| <= 2**24. */
int main (void)
{
    static const int samples[3] = { 1, 2, 12345 };
    int k, s;

    for (k = 0; k < 3; k++) {
        printf("%d encoded as IEEE-754 float: %08x\n", samples[k], make_float_s(samples[k]));
    }

    /* s is the sign factor: -1 on the first pass, +1 on the second. */
    for (s = -1; s < 2; s += 2) {
        int a;
        for (a = 0; a <= 16777216; a++) {
            const uint32_t res = make_float_s (s * a);
            const uint32_t ref = make_float (s * a);
            if (res != ref) {
                printf ("error # % 7d: res=%08x ref=%08x\n", s * a, res, ref);
                exit (EXIT_FAILURE);
            }
        }
    }
    return EXIT_SUCCESS;
}
You can use union for this.
Something like:
// Overlays a float with a 4-element char array so that the bytes of the
// float's storage can be inspected through c_data.
// NOTE(review): assumes sizeof(float) == 4 - confirm for the target. In C++
// reading a union member other than the one last written is undefined
// behaviour (it is permitted in C); the byte order seen in c_data follows
// the platform's endianness.
union data {
float f_data;   // the value to encode
char c_data[4]; // the same storage viewed as raw bytes
};
And usage:
data d1;
d1.f_data = 12345;
After that d1.c_data contain hex values you need.
I'm trying to find prime roots with this algorithm:
/// Returns, in ascending order, every i in [2, val-2] for which none of the
/// powers i^1 ... i^(val/2) is congruent to 1 modulo val. For a prime `val`
/// these are exactly the primitive roots in that range, because the order of
/// an element divides val-1 and is therefore either val-1 or at most
/// (val-1)/2.
///
/// `val` values below 3 yield an empty result (this also avoids the division
/// by zero that val == 0 would otherwise cause).
///
/// Cost is O(val^2) modular multiplications, and the product power * i must
/// fit in unsigned long long, so `val` should stay below 2^32. A much faster
/// check factors val-1 and tests i^((val-1)/f) != 1 for each prime factor f.
std::vector<unsigned long long> Keyexchange::primroot(unsigned long long val) {
    std::vector<unsigned long long> res;
    if (val < 3) {
        return res;
    }

    const unsigned long long half = val / 2;
    for (unsigned long long candidate = 2; candidate < val - 1; ++candidate) {
        unsigned long long power = 1;
        bool reaches_one = false;
        for (unsigned long long j = 0; j < half && !reaches_one; ++j) {
            power = (power * candidate) % val;   // already reduced modulo val
            reaches_one = (power == 1);
        }
        if (!reaches_one) {
            res.push_back(candidate);
        }
    }
    return res;
}
It works great but it is very very slow.
I want to calculate the primitive roots of big numbers like 1073741789. It would be the best if there is a possibility to set a range because I am calculating the whole set right now.
So basically I am looking for a way [a code snippet would be great] to generate about 100,000 of the biggest primitive roots of that given big number.
I know that it is much faster with the Eulersche φ-function but I have no idea how to implement it.
Thanks a lot.
First, if you pick a random integer from 2 to p-1 then it has a decent chance of being a primitive root. So you pick a random integer (or you start with 2), check it, and if it fails, you pick the next one etc.
To check that x is a primitive root: It means that x^(p-1) = 1 (modulo p), but no smaller positive power of x is. Take for example p = 31, p-1 = 30 = 2 x 3 x 5. If x is not a primitive root, then one of x^(30/2), x^(30/3) and x^(30/5) must be 1 (modulo p).
Factor p-1 in its prime factors, calculate x^((p-1)/f) (modulo p) for every prime factor f, and x is a primitive root if none of the results is 1.
Of course x^y (modulo p) needs to be calculated with repeated squaring/multiplying. For example to calculate x^10 you would calculate x^2, x^4, x^5, x^10 in that order.
Once you found a primitive root g, g^k is a primitive root if gcd (k, p-1) = 1. But it would be a rare situation where you care for more than one primitive root.
If the input number is semi-prime and you have its (two) prime factors at hand, then you can use this:
// For every y in [1, p*q) that is coprime to p*q and a quadratic residue
// modulo both p and q, appends its four square roots modulo p*q to the
// result (in the order +/+, +/-, -/+, -/- of the roots modulo p and q).
// NOTE(review): taking roots as y^((p+1)/4) presumes p and q are primes
// congruent to 3 modulo 4 - confirm with callers.
vector<uint64> Roots(uint64 p,uint64 q)
{
    vector<uint64> roots;
    const uint64 modulus = p*q;

    for (uint64 y = 1; y < modulus; ++y)
    {
        if (GCD(modulus,y) != 1 || !InQR(y,p,q))
            continue;

        // One square root of y modulo each factor.
        const uint64 rootP = PowMod(y,(p+1)/4,p);
        const uint64 rootQ = PowMod(y,(q+1)/4,q);

        // Combine each sign choice into a root modulo p*q.
        const uint64 combined[4] = {
            Map(rootP,   rootQ,   p, q),
            Map(rootP,   q-rootQ, p, q),
            Map(p-rootP, rootQ,   p, q),
            Map(p-rootP, q-rootQ, p, q),
        };
        for (int k = 0; k < 4; ++k)
            roots.push_back(combined[k]);
    }
    return roots;
}
Here are the auxiliary functions:
// Greatest common divisor by the Euclidean algorithm.
// Iterative, and GCD(a, 0) returns a instead of dividing by zero.
uint64 GCD(uint64 a,uint64 b)
{
    while (b != 0)
    {
        const uint64 remainder = a % b;
        a = b;
        b = remainder;
    }
    return a;
}
// Computes x^e modulo n by binary (square-and-multiply) exponentiation.
//
// The base is reduced modulo n before the first multiplication, so a large x
// combined with a small n no longer overflows. Intermediate products are
// formed in uint64, so results are only exact while (n-1)^2 fits in uint64.
// x^0 is 1 reduced modulo n, i.e. 0 when n == 1. n == 0 with e > 0 is
// invalid (division by zero), as before.
uint64 PowMod(uint64 x,uint64 e,uint64 n)
{
    if (e == 0)
        return (n == 1) ? 0 : 1;

    uint64 y = 1;
    x %= n;
    while (e > 0)
    {
        if (e & 1)
            y = (y*x)%n;
        x = (x*x)%n;
        e >>= 1;
    }
    return y;
}
// Euler's criterion: true when y^((p-1)/2) is 1 modulo p.
bool InQR(uint64 y,uint64 p)
{
    const uint64 criterion = PowMod(y,(p-1)/2,p);
    return criterion == 1;
}
// True when y passes the quadratic-residue test modulo p and modulo q.
bool InQR(uint64 y,uint64 p,uint64 q)
{
    if (!InQR(y,p))
        return false;
    return InQR(y,q);
}
// Chinese-remainder style combination of u and v into a value modulo p*q,
// using the weights q*Inverse(p,q) and p*Inverse(q,p).
// All arithmetic is done in uint64 without overflow checks.
uint64 Map(uint64 u,uint64 v,uint64 p,uint64 q)
{
    const uint64 weightU = q*Inverse(p,q);
    const uint64 weightV = p*Inverse(q,p);
    const uint64 modulus = p*q;
    return (u*weightU + v*weightV) % modulus;
}
// Extended Euclidean algorithm on (n, a). Tracks the coefficient of `a` in
// the Bezout identity and returns it, shifted by n when it is not positive;
// for coprime inputs this is the inverse of a modulo n.
uint64 Inverse(uint64 n,uint64 a)
{
    int64 coeffPrev = 0;     // coefficient of a belonging to remPrev
    int64 coeffCur  = 1;     // coefficient of a belonging to remCur
    uint64 remPrev = n;
    uint64 remCur  = a;

    while (remCur != 0)
    {
        const uint64 remainder = remPrev%remCur;
        const uint64 quotient  = remPrev/remCur;
        const int64 coeffNext  = coeffPrev-quotient*coeffCur;

        coeffPrev = coeffCur;
        coeffCur  = coeffNext;
        remPrev   = remCur;
        remCur    = remainder;
    }
    return (uint64)(coeffPrev>0? coeffPrev:coeffPrev+n);
}