C++ convert float to hex using only whole number math - c++

I need to convert a whole number that should be considered a float into its hexadecimal equivalent.
For example:
Float To Hex
1 = 0x3f800000
2 = 0x40000000
12345 = 0x4640e400
It will always be whole numbers, never fractions such as 0.5.
This could be done with memory assignment or a formatting function, but the situation its being used it, it has no memory access and no API calls at all.
I tried this idea, but it doesn't work at all
http://bytes.com/topic/c/answers/219928-how-convert-float-hex#post886069

Function floatAsUint_s() below re-interprets a 32-bit IEEE-754 float as an unsigned int for any input x for which |x| is in [1, 2128), or zero. The information is extracted from the float one bit at a time and the resulting unsigned int is built from those bits one bit at at time. Provided both the input and output reside in processor registers rather than memory, no additional memory is required during the re-interpretation process.
/* re-interpret IEEE-754 float x, |x| in [1, 2**128) or 0, as unsigned int */
unsigned int floatAsUint_s (float x)
{
unsigned int i;
/* extract sign bit, proceed with absolute value */
i = (((x == 0.0f) ? (1.0f / x) : x) < 0.0f) ? 0x80000000 : 0x00000000;
x = (((x == 0.0f) ? (1.0f / x) : x) < 0.0f) ? -x : x;
/* extract exponent, which is positive per specification */
if (x >= 1.84467441e19f) { x /= 1.84467441e19f; i |= 1 << 29; }
if (x >= 4.29496730e9f) { x /= 4.29496730e9f; i |= 1 << 28; }
if (x >= 65536.0f) { x /= 65536.0f; i |= 1 << 27; }
if (x >= 256.0f) { x /= 256.0f; i |= 1 << 26; }
if (x >= 16.0f) { x /= 16.0f; i |= 1 << 25; }
if (x >= 4.0f) { x /= 4.0f; i |= 1 << 24; }
if (x >= 2.0f) { x /= 2.0f; i |= 1 << 23; }
i += (x == 0.0f) ? 0 : (127 << 23); // add IEEE-754 specified exponent bias
/* extract mantissa */
x = x - 1.0f; // remove hidden bit
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 22; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 21; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 20; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 19; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 18; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 17; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 16; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 15; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 14; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 13; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 12; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 11; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 10; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 9; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 8; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 7; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 6; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 5; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 4; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 3; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 2; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 1; }
x = x + x; if (x >= 1.0f) { x -= 1.0f; i |= 1 << 0; }
return i;
}
/* reference implementation */
unsigned int floatAsUint (float a)
{
unsigned int i;
unsigned char *ap = (unsigned char *)&a, *ip = (unsigned char*)&i;
for (unsigned int c = 0; c < sizeof (i); c++) {
*ip++ = *ap++;
}
return i;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int main (void)
{
unsigned int res, ref;
float s = -1.0f;
while (s < 2.0f) {
float x = 0.0f;
while (x < 3.40282346e38f) {
ref = floatAsUint (s * x);
res = floatAsUint_s (s * x);
if (ref != res) {
printf ("error # % 15.8e: res= %08x ref=%08x\n", x, res, ref);
exit (EXIT_FAILURE);
}
x = (x == 0.0f) ? 1.0f : nextafterf (x, 3.40282346e38f);
}
s += 2.0f;
}
return EXIT_SUCCESS;
}
An alternative interpretation of the specification in the question is as follows: Given an int x, |x| in [0, 224], produce the IEEE-754 single-precision encoding of the value of x, stored in a uint32_t. Use only integer operations for the transformation.
The bit pattern of a positive non-zero integer <= 224 is identical to the bit pattern of the mantissa (with hidden bit restored) of the IEEE-754 float it is converted to, only appropriately shifted. We therefore need to normalize by left shifting the integer until its most significant 1-bit is in the position of the hidden mantissa bit, which is bit 23. The number of shifts needed to normalize tells us the magnitude of the integer in powers of two, and thus determines the exponent of the floating-point number. We need to remember to add the exponent bias prescribed by IEEE-754, then combine the sign, exponent, and mantissa portions for the final result.
The function make_float_s() in the code below implements the algorithm described above.
#include <stdint.h>
/* For |a| in [0,2**24], generate IEEE-754 float encoding with same value */
uint32_t make_float_s (int a)
{
uint32_t i;
int e = 0;
i = (a < 0) ? 0x80000000 : 0x00000000; // sign bit
if (a) {
a = (a < 0) ? -a : a;
while (a < 0x00800000) { // normalize mantissa
e++;
a = a + a;
}
e = 127 + (22 - e); // determine biased exponent
i += (e << 23) + a; // combine sign, exponent, mantissa
}
return i;
}
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
uint32_t float_as_uint (float a)
{
uint32_t i;
memcpy (&i, &a, sizeof(i));
return i;
}
/* reference function */
uint32_t make_float (int a)
{
return float_as_uint ((float)a);
}
int main (void)
{
uint32_t res, ref;
int a, s;
a=1; printf("%d encoded as IEEE-754 float: %08x\n", a, make_float_s(a));
a=2; printf("%d encoded as IEEE-754 float: %08x\n", a, make_float_s(a));
a=12345; printf("%d encoded as IEEE-754 float: %08x\n", a, make_float_s(a));
s = -1;
while (s < 2) {
a = 0;
while (a <= 16777216) {
res = make_float_s (s * a);
ref = make_float (s * a);
if (res != ref) {
printf ("error # % 7d: res=%08x ref=%08x\n", s * a, res, ref);
exit (EXIT_FAILURE);
}
a++;
}
s += 2;
}
return EXIT_SUCCESS;
}

You can use union for this.
Something like:
union data {
float f_data;
char c_data[4];
};
And usage:
data d1;
d1.f_data = 12345;
After that d1.c_data contain hex values you need.

Related

Morton curve for non cubic areas that are a power of two

While optimizing a ray tracer I was trying to improve data locality for the intersection datastructure using a Morton space filling curve, except that my 3D space is not cubic (eg. 512x512x256). All sides are a power of two, but not all sides are the same length.
I have been unable to find any examples for non square Morton curves where the sides are a power of two. If it matters I can guarantee that the x/y axis are the same size with only the z axis being a different length.
Note how the width is 2x height, but it could also be 3x or 4x or any other. I have been unable to find a way how to do this.
Ideally the solution would be fast as the Morton code has to be calculated a lot. So my question is: How do I generate a space filling morton curve for non-cubic spaces? This is specifically for the GPU (Cuda).
The conditions on the dimensions are:
x, y, z are a power of two
x == y
x, y >= z
Or if easier
x, y > z
It would probably break for, say, nz=11, but for half of the size of the XY square it seems to work for me
#include <cstdint>
#include <iostream>
static inline uint32_t spread(uint32_t x)
{
x = (x | (x << 10)) & 0x000F801F;
x = (x | (x << 4)) & 0x00E181C3;
x = (x | (x << 2)) & 0x03248649;
x = (x | (x << 2)) & 0x09249249;
return x;
}
static inline uint32_t morton(const uint32_t x, const uint32_t y, const uint32_t z)
{
return spread(x) << 0 | spread(y) << 1 | spread(z) << 2;
}
auto main() -> int {
int nx = 32;
int ny = 32;
int nz = 16;
for (int iz = 0; iz != nz; ++iz)
{
for (int iy = 0; iy != ny; ++iy)
{
for (int ix = 0; ix != nx; ++ix)
{
auto m = morton(ix, iy, iz);
std::cout << m << '\n';
}
}
}
return 0;
}
UPDATE
How to make Morton code work for, say, 256x256x64 (8bit*8bit*6bit): you have to spread X and Y non-equidistantly, taking into account number of bits in Z. Basically, for cube you spread evenly: each bit at position
0, 3, 6, 9, 12, 15, 18, 21, 24,
leaving space for other two bits from orthogonal axes.
So there is equidistant spread for a cube. But for the case when you have only 6 bits from Z to insert, you have to have 6 distances of 3, but no Z bits for last gap, thus last gap for X and Y spread should be only 1 bit wide. Thus, non-equidistant spread in X and Y.
Something along the line: if Nx=Ny are number of bits in XY plane, and Nz!=Nx or Ny is number of bits along Z axis,
spread gap should be 2 bits for Nz bits and gap of 1 bit for what is left. So two spread routines - one for X&Y with non-equidistant spread which now depends on Nz, and existing spread function for Z axis.
Ok, here is a working version, seems to be doing right thing
#include <cstdint>
#include <iostream>
#define func auto
func spreadZ(uint32_t v) -> uint32_t { // 2bit gap spread
v = (v | (v << 10)) & 0x000F801F;
v = (v | (v << 4)) & 0x00E181C3;
v = (v | (v << 2)) & 0x03248649;
v = (v | (v << 2)) & 0x09249249;
return v;
}
func spreadXY(const uint32_t v, const uint32_t bitsZ) -> uint32_t {
uint32_t mask_z = (1U << bitsZ) - 1U; // to mask bits which are going to have 2bit gap
uint32_t lo{ v & mask_z }; // lower part of the value where there are Z bits
lo = spreadZ(lo); // 2bit gap spread
uint32_t hi = v >> bitsZ; // high part of the value, 1bit gap
// 1bit gap spread
hi = (hi ^ (hi << 8)) & 0x00ff00ffU;
hi = (hi ^ (hi << 4)) & 0x0f0f0f0fU;
hi = (hi ^ (hi << 2)) & 0x33333333U;
hi = (hi ^ (hi << 1)) & 0x55555555U;
return lo + (hi << 3*bitsZ); // combine them all together
}
func morton(const uint32_t x, const uint32_t y, const uint32_t z, const uint32_t bitsZ) -> uint32_t {
return spreadXY(x, bitsZ) << 0 | spreadXY(y, bitsZ) << 1 | spreadZ(z) << 2;
}
func ispowerof2(const uint32_t n) -> bool {
return n && (!(n & (n - 1u)));
}
func bit_pos(const uint32_t n) -> uint32_t {
if (!ispowerof2(n))
throw -1;
uint32_t i{ 1u }, pos{ 1u };
while (!(i & n)) { // Iterate through bits of n till we find a set bit, i&n will be non-zero only when 'i' and 'n' have a same bit
i = i << 1; // unset current bit and set the next bit in 'i'
++pos; // increment position
}
return pos;
}
func main() -> int {
int nx = 256;
int ny = 256;
int nz = 256; //256...128...64...32...16...8...4...2...1 all works
int bitsZ = bit_pos(nz) - 1; // should be doing try/catch
for (int iz = 0; iz != nz; ++iz)
{
for (int iy = 0; iy != ny; ++iy)
{
for (int ix = 0; ix != nx; ++ix)
{
auto m = morton(ix, iy, iz, bitsZ);
std::cout << m << '\n';
}
}
}
return 0;
}

How to convert uint to string in solidity?

In Solidity, is there a way I can convert my int to string ?
Example:
pragma solidity ^0.4.4;
contract someContract {
uint i;
function test() pure returns (string) {
return "Here and Now is Happiness!";
}
function love() pure returns(string) {
i = i +1;
return "I love " + functionname(i) + " persons" ;
}
}
What is functionname?Thanks!
solidity ^0.8.0
import "#openzeppelin/contracts/utils/Strings.sol";
Strings.toString(myUINT)
works for me.
https://github.com/OpenZeppelin/openzeppelin-contracts/blob/master/contracts/utils/Strings.sol#L15-L35
UPDATE for Solidity 0.8.0:
The uint2str() function from https://github.com/provable-things/ethereum-api/blob/master/provableAPI_0.6.sol is now outdated, and will not work, but here is the updated code, that uses solidity 0.8.0:
(there was an Overflow bug in the last version but solidity <0.8.0 ignored that as it did not affect the answer, but that now throws an error)
byte was also changed to bytes1 and +,-,* and so on work like they would from the SafeMath library.
function uint2str(uint _i) internal pure returns (string memory _uintAsString) {
if (_i == 0) {
return "0";
}
uint j = _i;
uint len;
while (j != 0) {
len++;
j /= 10;
}
bytes memory bstr = new bytes(len);
uint k = len;
while (_i != 0) {
k = k-1;
uint8 temp = (48 + uint8(_i - _i / 10 * 10));
bytes1 b1 = bytes1(temp);
bstr[k] = b1;
_i /= 10;
}
return string(bstr);
}
The two post here are giving the responses :
https://ethereum.stackexchange.com/questions/10811/solidity-concatenate-uint-into-a-string
https://ethereum.stackexchange.com/questions/10932/how-to-convert-string-to-int
function uintToString(uint v) constant returns (string str) {
uint maxlength = 100;
bytes memory reversed = new bytes(maxlength);
uint i = 0;
while (v != 0) {
uint remainder = v % 10;
v = v / 10;
reversed[i++] = byte(48 + remainder);
}
bytes memory s = new bytes(i + 1);
for (uint j = 0; j <= i; j++) {
s[j] = reversed[i - j];
}
str = string(s);
}
Regards
Concrete_Buddhas answer does not work in solidity 0.8.0. This is a revised version:
function uint2str(
uint256 _i
)
internal
pure
returns (string memory str)
{
if (_i == 0)
{
return "0";
}
uint256 j = _i;
uint256 length;
while (j != 0)
{
length++;
j /= 10;
}
bytes memory bstr = new bytes(length);
uint256 k = length;
j = _i;
while (j != 0)
{
bstr[--k] = bytes1(uint8(48 + j % 10));
j /= 10;
}
str = string(bstr);
}
The provable-things code suggested in the comments to the accepted answer worked for me, but my linter threw a warning namely: "uintToStr": Avoid assigning to function parameters. [security/no-assign-params]. The below changes the original code slightly to correct this (reassigning the parameter _i to another variable called number):
/// #notice converts number to string
/// #dev source: https://github.com/provable-things/ethereum-api/blob/master/oraclizeAPI_0.5.sol#L1045
/// #param _i integer to convert
/// #return _uintAsString
function uintToStr(uint _i) internal pure returns (string memory _uintAsString) {
uint number = _i;
if (number == 0) {
return "0";
}
uint j = number;
uint len;
while (j != 0) {
len++;
j /= 10;
}
bytes memory bstr = new bytes(len);
uint k = len - 1;
while (number != 0) {
bstr[k--] = byte(uint8(48 + number % 10));
number /= 10;
}
return string(bstr);
}
If you need to optionally convert to scientific notation, for instance for a more compact number representation, here's a modifed version for that purpose:
function uintToString(uint v, bool scientific) public pure returns (string memory str) {
if (v == 0) {
return "0";
}
uint maxlength = 100;
bytes memory reversed = new bytes(maxlength);
uint i = 0;
while (v != 0) {
uint remainder = v % 10;
v = v / 10;
reversed[i++] = byte(uint8(48 + remainder));
}
uint zeros = 0;
if (scientific) {
for (uint k = 0; k < i; k++) {
if (reversed[k] == '0') {
zeros++;
} else {
break;
}
}
}
uint len = i - (zeros > 2 ? zeros : 0);
bytes memory s = new bytes(len);
for (uint j = 0; j < len; j++) {
s[j] = reversed[i - j - 1];
}
str = string(s);
if (scientific && zeros > 2) {
str = string(abi.encodePacked(s, "e", uintToString(zeros, false)));
}
}
Some unit tests:
function testUintToString() public {
Assert.equal(Utils.uintToString(0, true), '0', '0');
Assert.equal(Utils.uintToString(1, true), '1', '1');
Assert.equal(Utils.uintToString(123, true), '123', '123');
Assert.equal(Utils.uintToString(107680546035, true), '107680546035', '107680546035');
Assert.equal(Utils.uintToString(1e9, true), '1e9', '1e9');
Assert.equal(Utils.uintToString(1 ether, true), '1e18', '1 ether');
Assert.equal(Utils.uintToString(550e8, true), '55e9', '55e9');
}
The code snippets above are compatible with solidity 0.6.0.
While the accepted answer seems correct, it is quite inefficient on large numbers. Here is how I would do it:
function itoa32 (uint x) private pure returns (uint y) {
unchecked {
require (x < 1e32);
y = 0x3030303030303030303030303030303030303030303030303030303030303030;
y += x % 10; x /= 10;
y += x % 10 << 8; x /= 10;
y += x % 10 << 16; x /= 10;
y += x % 10 << 24; x /= 10;
y += x % 10 << 32; x /= 10;
y += x % 10 << 40; x /= 10;
y += x % 10 << 48; x /= 10;
y += x % 10 << 56; x /= 10;
y += x % 10 << 64; x /= 10;
y += x % 10 << 72; x /= 10;
y += x % 10 << 80; x /= 10;
y += x % 10 << 88; x /= 10;
y += x % 10 << 96; x /= 10;
y += x % 10 << 104; x /= 10;
y += x % 10 << 112; x /= 10;
y += x % 10 << 120; x /= 10;
y += x % 10 << 128; x /= 10;
y += x % 10 << 136; x /= 10;
y += x % 10 << 144; x /= 10;
y += x % 10 << 152; x /= 10;
y += x % 10 << 160; x /= 10;
y += x % 10 << 168; x /= 10;
y += x % 10 << 176; x /= 10;
y += x % 10 << 184; x /= 10;
y += x % 10 << 192; x /= 10;
y += x % 10 << 200; x /= 10;
y += x % 10 << 208; x /= 10;
y += x % 10 << 216; x /= 10;
y += x % 10 << 224; x /= 10;
y += x % 10 << 232; x /= 10;
y += x % 10 << 240; x /= 10;
y += x % 10 << 248;
}
}
function itoa (uint x) internal pure returns (string memory s) {
unchecked {
if (x == 0) return "0";
else {
uint c1 = itoa32 (x % 1e32);
x /= 1e32;
if (x == 0) s = string (abi.encode (c1));
else {
uint c2 = itoa32 (x % 1e32);
x /= 1e32;
if (x == 0) {
s = string (abi.encode (c2, c1));
c1 = c2;
} else {
uint c3 = itoa32 (x);
s = string (abi.encode (c3, c2, c1));
c1 = c3;
}
}
uint z = 0;
if (c1 >> 128 == 0x30303030303030303030303030303030) { c1 <<= 128; z += 16; }
if (c1 >> 192 == 0x3030303030303030) { c1 <<= 64; z += 8; }
if (c1 >> 224 == 0x30303030) { c1 <<= 32; z += 4; }
if (c1 >> 240 == 0x3030) { c1 <<= 16; z += 2; }
if (c1 >> 248 == 0x30) { z += 1; }
assembly {
let l := mload (s)
s := add (s, z)
mstore (s, sub (l, z))
}
}
}
}
Explanation
The itoa32 function converts a number below 10^32 into exactly 32 digits, padding it with zeros if necessary.
The itoa function calls the itoa32 up to three times to convert an arbitrary 256-bit number, then concatenates the results and removes leading zeros. It uses binary search to find out the exact number of leading zeros to be removed and removes leading zeros from a string in-place.
If your uint is coming from originally encoding a string into uint
string(abi.encode(myUint))

SDL2.0 screen nullptr on render of Window

Hey so I'm relatively new to the SDL library and just trying to get to grips with it.
I found a C++ conversion for Minecraft4k but it was based on SDL1.x so I'm trying to convert it to SDL2.0
At present the build is successful, but when it gets to;
plot(x, y, rgbmul(col, fxmul(br, ddist)));
It throws a read access violation exception:
screen was nullptr
This is my code;
// C++ port of Minecraft 4k JS (http://jsdo.it/notch/dB1E)
// By The8BitPimp
// See: the8bitpimp.wordpress.com
#include <SDL.h>
#include <math.h>
#include <windows.h>
#include <tchar.h>
#include "plot.h"
#include "llist.h"
const int w = 320;
const int h = 240;
SDL_Surface *screen = nullptr;
const float math_pi = 3.14159265359f;
static inline float math_sin(float x) {
return sinf(x);
}
static inline float math_cos(float x) {
return cosf(x);
}
// the texture map
int texmap[16 * 16 * 16 * 3];
// the voxel map
char map[64 * 64 * 64];
static inline int random(int max) {
return (rand() ^ (rand() << 16)) % max;
}
static inline void plot(int x, int y, int c) {
int *p = (int*)screen->pixels;
p[y * w + x] = c;
}
static void makeTextures(void) {
// each texture
for (int j = 0; j<16; j++) {
int k = 255 - random(96);
// each pixel in the texture
for (int m = 0; m<16 * 3; m++)
for (int n = 0; n<16; n++) {
int i1 = 0x966C4A;
int i2 = 0;
int i3 = 0;
if (j == 4)
i1 = 0x7F7F7F;
if ((j != 4) || (random(3) == 0))
k = 255 - random(96);
if (j == 1)
{
if (m < (((n * n * 3 + n * 81) >> 2) & 0x3) + 18)
i1 = 0x6AAA40;
else if (m < (((n * n * 3 + n * 81) >> 2) & 0x3) + 19)
k = k * 2 / 3;
}
if (j == 7)
{
i1 = 0x675231;
if ((n > 0) && (n < 15) && (((m > 0) && (m < 15)) || ((m > 32) && (m < 47))))
{
i1 = 0xBC9862;
i2 = n - 7;
i3 = (m & 0xF) - 7;
if (i2 < 0)
i2 = 1 - i2;
if (i3 < 0)
i3 = 1 - i3;
if (i3 > i2)
i2 = i3;
k = 196 - random(32) + i2 % 3 * 32;
}
else if (random(2) == 0)
k = k * (150 - (n & 0x1) * 100) / 100;
}
if (j == 5)
{
i1 = 0xB53A15;
if (((n + m / 4 * 4) % 8 == 0) || (m % 4 == 0))
i1 = 0xBCAFA5;
}
i2 = k;
if (m >= 32)
i2 /= 2;
if (j == 8)
{
i1 = 5298487;
if (random(2) == 0)
{
i1 = 0;
i2 = 255;
}
}
// fixed point colour multiply between i1 and i2
i3 =
((((i1 >> 16) & 0xFF) * i2 / 255) << 16) |
((((i1 >> 8) & 0xFF) * i2 / 255) << 8) |
((i1 & 0xFF) * i2 / 255);
// pack the colour away
texmap[n + m * 16 + j * 256 * 3] = i3;
}
}
}
static void makeMap(void) {
// add random blocks to the map
for (int x = 0; x < 64; x++) {
for (int y = 0; y < 64; y++) {
for (int z = 0; z < 64; z++) {
int i = (z << 12) | (y << 6) | x;
float yd = (y - 32.5) * 0.4;
float zd = (z - 32.5) * 0.4;
map[i] = random(16);
float th = random(256) / 256.0f;
if (th > sqrtf(sqrtf(yd * yd + zd * zd)) - 0.8f)
map[i] = 0;
}
}
}
}
static void init(void) {
makeTextures();
makeMap();
}
// fixed point byte byte multiply
static inline int fxmul(int a, int b) {
return (a*b) >> 8;
}
// fixed point 8bit packed colour multiply
static inline int rgbmul(int a, int b) {
int _r = (((a >> 16) & 0xff) * b) >> 8;
int _g = (((a >> 8) & 0xff) * b) >> 8;
int _b = (((a)& 0xff) * b) >> 8;
return (_r << 16) | (_g << 8) | _b;
}
static void render(void) {
float now = (float)(SDL_GetTicks() % 10000) / 10000.f;
float xRot = math_sin(now * math_pi * 2) * 0.4 + math_pi / 2;
float yRot = math_cos(now * math_pi * 2) * 0.4;
float yCos = math_cos(yRot);
float ySin = math_sin(yRot);
float xCos = math_cos(xRot);
float xSin = math_sin(xRot);
float ox = 32.5 + now * 64.0;
float oy = 32.5;
float oz = 32.5;
// for each column
for (int x = 0; x < w; x++) {
// get the x axis delta
float ___xd = ((float)x - (float)w / 2.f) / (float)h;
// for each row
for (int y = 0; y < h; y++) {
// get the y axis delta
float __yd = ((float)y - (float)h / 2.f) / (float)h;
float __zd = 1;
float ___zd = __zd * yCos + __yd * ySin;
float _yd = __yd * yCos - __zd * ySin;
float _xd = ___xd * xCos + ___zd * xSin;
float _zd = ___zd * xCos - ___xd * xSin;
int col = 0;
int br = 255;
float ddist = 0;
float closest = 32.f;
// for each principle axis x,y,z
for (int d = 0; d < 3; d++) {
float dimLength = _xd;
if (d == 1)
dimLength = _yd;
if (d == 2)
dimLength = _zd;
float ll = 1.0f / (dimLength < 0.f ? -dimLength : dimLength);
float xd = (_xd)* ll;
float yd = (_yd)* ll;
float zd = (_zd)* ll;
float initial = ox - floor(ox);
if (d == 1) initial = oy - floor(oy);
if (d == 2) initial = oz - floor(oz);
if (dimLength > 0) initial = 1 - initial;
float dist = ll * initial;
float xp = ox + xd * initial;
float yp = oy + yd * initial;
float zp = oz + zd * initial;
if (dimLength < 0) {
if (d == 0) xp--;
if (d == 1) yp--;
if (d == 2) zp--;
}
// while we are concidering a ray that is still closer then the best so far
while (dist < closest) {
// quantize to the map grid
int tex = map[(((int)zp & 63) << 12) | (((int)yp & 63) << 6) | ((int)xp & 63)];
// if this voxel has a texture applied
if (tex > 0) {
// find the uv coordinates of the intersection point
int u = ((int)((xp + zp) * 16.f)) & 15;
int v = ((int)(yp * 16.f) & 15) + 16;
// fix uvs for alternate directions?
if (d == 1) {
u = ((int)(xp * 16.f)) & 15;
v = (((int)(zp * 16.f)) & 15);
if (yd < 0)
v += 32;
}
// find the colour at the intersection point
int cc = texmap[u + v * 16 + tex * 256 * 3];
// if the colour is not transparent
if (cc > 0) {
col = cc;
ddist = 255 - ((dist / 32 * 255));
br = 255 * (255 - ((d + 2) % 3) * 50) / 255;
// we now have the closest hit point (also terminates this ray)
closest = dist;
}
}
// advance the ray
xp += xd;
yp += yd;
zp += zd;
dist += ll;
}
}
plot(x, y, rgbmul(col, fxmul(br, ddist)));
}
}
}
int main(int argc, char *argv[]) {
SDL_Init(SDL_INIT_EVERYTHING);
SDL_Window *screen;
screen = SDL_CreateWindow(
"Minecraft4k", // window title
SDL_WINDOWPOS_CENTERED, // initial x position
SDL_WINDOWPOS_CENTERED, // initial y position
320, // width, in pixels
240, // height, in pixels
SDL_WINDOW_OPENGL // flags - see below
);
SDL_Renderer* renderer;
renderer = SDL_CreateRenderer(screen, -1, SDL_RENDERER_ACCELERATED);
if (screen == nullptr) {
return 1;
}
init();
bool running = true;
while (running) {
SDL_Event event;
while (SDL_PollEvent(&event)) {
running &= (event.type != SDL_QUIT);
}
SDL_RenderPresent(renderer);
render();
}
SDL_DestroyWindow(screen);
SDL_Quit();
return 0;
}
When I actually run the code I do get a black screen, but the debugger lands on the line
plot(x, y, rgbmul(col, fxmul(br, ddist)));
in ;
static void render(void)
This is all just "for fun" so any information or guidance is appreciated.
You define screen twice (the first time as a global variable, the second time within your main), but you initialize it only once (within your main).
Because of that, the global variable screen actually is set to nullptr and plot fails trying to use it, as the error message states.

Square Root in C/C++

I am trying to implement my own square root function which gives square root's integral part only e.g. square root of 3 = 1.
I saw the method here and tried to implement the method
int mySqrt(int x)
{
int n = x;
x = pow(2, ceil(log(n) / log(2)) / 2);
int y=0;
while (y < x)
{
y = (x + n / x) / 2;
x = y;
}
return x;
}
The above method fails for input 8. Also, I don't get why it should work.
Also, I tried the method here
int mySqrt(int x)
{
if (x == 0) return 0;
int x0 = pow(2, (log(x) / log(2))/2) ;
int y = x0;
int diff = 10;
while (diff>0)
{
x0 = (x0 + x / x0) / 2; diff = y - x0;
y = x0;
if (diff<0) diff = diff * (-1);
}
return x0;
}
In this second way, for input 3 the loop continues ... indefinitely (x0 toggles between 1 and 2).
I am aware that both are essentially versions of Netwon's method but I can't figure out why they fail in certain cases and how could I make them work for all cases. I guess i have the correct logic in implementation. I debugged my code but still I can't find a way to make it work.
This one works for me:
uintmax_t zsqrt(uintmax_t x)
{
if(x==0) return 0;
uintmax_t yn = x; // The 'next' estimate
uintmax_t y = 0; // The result
uintmax_t yp; // The previous estimate
do{
yp = y;
y = yn;
yn = (y + x/y) >> 1; // Newton step
}while(yn ^ yp); // (yn != yp) shortcut for dumb compilers
return y;
}
returns floor(sqrt(x))
Instead of testing for 0 with a single estimate, test with 2 estimates.
When I was writing this, I noticed the result estimate would sometimes oscillate. This is because, if the exact result is a fraction, the algorithm could only jump between the two nearest values. So, terminating when the next estimate is the same as the previous will prevent an infinite loop.
Try this
int n,i;//n is the input number
i=0;
while(i<=n)
{
if((i*i)==n)
{
cout<<"The number has exact root : "<<i<<endl;
}
else if((i*i)>n)
{
cout<<"The integer part is "<<(i-1)<<endl;
}
i++;
}
Hope this helps.
You can try there C sqrt implementations :
// return the number that was multiplied by itself to reach N.
unsigned square_root_1(const unsigned num) {
unsigned a, b, c, d;
for (b = a = num, c = 1; a >>= 1; ++c);
for (c = 1 << (c & -2); c; c >>= 2) {
d = a + c;
a >>= 1;
if (b >= d)
b -= d, a += c;
}
return a;
}
// return the number that was multiplied by itself to reach N.
unsigned square_root_2(unsigned n){
unsigned a = n > 0, b;
if (n > 3)
for (a = n >> 1, b = (a + n / a) >> 1; b < a; a = b, b = (a + n / a) >> 1);
return a ;
}
Example of usage :
#include <assert.h>
int main(void){
unsigned num, res ;
num = 1847902954, res = square_root_1(num), assert(res == 42987);
num = 2, res = square_root_2(num), assert(res == 1);
num = 0, res = square_root_2(num), assert(res == 0);
}
Source

CUDA not returning result

I am trying to make a fraction calculator that calculates on a cuda devise, below is first the sequential version and then my try for a parallel version.
It runs without error, but for some reason do it not give the result back, I have been trying to get this to work for 2 weeks now, but can’t find the error!
Serilized version
int f(int x, int c, int n);
int gcd(unsigned int u, unsigned int v);
int main ()
{
clock_t start = clock();
srand ( time(NULL) );
int x = 1;
int y = 2;
int d = 1;
int c = rand() % 100;
int n = 323;
if(n % y == 0)
d = y;
while(d == 1)
{
x = f(x, c, n);
y = f(f(y, c, n), c, n);
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
if(d == n)
{
printf("\nd == n");
c = 0;
while(c == 0 || c == -2)
c = rand() % 100;
x = 2;
y = 2;
}
}
int d2 = n/d;
printf("\nTime elapsed: %f", ((double)clock() - start) / CLOCKS_PER_SEC);
printf("\nResult: %d", d);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
int f(int x, int c, int n)
{
return (int)(pow((float)x, 2) + c) % n;
}
int gcd(unsigned int u, unsigned int v){
int shift;
/ * GCD(0,x) := x * /
if (u == 0 || v == 0)
return u | v;
/ * Let shift := lg K, where K is the greatest power of 2
dividing both u and v. * /
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
/ * From here on, u is always odd. * /
do {
while ((v & 1) == 0) / * Loop X * /
v >>= 1;
/ * Now u and v are both odd, so diff(u, v) is even.
Let u = min(u, v), v = diff(u, v)/2. * /
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
parallel version
#define threads 512
#define MaxBlocks 65535
#define RunningTheads (512*100)
__device__ int gcd(unsigned int u, unsigned int v)
{
int shift;
if (u == 0 || v == 0)
return u | v;
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
do {
while ((v & 1) == 0)
v >>= 1;
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
__device__ bool cuda_found;
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found)
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
}
if(d != 1 && !cuda_found)
{
cuda_found = true;
outr = &d;
}
}
int main ()
{
int n = 323;
int cArray[RunningTheads];
cArray[0] = 1;
for(int i = 1; i < RunningTheads-1; i++)
{
cArray[i] = i+2;
}
int dresult = 0;
int *dev_cArray;
int *dev_result;
HANDLE_ERROR(cudaMalloc((void**)&dev_cArray, RunningTheads*sizeof(int)));
HANDLE_ERROR(cudaMalloc((void**)&dev_result, sizeof(int)));
HANDLE_ERROR(cudaMemcpy(dev_cArray, cArray, RunningTheads*sizeof(int), cudaMemcpyHostToDevice));
int TotalBlocks = ceil((float)RunningTheads/(float)threads);
if(TotalBlocks > MaxBlocks)
TotalBlocks = MaxBlocks;
printf("Blocks: %d\n", TotalBlocks);
printf("Threads: %d\n\n", threads);
cudaKernal<<<TotalBlocks,threads>>>(dev_cArray, n, dev_result);
HANDLE_ERROR(cudaMemcpy(&dresult, dev_result, sizeof(int), cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaFree(dev_cArray));
HANDLE_ERROR(cudaFree(dev_result));
if(dresult == 0)
dresult = 1;
int d2 = n/dresult;
printf("\nResult: %d", dresult);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
Lets have a look at your kernel code:
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found) // always false because d is always 4
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n); // never writes to d because the loop won't
// be executed
}
if(d != 1 && !cuda_found) // maybe true if cuda_found was initalized
// with false
{
cuda_found = true; // Memory race here.
outr = &d; // you are changing the adresse where outr
// points to; the host code does not see this
// change. your cudaMemcpy dev -> host will copy
// the exact values back from device that have
// been uploaded by cudaMemcpy host -> dev
// if you want to set outr to 4 than write:
// *outr = d;
}
}
One of the problems is you don't return the result. In your code you just change outr which has local scope in your kernel function (i.e. changes are not seen outside this function). You should write *outr = d; to change the value of memory you're pointing with outr.
and I'm not sure if CUDA initializes global variables with zero. I mean are you sure cuda_found is always initialized with false?