C++ - serialize double to binary file in little endian - c++

I'm trying to implement a function that writes a double to a binary file in little-endian byte order.
So far I have class BinaryWriter implementation:
/// Opens `path` for binary output and then imbues the stream with the classic ("C") locale.
void BinaryWriter::open_file_stream( const String& path )
{
    const std::ios_base::openmode mode = std::ios_base::binary | std::ios_base::out;
    m_fstream.open(path.c_str(), mode);

    // The locale is imbued after the file has been opened.
    m_fstream.imbue(std::locale::classic());
}
/// Writes the low 32 bits of `v` to the stream as four bytes, least-significant byte first.
/// Only four bytes are emitted, so the code assumes `int` is 32 bits wide.
void BinaryWriter::write( int v )
{
    // Shift the unsigned bit pattern: right-shifting a negative signed int
    // is implementation-defined before C++20.
    const unsigned int bits = static_cast<unsigned int>(v);

    const char data[4] = {
        static_cast<char>(bits & 0xFFu),
        static_cast<char>((bits >> 8) & 0xFFu),
        static_cast<char>((bits >> 16) & 0xFFu),
        static_cast<char>((bits >> 24) & 0xFFu)
    };
    m_fstream.write(data, sizeof data);
}
// Placeholder overload for doubles: the body is empty, so nothing is written for `v`.
void BinaryWriter::write( double v )
{
// TBD
}
void BinaryWriter::write( int v ) was implemented using Sven answer to What is the correct way to output hex data to a file? post.
Not sure how to implement void BinaryWriter::write( double v ).
I naively tried to follow the void BinaryWriter::write( int v ) implementation, but it didn't work. I guess I don't fully understand the implementation.
Thank you guys

You didn't write this, but I'm assuming the machine you're running on is BIG endian, otherwise writing a double is the same as writing an int, only it's 8 bytes.
// Probe value for the CPU byte order: on a little-endian CPU the first byte
// of an int holding 1 is 1. (Names containing a double underscore are
// reserved for the implementation, so the probe is not called `__one__`.)
const int endianProbe = 1;
const bool isCpuLittleEndian = 1 == *reinterpret_cast<const char*>(&endianProbe); // CPU endianness
// Output endianness - you choose :)  Set to true because the goal here is a
// little-endian file; use false for big-endian output.
const bool isFileLittleEndian = true;

/// Writes the object representation of `v` to the stream in the byte order
/// selected by isFileLittleEndian, reversing the bytes when the CPU's order
/// differs from the file's.
void BinaryWriter::write( double v )
{
    const char* const raw = reinterpret_cast<const char*>(&v);

    if (isCpuLittleEndian != isFileLittleEndian) {
        // CPU and file disagree: emit the bytes back to front.
        char swapped[sizeof(double)];
        for (unsigned i = 0; i < sizeof(double); ++i) {
            swapped[i] = raw[sizeof(double) - 1 - i];
        }
        m_fstream.write(swapped, sizeof swapped);
    }
    else {
        m_fstream.write(raw, sizeof(double));
    }
}

But don't forget that, generally, an int is 4 octets and a double is 8 octets.
Other problem is static_cast. See this example :
double d = 6.1;
char c = static_cast<char>(d); //c == 6
Solution reinterpret value with pointer :
double d = 6.1;
char* c = reinterpret_cast<char*>(&d);
After that, you can use write( Int_64 *v ), which is an extension of write( Int_t v ).
You can use this method with :
double d = 45612.9874;
binary_writer.write64(reinterpret_cast<int_64*>(&d));
Don't forget that sizeof(double) depends on the system.

A little program converting doubles to an IEEE little endian representation.
Besides the test in to_little_endian, it should work on any machine.
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <limits>
#include <sstream>
#include <random>
/**
 * Builds the IEEE-754 binary64 encoding of `value` by hand, in little-endian
 * byte order, and checks it by reading the bytes back as a double.
 *
 * The read-back check is only meaningful on a little-endian machine whose
 * double is binary64. On a mismatch the sign/exponent/fraction fields of both
 * the hand-made bytes and the original value are printed to std::cout.
 *
 * @param value The number to encode (zeros, subnormals, infinities and NaN included).
 * @return true when the bytes decode back to `value` (or both are NaN), false otherwise.
 */
bool to_little_endian(double value) {
    enum { zero_exponent = 0x3ff };  // exponent bias
    uint8_t sgn = 0;        // 1 bit
    uint16_t exponent = 0;  // 11 bits
    uint64_t fraction = 0;  // 52 bits

    double d = value;
    if (std::signbit(d)) {
        sgn = 1;
        d = -d;
    }

    if (std::isinf(d)) {
        exponent = 0x7ff;
    }
    else if (std::isnan(d)) {
        exponent = 0x7ff;
        fraction = 0x8000000000000;
    }
    else if (d) {
        int e;
        const double f = std::frexp(d, &e);  // d == f * 2^e with 0.5 <= f < 1
        // A leading one is implicit, hence 1.0 has a zero fraction and the
        // biased exponent zero_exponent.
        const int biased = e + zero_exponent - 1;
        if (biased > 0) {
            exponent = uint16_t(biased);
            // f * 2^53 is an integer whose top bit (bit 52) is the implicit
            // leading one; drop it to keep the 52 stored bits.
            fraction = static_cast<uint64_t>(std::ldexp(f, 53))
                       & ((uint64_t(1) << 52) - 1);
        }
        else {
            // Subnormal: the exponent field stays 0 and there is no implicit
            // leading one, so d == fraction * 2^-1074 exactly.
            fraction = static_cast<uint64_t>(std::ldexp(d, 1074));
        }
    }

    // Little endian representation.
    uint8_t data[sizeof(double)];
    for (unsigned i = 0; i < 6; ++i) {
        data[i] = static_cast<uint8_t>(fraction & 0xFF);
        fraction >>= 8;
    }
    data[6] = static_cast<uint8_t>((exponent << 4) | fraction);
    data[7] = static_cast<uint8_t>((sgn << 7) | (exponent >> 4));

    // This test works on a little endian machine, only.
    // memcpy instead of a pointer cast: reading a uint8_t array through a
    // double* violates aliasing and alignment rules.
    double result;
    std::memcpy(&result, data, sizeof result);
    if (result == value || (std::isnan(result) && std::isnan(value))) {
        return true;
    }

    // Diagnostic view of the bytes; bit-field layout is implementation-defined.
    struct DoubleLittleEndian {
        uint64_t fraction : 52;
        uint64_t exp : 11;
        uint64_t sgn : 1;
    };
    DoubleLittleEndian little_endian;
    std::memcpy(&little_endian, &data, sizeof(double));
    std::cout << std::hex
        << " Result: " << result << '\n'
        << "Fraction: " << little_endian.fraction << '\n'
        << " Exp: " << little_endian.exp << '\n'
        << " Sgn: " << little_endian.sgn << '\n'
        << std::endl;
    std::memcpy(&little_endian, &value, sizeof(value));
    std::cout << std::hex
        << " Value: " << value << '\n'
        << "Fraction: " << little_endian.fraction << '\n'
        << " Exp: " << little_endian.exp << '\n'
        << " Sgn: " << little_endian.sgn
        << std::endl;
    return false;
}
// Driver: checks a handful of special values, then 10000 pseudo-random
// doubles drawn from [-100, 100). Return values are ignored; failures are
// reported by to_little_endian itself.
int main()
{
    const double inf = std::numeric_limits<double>::infinity();
    const double special[] = {
        +1.0, +0.0, -0.0, +inf, -inf,
        std::numeric_limits<double>::quiet_NaN()
    };
    for (const double v : special) {
        to_little_endian(v);
    }

    std::default_random_engine random;
    std::uniform_real_distribution<double> distribute(-100, +100);
    for (unsigned remaining = 10000; remaining != 0; --remaining) {
        to_little_endian(distribute(random));
    }
    return 0;
}

Related

Convert decimal to a fraction [duplicate]

Given a decimal floating-point value, how can you find its fractional equivalent/approximation? For example:
as_fraction(0.1) -> 1/10
as_fraction(0.333333) -> 1/3
as_fraction(514.0/37.0) -> 514/37
Is there a general algorithm that can convert a decimal number to fractional form? How can this be implemented simply and efficiently in C++?
First get the fractional part and then take the gcd. Use the Euclidean algorithm http://en.wikipedia.org/wiki/Euclidean_algorithm
/// Greatest common divisor of two non-negative values (Euclidean algorithm).
/// gcd(0, b) == b and gcd(a, 0) == a.
/// Defined ahead of foo() so that foo() can call it without a forward declaration.
long gcd(long a, long b)
{
    while (b != 0) {
        const long r = a % b;
        a = b;
        b = r;
    }
    return a;
}

/// Prints `input` as "<integral part> + <numerator> / <denominator>", with the
/// fractional part rounded to nine decimal places and reduced to lowest terms.
/// Expects a finite input.
void foo(double input)
{
    const long precision = 1000000000; // This is the accuracy.

    double integral = std::floor(input);
    long scaled = std::lround((input - integral) * precision);
    if (scaled >= precision) {
        // The fraction rounded up to a whole unit: carry it into the integral part.
        integral += 1;
        scaled -= precision;
    }

    const long divisor = gcd(scaled, precision);
    const long denominator = precision / divisor;
    const long numerator = scaled / divisor;

    std::cout << integral << " + ";
    std::cout << numerator << " / " << denominator << std::endl;
}
#include <iostream>
#include <valarray>
using namespace std;
/// Prints a rational approximation of `number` built from its continued-fraction expansion.
///
/// @param number    Value to approximate; the "x:" column shows its magnitude.
/// @param cycles    Maximum number of continued-fraction terms to consume.
/// @param precision Expansion stops once the remaining fractional part is at or below this.
void as_fraction(double number, int cycles = 10, double precision = 5e-4){
    // Zero counts as positive so that it prints as 0/1 rather than -0/1.
    const int sign = number < 0 ? -1 : 1;
    number = number * sign; //abs(number);

    const int whole = static_cast<int>(number);
    double decimal_part = number - whole;

    // vec_1 is the current convergent {numerator, denominator}; vec_2 the previous one.
    std::valarray<double> vec_1{double(whole), 1}, vec_2{1, 0};

    for (int counter = 0; decimal_part > precision && counter < cycles; ++counter) {
        const double new_number = 1 / decimal_part;
        const double whole_part = static_cast<int>(new_number);
        const std::valarray<double> previous = vec_1;
        vec_1 = whole_part * vec_1 + vec_2;
        vec_2 = previous;
        decimal_part = new_number - whole_part;
    }

    std::cout << "x: " << number << "\tFraction: " << sign * vec_1[0] << '/' << vec_1[1] << std::endl;
}
// Demo driver: prints the fraction found for each sample value, in order.
int main()
{
    const double samples[] = {
        3.142857, 0.1, 0.333333, 514.0/37.0, 1.17171717, -1.17
    };
    for (const double sample : samples) {
        as_fraction(sample);
    }
}
x: 3.14286 Fraction: 22/7
x: 0.1 Fraction: 1/10
x: 0.333333 Fraction: 1/3
x: 13.8919 Fraction: 514/37
x: 1.17172 Fraction: 116/99
x: 1.17 Fraction: -117/100
Sometimes you would want to approximate the decimal, without needing the equivalence. Eg pi=3.14159 is approximated as 22/7 or 355/113. We could use the cycles argument to obtain these:
as_fraction(3.14159, 1);
as_fraction(3.14159, 2);
as_fraction(3.14159, 3);
x: 3.14159 Fraction: 22/7
x: 3.14159 Fraction: 333/106
x: 3.14159 Fraction: 355/113
(Too long for a comment.)
Some comments claim that this is not possible. But I am of a contrary opinion.
I am of the opinion that it is possible in the right interpretation, but it is too easy to misstate the question or misunderstand the answer.
The question posed here is to find rational approximation(s) to a given floating point value.
This is certainly possible since floating point formats used in C++ can only store rational values, most often in the form of sign/mantissa/exponent. Taking IEEE-754 single precision format as an example (to keep the numbers simpler), 0.333 is stored as 1499698695241728 * 2^(-52). That is equivalent to the fraction 1499698695241728 / 2^52 whose convergents provide increasingly accurate approximations, all the way up to the original value: 1/3, 333/1000, 77590/233003, 5586813/16777216.
Two points of note here.
For a variable float x = 0.333; the best rational approximation is not necessarily 333 / 1000, since the stored value is not exactly 0.333 but rather 0.333000004291534423828125 because of the limited precision of the internal representation of floating points.
Once assigned, a floating point value has no memory of where it came from, or whether the source code had it defined as float x = 0.333; vs. float x = 0.333000004; because both of those values have the same internal representation. This is why the (related, but different) problem of separating a string representation of a number (for example, a user-entered value) into integer and fractional parts cannot be solved by first converting to floating point then running floating point calculations on the converted value.
[ EDIT ]   Following is the step-by-step detail of the 0.333f example.
The code to convert a float to an exact fraction.
#include <cfloat>
#include <cmath>
#include <limits>
#include <iostream>
#include <iomanip>
/// Splits `val` using std::frexp so that val == (*num / *den) * 2^(*pwr).
///
/// *den is FLT_RADIX^FLT_MANT_DIG and *num is frexp's mantissa scaled by that
/// same factor; *pwr is the exponent reported by std::frexp.
/// The conversion to unsigned long long means `val` should be non-negative.
///
/// @param val Value to decompose.
/// @param num Receives the numerator.
/// @param den Receives the denominator.
/// @param pwr Receives the exponent.
void flo2frac(float val, unsigned long long* num, unsigned long long* den, int* pwr)
{
    // std::pow rather than std::powf: the latter is not declared in namespace std by every library.
    const float mul = std::pow(static_cast<float>(FLT_RADIX), static_cast<float>(FLT_MANT_DIG));
    *den = static_cast<unsigned long long>(mul);
    *num = static_cast<unsigned long long>(std::frexp(val, pwr) * mul);
    // The former `pwr -= FLT_MANT_DIG;` only moved the local pointer and never
    // touched *pwr. No adjustment is wanted either: the scale factor is
    // already carried by *den.
}
/// Prints `val` followed by the numerator / denominator * radix^(power)
/// decomposition produced by flo2frac, using max_digits10 digits of precision.
void cout_flo2frac(float val)
{
    unsigned long long numerator;
    unsigned long long denominator;
    int exponent;
    flo2frac(val, &numerator, &denominator, &exponent);

    std::cout.precision(std::numeric_limits<float>::max_digits10);
    std::cout << val << " = " << numerator << " / " << denominator
              << " * " << FLT_RADIX << "^(" << exponent << ")" << std::endl;
}
// Demo: decompose the single-precision value 0.333f.
int main()
{
    const float sample = 0.333f;
    cout_flo2frac(sample);
    return 0;
}
Output.
0.333000004 = 11173626 / 16777216 * 2^(-1)
This gives the rational representation of float val = 0.333f; as 5586813/16777216.
What remains to be done is determine the convergents of the exact fraction, which can be done using integer calculations, only. The end result is (courtesy WA):
0, 1/3, 333/1000, 77590/233003, 5586813/16777216
I came up with an algorithm for this problem, but I think it is too lengthy and could be accomplished with fewer lines of code. Sorry about the poor indentation; it is hard to align everything on Stack Overflow.
#include <iostream>
using namespace std;
// Converts the digits after the decimal point of decimalNumber into a
// numerator/denominator pair.
//
// The text from the '.' onwards (e.g. ".25") is parsed as a float and then
// multiplied by ten once per character of that text, '.' included, so "0.25"
// yields numerator 250. denominator is multiplied by ten at least once and
// until it is no longer below the numerator (1 -> 1000 for "0.25").
//
// NOTE(review): denominator is read before it is written, so the caller is
// presumably expected to pass 1 -- confirm against the call site. A
// non-positive value is reset to 1 here so the scaling loop terminates.
// A string without a '.' is treated as having no fractional digits.
void converting(std::string decimalNumber, float& numerator, float& denominator)
{
    // store the value after the decimal into valueAfterPoint
    const std::string::size_type dot = decimalNumber.find(".");
    const std::string valueAfterPoint =
        (dot == std::string::npos) ? std::string() : decimalNumber.substr(dot);

    // converts the string type decimal number into a float value
    numerator = std::atof(valueAfterPoint.c_str());

    // scale the numerator by ten once per character after (and including) the point
    for (int length = static_cast<int>(valueAfterPoint.length()); length > 0; length--)
        numerator *= 10;

    if (denominator <= 0)
        denominator = 1;
    do
        denominator *= 10;
    while (denominator < numerator);
}

// Simplifies numerator/denominator in place by searching downwards for a
// common divisor of their integer parts.
void simplifying(float& numerator, float& denominator)
{
    // A zero integer denominator would make the modulo operations below undefined.
    if (int(denominator) == 0)
        return;

    // If the denominator divides the numerator exactly, the result is a whole number.
    if (int(numerator) % int(denominator) == 0) {
        numerator /= denominator;
        denominator = 1;
        return;
    }

    // Start the search at 9, 90, 900, ... : the first such value that is not below the denominator.
    int maximumNumber = 9;
    while (maximumNumber < denominator) {
        maximumNumber *= 10;
    }

    // Walk downwards, dividing both parts by every common divisor found.
    for (; maximumNumber > 0; maximumNumber--) {
        const bool isDivisible = (int(numerator) % maximumNumber == 0)
                              && (int(denominator) % maximumNumber == 0);
        if (isDivisible) {
            numerator /= maximumNumber;    // e.g. 25/5  -> numerator 5
            denominator /= maximumNumber;  // e.g. 100/5 -> denominator 20
        }
        // Stop as soon as numerator + denominator drops below 17: the
        // fraction is then treated as being in lowest terms.
        int stop = numerator + denominator;
        if (stop < 17) {
            return;
        }
    }
}
I agree completely with dxiv's solution but I needed a more general function (I threw in the signed stuff for fun because my use cases only included positive values):
#include <concepts>
/**
* \brief Multiply two numbers together checking for overflow.
*
* Each operand is split into a high and a low half so that every partial
* product fits in T. The explicit casts keep the template well-formed for
* unsigned types narrower than int, where integer promotion would otherwise
* make the brace initialisations narrowing conversions.
*
* \tparam T The unsigned integral type to check for multiplicative overflow.
* \param a The multiplier.
* \param b The multiplicand.
* \return The (wrapped) result and a value indicating whether the
* multiplication overflowed.
*/
template<std::unsigned_integral T>
auto mul_overflow(T a, T b) -> std::tuple<T, bool>
{
    size_t constexpr shift{ std::numeric_limits<T>::digits / 2 };
    T constexpr mask{ static_cast<T>((T{ 1 } << shift) - T{ 1 }) };

    T const a_high = static_cast<T>(a >> shift);
    T const a_low = static_cast<T>(a & mask);
    T const b_high = static_cast<T>(b >> shift);
    T const b_low = static_cast<T>(b & mask);

    T const low_low{ static_cast<T>(a_low * b_low) };
    if (!(a_high || b_high))
    {
        // Both operands fit in the low half: the product cannot overflow.
        return { low_low, false };
    }

    // high * high contributes only bits at or above the width of T.
    bool const overflowed = a_high && b_high;

    T const low_high{ static_cast<T>(a_low * b_high) };
    T const high_low{ static_cast<T>(a_high * b_low) };
    T const cross{ static_cast<T>((low_high + high_low) << shift) };
    T const ret{ static_cast<T>(low_low + cross) };

    return
    {
        ret,
        overflowed
        || ret < low_low               // the final addition wrapped
        || (low_high >> shift) != 0    // cross terms lost bits when shifted
        || (high_low >> shift) != 0
    };
}
/**
* \brief Converts a floating point value to a numerator and
* denominator pair.
*
* If the floating point value is larger than the maximum that the Tout
* type can hold, the results are silly.
*
* The value is split with std::frexp into a mantissa and a power; the
* mantissa is scaled by radix^digits to form the numerator over a
* radix^digits denominator, the power is then multiplied into the
* numerator (power > 0) or the denominator (otherwise), and finally both
* are halved until they fit into Tout. Every halving step discards the
* lowest bit of both values.
*
* \tparam Tout The integral output type.
* \tparam Tin The floating point input type.
* \param f The value to convert to a numerator and denominator.
* \return The numerator and denominator.
*/
template <std::integral Tout, std::floating_point Tin>
auto to_fraction(Tin f) -> std::tuple<Tout, Tout>
{
// radix^digits: the scale applied to frexp's mantissa below.
const Tin multiplier
{
std::pow(std::numeric_limits<Tin>::radix,
std::numeric_limits<Tin>::digits)
};
uint64_t denominator{ static_cast<uint64_t>(multiplier) };
int power;
// Sign to re-apply to the numerator at the end; stays 1 for unsigned Tout.
Tout num_fix{ 1 };
if constexpr (std::is_signed_v<Tout>)
{
num_fix = f < static_cast<Tin>(0) ? -1 : 1;
f = std::abs(f);
}
// frexp stores the exponent in `power`; the returned mantissa is scaled to an integer.
uint64_t numerator
{
static_cast<uint64_t>(std::frexp(f, &power) * multiplier)
};
// radix^|power| as an integer.
// NOTE(review): presumably this cannot be represented in uint64_t once
// |power| reaches 64 (radix 2) -- confirm the intended input range.
uint64_t const factor
{
static_cast<uint64_t>(std::pow(
std::numeric_limits<Tin>::radix, std::abs(power)))
};
if (power > 0)
{
// Multiply the numerator by factor; while mul_overflow reports an
// overflow, halve both numerator and denominator and retry.
while(true)
{
auto const [res, overflow]{ mul_overflow(numerator, factor) };
if (!overflow)
{
numerator = res;
break;
}
numerator >>= 1;
denominator >>= 1;
}
}
else
{
// Same retry scheme, but the factor goes into the denominator.
while (true)
{
auto const [res, overflow]{ mul_overflow(denominator, factor) };
if (!overflow)
{
denominator = res;
break;
}
numerator >>= 1;
denominator >>= 1;
}
}
// get the results into the requested sized integrals.
// The loop stops once both fit, or once the denominator has reached 1.
while ((numerator > std::numeric_limits<Tout>::max()
|| denominator > std::numeric_limits<Tout>::max())
&& denominator > 1)
{
numerator >>= 1;
denominator >>= 1;
}
return
{
num_fix * static_cast<Tout>(numerator),
static_cast<Tout>(denominator)
};
}
You can call this like:
auto [n, d] { to_fraction<int8_t>(-124.777f) };
And you get n=-124, d=1;
auto [n, d] { to_fraction<uint64_t>(.33333333333333) };
gives n=6004799503160601, d=18014398509481984
#include<iostream>
#include<cmath>
#include<algorithm>
#include<functional>
#include<iomanip>
#include<string>
#include<vector>
#include <exception>
#include <sstream>
// note using std = c++11
// header section
#ifndef rational_H
#define rational_H
/// Exception thrown when the entered text does not yield a usable number.
struct invalid : std::exception {
    // `override` makes the compiler verify that this really replaces std::exception::what().
    const char* what() const noexcept override { return "not a number\n"; }
};
// Plain record holding a value together with a numerator/denominator pair.
// All fields start at zero.
struct Fraction {
    long long value = 0;
    long long numerator = 0;
    long long denominator = 0;
};

// File-wide instance.
Fraction F;
// Converter object: holds the whole and decimal digit groups of the number
// being converted and declares the steps that turn them into a fraction.
// Results are written to the global F rather than to the inherited Fraction fields.
class fraction : public Fraction{
public:
fraction() {}
// Entry point taking the number to convert; defined out of class.
void ctf(double &);
// Stores the integer part of n in F.value, parses the whole-part string w and
// the decimal-digit string d, then builds the fraction.
void get_fraction(std::string& w, std::string& d, long double& n) {
F.value = (long long )n;
set_whole_part(w);
set_fraction_part(d);
make_fraction();
}
// Parses w with std::stoll, stores it as the whole part and returns it.
long long set_whole_part(std::string& w) {
return whole = std::stoll(w);
}
// Parses d with std::stoll, stores it as the decimal digits and returns it.
long long set_fraction_part(std::string& d) {
return decimal = std::stoll(d);
}
void make_fraction();
bool cmpf(long long&, long long&, const long double& epsilon);
int Euclids_method(long long&, long long&);
// Returns the stored decimal digits.
long long get_f_part() { return decimal; };
void convert(std::vector<long long>&);
// True when the number being converted was negative.
bool is_negative{ false };
friend std::ostream& operator<<(std::ostream& os, fraction& ff);
// NOTE(review): this declares a nested type name inside the class; it looks
// unrelated to any namespace-scope struct of the same name -- confirm it is needed.
struct get_sub_length;
private:
long long whole{ 0 };
long long decimal{ 0 };
};
#endif // rational_H
// definitions/source
// Remembers a length value (number of decimal digits) between calls.
struct get_sub_length {
    size_t sub_len{};

    // Stores n and returns the stored value.
    size_t set_decimal_length(size_t& n) { return sub_len = n; }

    // Returns the most recently stored length.
    size_t get_decimal_length() { return sub_len; }
};

// File-wide instance.
get_sub_length slen;
// Holder for a list of integer coefficients.
struct coefficient {
    std::vector<long long> coef;
};

// File-wide instance.
coefficient C;
// Compares the candidate d1/n1 with the original decimal part that was
// entered (the stored decimal digits divided by 10^digit-count).
// Returns true when the two differ by no more than epsilon, i.e. the
// candidate is considered a good enough approximation.
// Feel free to experiment with epsilon for better results.
bool fraction::cmpf(long long& n1, long long& d1, const long double& epsilon = 0.0000005)
{
    const long double scale = pow(10, slen.get_decimal_length());
    // the original fractional part to use for comparison
    const long double target = static_cast<long double>(get_f_part()) / scale;
    const long double candidate = static_cast<long double>(d1) / static_cast<long double>(n1);
    return fabs(target - candidate) <= epsilon;
}
// Euclid's algorithm: appends the coefficients of a continued fraction to
// C.coef by repeated division (fractional portion only).
// Example for 0.375, i.e. 375/1000, called with n_dec = 375 and exp = 1000:
//   1000 / 375 -> quotient 2, remainder 1000 - (2*375) = 250
//    375 / 250 -> quotient 1, remainder  375 - (1*250) = 125
//    250 / 125 -> quotient 2, remainder  250 - (2*125) = 0
// The appended coefficients are 2, 1, 2, usually written [0;2,1,2]
// (equivalently [0;2,1,1,1]) where 0 is the whole-number value.
// The arguments themselves are not modified. Always returns 0.
int fraction::Euclids_method(long long& n_dec, long long& exp)
{
    long long divisor = n_dec;
    long long dividend = exp;

    while (dividend >= 1 && divisor != 0) {
        const long long quotient = dividend / divisor;
        C.coef.push_back(quotient);

        // The old divisor becomes the dividend; the remainder becomes the divisor.
        const long long remainder = dividend - (quotient * divisor);
        dividend = divisor;
        divisor = remainder;
    }
    return 0;
}
// Folds the continued-fraction coefficients in `coef` into a single fraction
// using the convergent recurrence, stopping as soon as cmpf() accepts the
// current approximation. The result is stored, flipped back the right way
// up, in F.numerator / F.denominator.
// The only caller in this file passes C.coef.
void fraction::convert(std::vector<long long>& coef)
{
    long long n1 = 0;
    long long n2 = 1;
    long long d1 = 1;
    long long d2 = 0;

    for (const long long term : coef) {
        // Once the approximation is accepted the state no longer changes,
        // so the remaining coefficients can be skipped.
        if (cmpf(n1, d1)) {
            break;
        }
        const long long next_n = (n1 * term) + n2;
        n2 = n1;
        n1 = next_n;

        const long long next_d = (d1 * term) + d2;
        d2 = d1;
        d1 = next_d;
    }

    //flip the fraction back over to format the correct output.
    F.numerator = d1;
    F.denominator = n1;
}
// Builds the fraction for the stored decimal digits: runs Euclid's method on
// digits / 10^(number of digits), then folds the resulting coefficients.
void fraction::make_fraction() {
    const size_t digit_count = slen.get_decimal_length();
    long long scale = (long long)pow(10, digit_count);
    long long digits = decimal;

    Euclids_method(digits, scale);
    convert(C.coef);
}
/// Returns the whole-number part of the numeric string `s`: everything
/// before the first '.', or all of `s` when there is no '.'.
/// Yields "0" when `s` is empty or starts with '.'.
std::string get_w(const std::string& s)
{
    const std::string::size_type dot = s.find('.');
    if (s.empty() || dot == 0) {
        return "0";
    }
    // With dot == npos, substr returns the whole string.
    return s.substr(0, dot);
}
/// Returns the digits after the first '.' in `s`, or "0" when there is no
/// '.', and records the length of the returned string in slen.
/// The length is recorded on both paths so that slen never keeps a stale
/// count from an earlier call.
std::string get_d(const std::string& s)
{
    const std::string::size_type dot = s.find('.');
    std::string digits = (dot == std::string::npos) ? std::string("0") : s.substr(dot + 1);

    size_t digit_count = digits.length();
    slen.set_decimal_length(digit_count);
    return digits;
}
// Converts nn to a fraction: formats it as fixed-point text with 14 decimals,
// splits that text into whole and decimal parts and hands them to
// get_fraction(). Any std::exception raised on the way is printed to std::cout.
//
// NOTE(review): a value of exactly zero takes the `invalid` path and prints
// "not a number" -- confirm that this is intended.
void fraction::ctf(double& nn)
{
    // The former extraction from an empty std::istringstream never read
    // anything, and the formatted text below can never equal "q", so both
    // pieces of dead code are gone.
    std::ostringstream os;
    os << std::fixed << std::setprecision(14) << nn;
    std::string s = os.str();

    is_negative = false; //reset for loops
    C.coef.clear();      //reset for loops

    //The whole number part will be separated from the decimal part leaving a pure fraction.
    //In such cases Euclid's algorithm takes the reciprocal 1/(n/exp) or exp/n.
    //For pure continued fractions the cf must start with 0 + 1/(n+1/(n+...etc,
    //so the vector is initialized with zero as its first element.
    C.coef.push_back(0);
    std::cout << '\n';

    if (s.front() == '-') { // flag negative values.
        is_negative = true; // represent negative in output
        s.erase(std::remove(s.begin(), s.end(), '-'), s.end()); // using abs
    }

    // w, d: the separated string components
    std::string w = get_w(s);
    std::string d = get_d(s);
    try
    {
        long double n = std::stold(s);
        if (!n) { throw invalid(); }
        get_fraction(w, d, n);
    }
    catch (const std::exception& e) {
        std::cout << e.what();
        std::cout << '\n' << std::endl;
    }
}
// The ostream formats and displays the various outputs:
// the coefficient list, the nested "+1/(" form of the continued fraction and
// finally the fraction itself, read from the globals C and F and from f.
// Note that some pieces are written to std::cout directly rather than to os,
// and that F.numerator may be overwritten while printing.
std::ostream& operator<<(std::ostream& os, fraction& f)
{
std::cout << '\n';
// ---- negative input: same layout as below, with a leading '-' ----
if (f.is_negative == true) {
os << "The coefficients are [" << '-' << f.whole << ";";
// Index 0 is skipped; the whole part printed above takes its place.
for (size_t i = 1; i < C.coef.size(); ++i) {
os << C.coef[i] << ',';
}
std::cout << "]" << '\n';
os << "The cf is: " << '-' << f.whole;
for (size_t i = 1; i < C.coef.size(); ++i) {
os << "+1/(" << C.coef[i];
}
// One closing parenthesis per "+1/(" opened above.
for (size_t i = 1; i < C.coef.size(); ++i) {
os << ')';
}
std::cout << '\n';
// F.value >= 1 with numerator 0 and denominator 1: print whole/1.
if (F.value >= 1 && F.numerator == 0 && F.denominator == 1) {
F.numerator = abs(f.whole);
os << '-' << F.numerator << '/' << F.denominator << '\n';
return os;
}
else if (F.value == 0 && F.numerator == 0 && F.denominator == 1) {
os << F.numerator << '/' << F.denominator << '\n';
return os;
}
// F.value == 0 with a non-zero fraction: print it as is.
else if (F.value == 0 && F.numerator != 0 && F.denominator != 0) {
os << '-' << abs(F.numerator) << '/' << abs(F.denominator) << '\n';
return os;
}
else if (F.numerator == 0 && F.denominator == 0) {
os << '-' << f.whole << '\n';
return os;
}
// Otherwise print the improper fraction (whole * denominator + numerator) / denominator.
// This branch does not return; control reaches the final `return os` because
// the is_negative == false test below fails.
else
os << '-' << (abs(f.whole) * abs(F.denominator) + abs(F.numerator)) << '/' << abs(F.denominator) << '\n';
}
// ---- non-negative input ----
if (f.is_negative == false) {
os << "The coefficients are [" << f.whole << ";";
for (size_t i = 1; i < C.coef.size(); ++i) {
os << C.coef[i] << ',';
}
std::cout << "]" << '\n';
os << "The cf is: " << f.whole;
for (size_t i = 1; i < C.coef.size(); ++i) {
os << "+1/(" << C.coef[i];
}
for (size_t i = 1; i < C.coef.size(); ++i) {
os << ')';
}
std::cout << '\n';
if (F.value >= 1 && F.numerator == 0 && F.denominator == 1) {
F.numerator = abs(f.whole);
os << F.numerator << '/' << F.denominator << '\n';
return os;
}
else if (F.value == 0 && F.numerator != 0 && F.denominator != 0) {
os << abs(F.numerator) << '/' << abs(F.denominator) << '\n';
return os;
}
else if (F.numerator == 0 && F.denominator == 0) {
os << f.whole << '\n';
return os;
}
// Only the next statement belongs to this else; the mixed-number line after
// it runs whenever control gets this far.
else
os << (abs(f.whole) * abs(F.denominator) + abs(F.numerator)) << '/' << abs(F.denominator) << '\n';
os << f.whole << ' ' << F.numerator << '/' << F.denominator << '\n';
}
return os;
}
// Interactive driver: converts every number read from std::cin until the
// extraction fails (for example when "q" is entered).
// For a single conversion instead of a loop, replace the loop with one
// `std::cin >> s;` followed by the same two statements.
int main()
{
    fraction f;
    std::cout << "Enter a number to convert to a fraction\n"
              << "Enter a \"q\" to quit\n";

    for (double s = 0; std::cin >> s; ) {
        f.ctf(s);
        std::cout << f << std::endl;
    }
}

Float array to double array and back, quickly

I need to convert large arrays of float in memory to arrays of double and back. Are there any SSE compiler intrinsics in Visual C++ 15 update 3 that would help?
EDIT: it's a conversion between two wire formats, so #define won't help. A data structure is stored as floats, but a third party processing library expects an array of double.
You can use SSE for this:
float -> double: _mm_cvtps_pd
double -> float: _mm_cvtpd_ps
Try a simple scalar loop first though as (a) the compiler may vectorize for you anyway and (b) you may well be memory-bound, so SIMD optimisation may not help much.
This is not an actual answer to your question but just a example how to make only ALU work on conversion. You can parallel it with FPU cast to get more speed if you implement it properly. This solution should be 100% IEEE compatible.
Update: I made this slower but more readable, and IEEE-compatible in the same way Intel implements it in the 3rd-generation i7 (to the point where even NaN conversions are bit-for-bit equal).
#include <iostream>
#include <chrono>
#include <math.h>
// Converts `count` floats from inData into doubles in outData using integer
// bit operations only. Two floats are handled per iteration by loading them
// as one 64-bit word, so `count` must be even; otherwise an error is printed
// and nothing is converted.
// NOTE(review): the buffers are accessed through unsigned long long*, which
// assumes suitable alignment and that the low 32 bits of each word hold the
// first float of the pair (little-endian host) -- confirm for the target.
void toDouble(float *inData, double *outData, int count)
{
if (count % 2)
{
std::cout << "Error count must be divided by 2" << std::endl;
return;
}
unsigned long long *pfData = (unsigned long long *)(inData);
unsigned long long *pdData = (unsigned long long *)(outData);
unsigned long long *pfDataEnd = pfData + count / 2;
// pdData advances twice per iteration: once in the body, once in the loop header.
for (int i = 0; pfData<pfDataEnd; pfData++, pdData++, i += 2)
{
unsigned long long cl;
// Float in the low 32 bits: sign, exponent and mantissa fields, each moved
// up to where the corresponding field sits in a 64-bit double.
unsigned long long S1 = (*pfData & 0x80000000ull) << 32;
unsigned long long fE1 = (*pfData & 0x7F800000ull) << 32;
unsigned long long F1 = (*pfData & 0x007FFFFFull) << 29;
// Zero exponent with non-zero mantissa: shift the mantissa left until a bit
// reaches the exponent field, counting the shifts in cl.
for (cl = 0; !fE1 && F1 && !(F1 & 0x7FF0000000000000ull); cl++)
F1 <<= 1;
if (cl > 0)
cl--;
// All-ones float exponent maps to the all-ones double exponent; any other
// non-zero value is re-biased (and lowered by cl); an all-zero value stays 0.
unsigned long long dE1 = (fE1 == 0x7F80000000000000ull) ? 0x7FF0000000000000 : ((fE1 | F1) ? (fE1 >> 3) + 0x3800000000000000ull - cl * 0x0010000000000000ull : 0ull);
F1 &= 0x000FFFFFFFFFFFFFull;
*pdData = S1 | dE1 | F1;
pdData++;
// Float in the high 32 bits: same steps with the masks applied in place.
unsigned long long S2 = *pfData & 0x8000000000000000ull;
unsigned long long fE2 = (*pfData & 0x7F80000000000000ull);
unsigned long long F2 = (*pfData & 0x007FFFFF00000000ull) >> 3;
for (cl = 0; !fE2 && F2 && !(F2 & 0x7FF0000000000000ull); cl++)
F2 <<= 1;
if (cl > 0)
cl--;
unsigned long long dE2 = (fE2==0x7F80000000000000ull) ? 0x7FF0000000000000 : ( (fE2 | F2) ? (fE2 >> 3) + 0x3800000000000000ull - cl * 0x0010000000000000ull : 0ull);
F2 &= 0x000FFFFFFFFFFFFFull;
*pdData = S2 | dE2 | F2;
// Last statement of the body, so the `continue` changes nothing
// (presumably a leftover debugging anchor).
if (i == 126)
continue;
}
}
// Converts `count` doubles from inData into floats in outData using integer
// bit operations only. Each iteration reads two doubles and writes the two
// resulting floats as one 64-bit word, so `count` must be even; otherwise an
// error is printed and nothing is converted.
// NOTE(review): the buffers are accessed through unsigned long long*, which
// assumes suitable alignment and that the low 32 bits of each output word are
// the first float of the pair (little-endian host) -- confirm for the target.
void toFloat(double *inData, float *outData, int count)
{
if (count % 2)
{
std::cout << "Error count must be divided by 2" << std::endl;
return;
}
unsigned long long *pdData = (unsigned long long *)(inData);
unsigned long long *pfData = (unsigned long long *)(outData);
unsigned long long *pfDataEnd = pfData + count / 2;
for (int i=0; pfData<pfDataEnd; pfData++, pdData+=2,i+=2)
{
// First double of the pair: sign and exponent fields.
unsigned long long S1 = (*pdData & 0x8000000000000000ull);
unsigned long long dE1 = (*pdData & 0x7FF0000000000000ull);
// Exponent field: 0 at or below 0x380..., saturated to 0x0FF0... at or above
// 0x480..., otherwise re-biased by subtracting 0x380....
unsigned long long fE1 = (dE1 <= 0x3800000000000000ull) ? 0ull : ((dE1 >= 0x4800000000000000ull) ? 0x0FF0000000000000ull : (dE1 - 0x3800000000000000ull));
// Mantissa: for small exponents the mantissa (with bit 52 set) is shifted
// right, or becomes 0 below 0x360...; for exponents at or above 0x47F... it is
// 0 unless the input has an all-ones exponent with a non-zero mantissa, which
// yields 0x0008...; otherwise the mantissa bits are taken as they are.
unsigned long long F1 = (dE1 <= 0x3800000000000000ull) ? ((dE1 < 0x3600000000000000ull) ? 0ull : ((*pdData & 0x000FFFFFFFFFFFFFull | 0x0010000000000000ull) >> ((0x3800000000000000ull - dE1 >> 52) + 1))) : ((dE1 >= 0x47F0000000000000ull) ? (((dE1 == 0x7FF0000000000000ull) && (*pdData & 0x000FFFFFFFFFFFFFull)) ? 0x0008000000000000ull : 0ull) : (*pdData & 0x000FFFFFFFFFFFFFull));
F1 += (((F1 & 0x0000000010000000ull) && ((F1 & 0x0000000020000000ull) || (F1 & 0x000000000FFFFFFFull))) ? 0x0000000020000000ull : 0ull); //rounding
// A carry out of the mantissa caused by rounding is moved into the exponent.
fE1 += F1 & 0x7FF0000000000000ull;
F1 &= 0x000FFFFFE0000000ull;
// Second double of the pair: identical steps.
unsigned long long S2 = (*(pdData+1) & 0x8000000000000000ull);
unsigned long long dE2 = (*(pdData+1) & 0x7FF0000000000000ull);
unsigned long long fE2 = ( dE2 <= 0x3800000000000000ull) ? 0ull : ((dE2 >= 0x4800000000000000ull) ? 0x0FF0000000000000ull : (dE2 - 0x3800000000000000ull));
unsigned long long F2 = (dE2 <= 0x3800000000000000ull) ? ((dE2 < 0x3600000000000000ull) ? 0ull : ((*(pdData + 1) & 0x000FFFFFFFFFFFFFull | 0x0010000000000000ull) >> ((0x3800000000000000ull - dE2 >> 52) + 1))) : ((dE2 >= 0x47F0000000000000ull) ? (((dE2 == 0x7FF0000000000000ull) && (*(pdData+1) & 0x000FFFFFFFFFFFFFull)) ? 0x0008000000000000ull : 0ull) : (*(pdData + 1) & 0x000FFFFFFFFFFFFFull));
F2 += (((F2 & 0x0000000010000000ull) && ((F2 & 0x0000000020000000ull) || (F2 & 0x000000000FFFFFFFull))) ? 0x0000000020000000ull : 0ull); //rounding
fE2 += F2 & 0x7FF0000000000000ull;
F2 &= 0x000FFFFFE0000000ull;
// Pack both results into one word: the second float in the high 32 bits, the
// first in the low 32 bits.
*pfData = S2 | ((fE2 | F2) << 3) | ((S1 | ((fE1 | F1) << 3)) >> 32);
// Last statement of the body, so the `continue` changes nothing
// (presumably a leftover debugging anchor).
if (i == 88)
continue;
}
}
// Checks a float -> double conversion against the compiler's own cast.
// Returns the index of the first element where (double)inData[k] differs
// from outData[k], or -1 when none does. An element where both values are
// NaN is not counted as a difference.
int valTestFtoD(float *inData, double *outData, int count)
{
    int idx = 0;
    while (idx < count)
    {
        const double expected = static_cast<double>(inData[idx]);
        // x != x holds only for NaN.
        const bool bothNaN = (inData[idx] != inData[idx]) && (outData[idx] != outData[idx]);
        if (expected != outData[idx] && !bothNaN)
            return idx;
        ++idx;
    }
    return -1;
}
// Checks a double -> float conversion against the compiler's own cast.
// Returns the index of the first element where (float)inData[k] differs
// from outData[k], or -1 when none does. An element where both values are
// NaN is not counted as a difference.
int valTestDtoF(double *inData, float*outData, int count)
{
    int idx = 0;
    while (idx < count)
    {
        const float expected = static_cast<float>(inData[idx]);
        // x != x holds only for NaN.
        const bool bothNaN = (inData[idx] != inData[idx]) && (outData[idx] != outData[idx]);
        if (expected != outData[idx] && !bothNaN)
            return idx;
        ++idx;
    }
    return -1;
}
// Timing and validation harness for toDouble().
// Phase 1 times filling the arrays and converting with a plain assignment
// loop; phase 2 refills the float array, times toDouble() and validates its
// output with valTestFtoD(). Durations are printed in milliseconds.
void testFloatToDouble()
{
std::cout << "\nSTART Float to Double TEST\n";
int elemNum = 1024 * 1024 * 8;
float *f_arr = new float[elemNum];
double *d_arr = new double[elemNum];
auto start = std::chrono::steady_clock::now();
// Fill: each float is derived from its predecessor.
f_arr[0] = 2.0f;
for (int i = 1; i < elemNum; i++)
{
f_arr[i] = i / f_arr[i - 1];
d_arr[i] = 0.0f;
}
long long duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "init of floats and doubles done in " << duration << std::endl;
// Reference measurement: element-wise implicit float -> double conversion.
start = std::chrono::steady_clock::now();
for (int i = 0; i < elemNum; i++)
{
d_arr[i] = f_arr[i];
}
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "cast to double done in " << duration << std::endl;
// Refill with a different sequence and clear the output array.
start = std::chrono::steady_clock::now();
float pi = 3.14159265358979323846;
float e = 2.71828182845904523536;
f_arr[0] = pi;
d_arr[0] = 0.0;
for (int i = 1; i < elemNum; i++)
{
f_arr[i] = (e + i) / f_arr[i - 1];
d_arr[i] = 0.0;
}
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "init of floats and doubles done in " << duration << std::endl;
// Measurement of the bit-manipulation conversion.
start = std::chrono::steady_clock::now();
toDouble(f_arr, d_arr, elemNum);
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "toDouble done in " << duration << std::endl;
std::cout << "toDouble validation test ";
// A negative result means no mismatch was found.
int errorPos = valTestFtoD(f_arr, d_arr, elemNum);
if (errorPos < 0)
std::cout << "OK" << std::endl;
else
{
std::cout << "FAIL at " << errorPos << std::endl;
std::cout << "float [" << errorPos << "]= " << f_arr[errorPos] << std::endl;
std::cout << "double[" << errorPos << "]= " << d_arr[errorPos] << std::endl;
}
delete[] f_arr;
delete[] d_arr;
std::cout << "END TEST\n";
}
// Timing and validation harness for toFloat().
// Phase 1 times filling the arrays and converting with an explicit cast loop;
// phase 2 refills the double array, times toFloat() and validates its output
// with valTestDtoF(). Durations are printed in milliseconds.
void testDoubleToFloat()
{
std::cout << "\nSTART Double to Float TEST\n";
int elemNum = 1024 *1024 * 8;
float *f_arr = new float[elemNum];
double *d_arr = new double[elemNum];
auto start = std::chrono::steady_clock::now();
// Fill: each double is derived from its predecessor.
d_arr[0] = 2.0f;
for (int i = 1; i < elemNum; i++)
{
d_arr[i] = i / d_arr[i - 1];
f_arr[i] = 0.0f;
}
long long duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "init of floats and doubles done in " << duration << std::endl;
// Reference measurement: element-wise double -> float cast.
start = std::chrono::steady_clock::now();
for (int i = 0; i < elemNum; i++)
{
f_arr[i] = (float)d_arr[i];
}
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "cast to float done in " << duration << std::endl;
// Refill with a different sequence and clear the output array.
start = std::chrono::steady_clock::now();
double pi = 3.14159265358979323846;
double e = 2.71828182845904523536;
d_arr[0] = pi;
f_arr[0] = 0.0f;
for (int i = 1; i < elemNum; i++)
{
d_arr[i] = (e+i) / d_arr[i - 1];
f_arr[i] = 0.0f;
}
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "init of floats and doubles done in " << duration << std::endl;
// Measurement of the bit-manipulation conversion.
start = std::chrono::steady_clock::now();
toFloat(d_arr, f_arr, elemNum);
duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
std::cout << "toFloat done in " << duration << std::endl;
std::cout << "toFloat validation test ";
// A negative result means no mismatch was found.
int errorPos = valTestDtoF(d_arr, f_arr, elemNum);
if (errorPos < 0)
std::cout << "OK" << std::endl;
else
{
std::cout << "FAIL at " << errorPos << std::endl;
std::cout << "double[" << errorPos << "]= " << d_arr[errorPos] << std::endl;
std::cout << "float[" << errorPos << "]= " << f_arr[errorPos] << std::endl;
}
delete[] f_arr;
delete[] d_arr;
std::cout << "END TEST\n";
}
// Entry point: calls testFloatToDouble() and then testDoubleToFloat().
int main()
{
testFloatToDouble();
testDoubleToFloat();
}
online example
A data structure is stored as floats, but a third party processing library expects an array of double.
Can it process in cache-size chunks?
If it wasn't stuck in a 3rd-party library, the best thing would be to convert on the fly, loading a pair of doubles from a pair of floats with _mm_cvtps_pd, and similarly storing back to float, so you never have an array of double in memory.
But if you can't do that, you can at least feed the data to the library while it's still hot in L1 or L2 cache after reading some floats and writing some doubles.
Actually, if it's a "wire format", then presumably the data has to go through the CPU on the way to memory in the first place, unless you have a zero-copy receive API that DMAs right into your buffer. The ideal place for conversion might be in small chunks as you receive each packet. Either copy with conversion directly to double, or copy to both float and double arrays if you also need the original float data.

How to take input 128 bit unsigned integer in c++

I am new to C++. I want to read an unsigned 128-bit integer using scanf and print it using printf. As I am new to C++, these are the only two input/output methods I know. Can someone help me out?
You could use boost, but this library set must be installed yourself:
#include <boost/multiprecision/cpp_int.hpp>
#include <iostream>
// Reads one boost::multiprecision::uint128_t from standard input and echoes it
// to standard output.
int main()
{
using namespace boost::multiprecision;
uint128_t v = 0;
std::cin >> v; // read
std::cout << v << std::endl; // write
return 0;
}
If you want to get along without boost, you can store the value into two uint64_t as such:
// Parse a decimal string into a 128-bit unsigned value stored as two 64-bit
// halves (value = high * 2^64 + low). Characters are not validated, and
// overflow beyond 128 bits is not detected.
std::string input;
std::cin >> input;
uint64_t high = 0, low = 0, tmp;
for(char c : input)
{
    // value *= 10
    high *= 10;
    tmp = low * 10;
    if(tmp / 10 != low)
    {
        // low * 10 did not fit in 64 bits: add floor(low * 10 / 2^64) to high.
        // Split low into 32-bit halves so the intermediate products fit; the
        // lower half must be masked with the full 32 bits (0xffffffff),
        // otherwise its contribution to the carry is lost.
        high += ((low >> 32) * 10 + ((low & 0xffffffff) * 10 >> 32)) >> 32;
    }
    low = tmp;
    // value += digit, with the carry out of low propagated into high
    tmp = low + c - '0';
    high += tmp < low;
    low = tmp;
}
Printing then, however, gets more ugly:
// Print the 128-bit value (high * 2^64 + low) in decimal by repeatedly dividing
// it by pow10 and collecting the remainders as 8-digit chunks.
// Uses the uint64_t variables high, low and tmp of the surrounding code.
std::vector<uint64_t> v;
while(high | low)
{
    uint64_t const pow10 = 100000000;
    // 2^64 mod pow10, computed as (2^32 mod pow10)^2 mod pow10
    uint64_t const mod = (((uint64_t)1 << 32) % pow10) * (((uint64_t)1 << 32) % pow10) % pow10;
    tmp = high % pow10;
    // Sum of the two partial remainders; each is < pow10, so this cannot overflow.
    uint64_t temp = tmp * mod % pow10 + low % pow10;
    // The chunk is temp % pow10. Do not compute it as (tmp * mod + low) % pow10:
    // that sum wraps around 2^64 when low is close to the maximum, which gives
    // wrong digits for the very largest values.
    v.push_back(temp % pow10);
    // 2^64 = 184467440737 * pow10 + 9551616; the last term is the carry of the remainders
    low = low / pow10 + tmp * 184467440737 + tmp * /*0*/9551616 / pow10 + (temp >= pow10);
    high /= pow10;
}
// Skip leading all-zero chunks, print the first chunk unpadded and the rest
// zero-padded to 8 digits.
std::vector<uint64_t>::reverse_iterator i = v.rbegin();
while(i != v.rend() && *i == 0)
{
    ++i;
}
if(i == v.rend())
{
    std::cout << 0;
}
else
{
    std::cout << *i << std::setfill('0');
    for(++i; i != v.rend(); ++i)
    {
        std::cout << std::setw(8) << *i;
    }
}
Above solution works up to (including)
340282366920938463463374516198409551615
= 0x ffff ffff ffff ffff ffff ad06 1410 beff
Above, there is an error.
Note: pow10 can be varied, then some other constants need to be adjusted, e. g. pow10 = 10:
low = low / pow10 + tmp * 1844674407370955161 + tmp * 6 / pow10 + (temp >= pow10);
and
std::cout << std::setw(1) << *i; // setw also can be dropped in this case
Increasing results in reducing the maximum number for which printing still works correctly, decreasing raises the maximum. With pow10 = 10, maximum is
340282366920938463463374607431768211425
= ffff ffff ffff ffff ffff ffff ffff ffe1
I don't know where the error for the very highest numbers comes from, yet, possibly some unconsidered overflow. Any suggestions appreciated, then I'll improve the algorithm. Until then, I'd reduce pow10 to 10 and introduce a special handling for the highest 30 failing numbers:
// Lookup table for the values handled specially below. The index expression
// low - 0xffffffffffffffe2 ranges over 0..29, so the array needs 30 entries
// (a zero-length array is not valid C++ and would be indexed out of bounds).
std::string const specialValues[30] = { /*...*/ };
if(high == 0xffffffffffffffff && low > 0xffffffffffffffe1)
{
    std::cout << specialValues[low - 0xffffffffffffffe2];
}
else
{
    /* ... */
}
So at least, we can handle all valid 128-bit values correctly.
You can try from_string_128_bits and to_string_128_bits with 128 bits unsigned integers in C :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Converts a NUL-terminated decimal string to a 128-bit unsigned integer.
 * Characters are not validated and overflow wraps modulo 2^128. */
__uint128_t from_string_128_bits(const char *str) {
    __uint128_t res = 0;
    while (*str != '\0') {
        res = res * 10 + *str - '0';
        ++str;
    }
    return res;
}
/* Converts a 128-bit unsigned integer to a newly allocated decimal string.
 * Returns NULL if the allocation fails; otherwise the caller must free() the
 * result. */
static char *to_string_128_bits(__uint128_t num) {
    /* 2^128 - 1 has 39 decimal digits; one more byte for the terminator. */
    char digits[40];
    size_t pos = sizeof digits;
    char *s;

    /* Fill the buffer from the end with the least significant digit first. */
    digits[--pos] = '\0';
    do {
        digits[--pos] = "0123456789"[(int)(num % 10)];
        num /= 10;
    } while (num != 0);

    s = (char *)malloc(sizeof digits - pos);
    if (s == NULL)
        return NULL;
    strcpy(s, digits + pos);
    return s;
}
/* Demo: parses a decimal literal into a 128-bit value, multiplies it by 7 and
 * prints the decimal string of the product. */
int main(void) {
__uint128_t n = from_string_128_bits("10000000000000000000000000000000000001");
n *= 7;
char *s = to_string_128_bits(n);
puts(s);
free(s); // string must be freed
// print 70000000000000000000000000000000000007
}

Converting a floating-point decimal value to a fraction

Given a decimal floating-point value, how can you find its fractional equivalent/approximation? For example:
as_fraction(0.1) -> 1/10
as_fraction(0.333333) -> 1/3
as_fraction(514.0/37.0) -> 514/37
Is there a general algorithm that can convert a decimal number to fractional form? How can this be implemented simply and efficiently in C++?
First get the fractional part and then take the gcd. Use the Euclidean algorithm http://en.wikipedia.org/wiki/Euclidean_algorithm
// gcd() is defined after foo(); it must be declared before its first use.
long gcd(long a, long b);

// Prints input as "<integral part> + <numerator> / <denominator>", where the
// fractional part is rounded to 9 decimal places and reduced by the gcd.
void foo(double input)
{
    const double integral = std::floor(input);
    const double frac = input - integral;
    const long precision = 1000000000; // This is the accuracy.
    // Scale and round the fractional part once and reuse it for both the gcd
    // and the numerator.
    const long scaled = static_cast<long>(std::round(frac * precision));
    const long gcd_ = gcd(scaled, precision);
    const long denominator = precision / gcd_;
    const long numerator = scaled / gcd_;
    std::cout << integral << " + ";
    std::cout << numerator << " / " << denominator << std::endl;
}
// Greatest common divisor via the Euclidean algorithm. If either argument is
// zero the other one is returned.
long gcd(long a, long b)
{
    for (;;)
    {
        if (a == 0)
            return b;
        if (b == 0)
            return a;
        if (a < b)
        {
            // keep the smaller value first, reduce the larger one
            b = b % a;
        }
        else
        {
            const long rest = a % b;
            a = b;
            b = rest;
        }
    }
}
#include <iostream>
#include <valarray>
using namespace std;
// Prints a rational approximation of `number` built from its continued
// fraction expansion.
//   number    value to approximate
//   cycles    maximum number of continued-fraction steps
//   precision expansion stops once the remaining fractional part is <= precision
// Output format: "x: <|number|>\tFraction: <numerator>/<denominator>".
void as_fraction(double number, int cycles = 10, double precision = 5e-4){
    // Only strictly negative input gets a minus sign; zero is printed as 0/1.
    const int sign = number < 0 ? -1 : 1;
    number = number * sign; //abs(number);
    double decimal_part = number - (int)number;
    int counter = 0;
    // vec_1 holds the current convergent {numerator, denominator}, vec_2 the previous one.
    std::valarray<double> vec_1{double((int) number), 1}, vec_2{1,0}, temporary;
    while(decimal_part > precision && counter < cycles){
        const double new_number = 1 / decimal_part;
        const double whole_part = (int) new_number;
        // next convergent = whole_part * current + previous
        temporary = vec_1;
        vec_1 = whole_part * vec_1 + vec_2;
        vec_2 = temporary;
        decimal_part = new_number - whole_part;
        counter += 1;
    }
    std::cout<<"x: "<< number <<"\tFraction: " << sign * vec_1[0]<<'/'<< vec_1[1]<<std::endl;
}
// Demo: prints the fraction found by as_fraction() for a few sample values,
// using the default cycles and precision.
int main()
{
as_fraction(3.142857);
as_fraction(0.1);
as_fraction(0.333333);
as_fraction(514.0/37.0);
as_fraction(1.17171717);
as_fraction(-1.17);
}
x: 3.14286 Fraction: 22/7
x: 0.1 Fraction: 1/10
x: 0.333333 Fraction: 1/3
x: 13.8919 Fraction: 514/37
x: 1.17172 Fraction: 116/99
x: 1.17 Fraction: -117/100
Sometimes you would want to approximate the decimal, without needing the equivalence. Eg pi=3.14159 is approximated as 22/7 or 355/113. We could use the cycles argument to obtain these:
as_fraction(3.14159, 1);
as_fraction(3.14159, 2);
as_fraction(3.14159, 3);
x: 3.14159 Fraction: 22/7
x: 3.14159 Fraction: 333/106
x: 3.14159 Fraction: 355/113
(Too long for a comment.)
Some comments claim that this is not possible. But I am of a contrary opinion.
I am of the opinion that it is possible in the right interpretation, but it is too easy to misstate the question or misunderstand the answer.
The question posed here is to find rational approximation(s) to a given floating point value.
This is certainly possible since floating point formats used in C++ can only store rational values, most often in the form of sign/mantissa/exponent. Taking IEEE-754 single precision format as an example (to keep the numbers simpler), 0.333 is stored as 1499698695241728 * 2^(-52). That is equivalent to the fraction 1499698695241728 / 2^52 whose convergents provide increasingly accurate approximations, all the way up to the original value: 1/3, 333/1000, 77590/233003, 5586813/16777216.
Two points of note here.
For a variable float x = 0.333; the best rational approximation is not necessarily 333 / 1000, since the stored value is not exactly 0.333 but rather 0.333000004291534423828125 because of the limited precision of the internal representation of floating points.
Once assigned, a floating point value has no memory of where it came from, or whether the source code had it defined as float x = 0.333; vs. float x = 0.333000004; because both of those values have the same internal representation. This is why the (related, but different) problem of separating a string representation of a number (for example, a user-entered value) into integer and fractional parts cannot be solved by first converting to floating point then running floating point calculations on the converted value.
[ EDIT ]   Following is the step-by-step detail of the 0.333f example.
The code to convert a float to an exact fraction.
#include <cfloat>
#include <cmath>
#include <limits>
#include <iostream>
#include <iomanip>
// Decomposes val so that val == (*num / *den) * 2^(*pwr), assuming FLT_RADIX == 2.
//   *den receives FLT_RADIX^FLT_MANT_DIG
//   *num receives the frexp() fraction of val scaled by that same factor
//   *pwr receives the frexp() exponent, unchanged
// NOTE(review): val is expected to be non-negative; converting a negative
// product to unsigned long long is not well-defined — confirm callers.
void flo2frac(float val, unsigned long long* num, unsigned long long* den, int* pwr)
{
    // std::pow on float arguments replaces std::powf, which not every standard
    // library declares in namespace std.
    const float mul = std::pow(static_cast<float>(FLT_RADIX), static_cast<float>(FLT_MANT_DIG));
    *den = static_cast<unsigned long long>(mul);
    *num = static_cast<unsigned long long>(std::frexp(val, pwr) * mul);
    // The former "pwr -= FLT_MANT_DIG;" only moved the local pointer and had no
    // effect on the result, so it has been dropped.
}
// Prints val followed by the decomposition produced by flo2frac(), in the form
// "<val> = <num> / <den> * <radix>^(<pwr>)".
void cout_flo2frac(float val)
{
    int exponent;
    unsigned long long numerator, denominator;
    flo2frac(val, &numerator, &denominator, &exponent);

    // max_digits10 digits are requested for printing the float
    std::cout.precision(std::numeric_limits<float>::max_digits10);
    std::cout << val << " = " << numerator << " / " << denominator;
    std::cout << " * " << FLT_RADIX << "^(" << exponent << ")" << std::endl;
}
// Demo: prints the decomposition of 0.333f.
int main()
{
cout_flo2frac(0.333f);
}
Output.
0.333000004 = 11173626 / 16777216 * 2^(-1)
This gives the rational representation of float val = 0.333f; as 5586813/16777216.
What remains to be done is determine the convergents of the exact fraction, which can be done using integer calculations, only. The end result is (courtesy WA):
0, 1/3, 333/1000, 77590/233003, 5586813/16777216
I came up with an algorithm for this problem, but I think it is too lengthy and could be accomplished with fewer lines of code. Sorry about the poor indentation; it is hard to align everything on Stack Overflow.
#include <iostream>
using namespace std;
// Converts the fractional part of a decimal number given as a string into a
// numerator/denominator pair.
//   decimalNumber  text such as "0.25"
//   numerator      receives the fractional value scaled by 10 once per
//                  character of the part starting at the '.' (the '.' included)
//   denominator    multiplied by 10 until it is no longer below the numerator;
//                  a non-positive incoming value is reset to 1 first
// Without a '.' in the text the result is 0/1.
void converting(std::string decimalNumber, float& numerator, float& denominator)
{
    const std::string::size_type pointPosition = decimalNumber.find(".");
    if (pointPosition == std::string::npos)
    {
        // no fractional part at all
        numerator = 0;
        denominator = 1;
        return;
    }
    // A non-positive start value would never exceed the numerator in the loop below.
    if (!(denominator > 0))
        denominator = 1;

    // store the text from the decimal point onward
    const std::string valueAfterPoint = decimalNumber.substr(pointPosition);
    // converts the string type decimal number into a float value
    numerator = std::atof(valueAfterPoint.c_str());
    // scale the numerator by ten once per stored character
    for (int length = valueAfterPoint.length(); length > 0; length--)
        numerator *= 10;
    do
        denominator *= 10;
    while (denominator < numerator);
}
// Simplifies the converted numerator and denominator in place by dividing out
// common integer factors.
void simplifying(float& numerator, float& denominator)
{
    // The checks below take the integer part of the denominator as a modulus;
    // zero cannot be used as one.
    if (int(denominator) == 0)
        return;

    // A single decimal digit ranges from zero to nine, so nine is the starting candidate.
    int maximumNumber = 9;

    // If the numerator is a whole multiple of the denominator, the result is a whole number.
    if (int(numerator) % int(denominator) == 0) {
        numerator /= denominator;
        denominator = 1;
        return;
    }

    // Scale the starting candidate (9, 90, 900, ...) until it is no longer below the denominator.
    while (maximumNumber < denominator) {
        maximumNumber *= 10;
    }

    // Try every candidate divisor from the largest down to one.
    for (; maximumNumber > 0; maximumNumber--) {
        const bool isDivisible = ((int(numerator) % maximumNumber == 0) && int(denominator) % maximumNumber == 0);
        if (isDivisible)
        {
            numerator /= maximumNumber;   // e.g. 25 / 5 -> 5
            denominator /= maximumNumber; // e.g. 100 / 5 -> 20
        }
        // Stop as soon as numerator + denominator drops below 17.
        int stop = numerator + denominator;
        if (stop < 17)
        {
            return;
        }
    }
}
I agree completely with dxiv's solution but I needed a more general function (I threw in the signed stuff for fun because my use cases only included positive values):
#include <concepts>
/**
* \brief Multiplies two unsigned values and reports whether the product
* overflowed the type.
*
* Both operands are split into an upper and a lower half so that every partial
* product fits into T.
*
* \tparam T The unsigned integral type to multiply in.
* \param a The first factor.
* \param b The second factor.
* \return The (possibly truncated) product and a flag that is true when the
* multiplication overflowed.
*/
template<std::unsigned_integral T>
auto mul_overflow(T a, T b) -> std::tuple<T, bool>
{
    size_t constexpr half_bits{ std::numeric_limits<T>::digits / 2 };
    T constexpr low_mask{ (T{ 1 } << half_bits) - T{ 1 } };

    T const a_hi = a >> half_bits;
    T const a_lo = a & low_mask;
    T const b_hi = b >> half_bits;
    T const b_lo = b & low_mask;

    T const lo_product{ a_lo * b_lo };
    if (a_hi == 0 && b_hi == 0)
    {
        // both operands fit into the lower half: the product cannot overflow
        return { lo_product, false };
    }

    T const cross_ab{ a_lo * b_hi };
    T const cross_ba{ a_hi * b_lo };
    T const product{ lo_product + ((cross_ab + cross_ba) << half_bits) };

    // Overflow if both upper halves are set, if the final addition wrapped, or
    // if a cross product does not fit into the upper half.
    bool const both_upper_set = a_hi && b_hi;
    bool const sum_wrapped = product < lo_product;
    bool const cross_too_wide = (cross_ab >> half_bits) != 0 || (cross_ba >> half_bits) != 0;
    return { product, both_upper_set || sum_wrapped || cross_too_wide };
}
/**
* \brief Converts a floating point value to a numerator and
* denominator pair.
*
* If the floating point value is larger than the maximum that the Tout
* type can hold, the results are silly.
*
* \tparam Tout The integral output type.
* \tparam Tin The floating point input type.
* \param f The value to convert to a numerator and denominator.
* \return The numerator and denominator.
*/
template <std::integral Tout, std::floating_point Tin>
auto to_fraction(Tin f) -> std::tuple<Tout, Tout>
{
// multiplier = radix^digits of Tin; it is the initial denominator.
const Tin multiplier
{
std::pow(std::numeric_limits<Tin>::radix,
std::numeric_limits<Tin>::digits)
};
uint64_t denominator{ static_cast<uint64_t>(multiplier) };
int power;
// Sign factor applied to the numerator at the end; only changed for signed Tout.
Tout num_fix{ 1 };
if constexpr (std::is_signed_v<Tout>)
{
num_fix = f < static_cast<Tin>(0) ? -1 : 1;
f = std::abs(f);
}
// frexp() stores the exponent in power; its fraction is scaled by multiplier.
uint64_t numerator
{
static_cast<uint64_t>(std::frexp(f, &power) * multiplier)
};
// factor = radix^|power|, applied to the numerator or the denominator below.
uint64_t const factor
{
static_cast<uint64_t>(std::pow(
std::numeric_limits<Tin>::radix, std::abs(power)))
};
if (power > 0)
{
// Positive exponent: scale the numerator. While the multiplication overflows,
// halve numerator and denominator and try again.
while(true)
{
auto const [res, overflow]{ mul_overflow(numerator, factor) };
if (!overflow)
{
numerator = res;
break;
}
numerator >>= 1;
denominator >>= 1;
}
}
else
{
// Zero or negative exponent: scale the denominator instead, with the same
// halve-and-retry handling on overflow.
while (true)
{
auto const [res, overflow]{ mul_overflow(denominator, factor) };
if (!overflow)
{
denominator = res;
break;
}
numerator >>= 1;
denominator >>= 1;
}
}
// get the results into the requested sized integrals.
// Both values are halved until they fit into Tout or the denominator reaches 1.
while ((numerator > std::numeric_limits<Tout>::max()
|| denominator > std::numeric_limits<Tout>::max())
&& denominator > 1)
{
numerator >>= 1;
denominator >>= 1;
}
return
{
num_fix * static_cast<Tout>(numerator),
static_cast<Tout>(denominator)
};
}
You can call this like:
auto [n, d] { to_fraction<int8_t>(-124.777f) };
And you get n=-124, d=1;
auto [n, d] { to_fraction<uint64_t>(.33333333333333) };
gives n=6004799503160601, d=18014398509481984
#include<iostream>
#include<cmath>
#include<algorithm>
#include<functional>
#include<iomanip>
#include<string>
#include<vector>
#include <exception>
#include <sstream>
// note using std = c++11
// header section
#ifndef rational_H
#define rational_H
// Exception type whose message is "not a number".
struct invalid : std::exception {
    // override makes the compiler verify that this replaces std::exception::what()
    const char* what() const noexcept override { return "not a number\n"; }
};
// Plain holder for a conversion result: the truncated input value plus the
// numerator and denominator of the fractional part.
struct Fraction {
public:
long long value{0};
long long numerator{0};
long long denominator{0};
}; Fraction F; // file-scope instance written by fraction's member functions and read by operator<<
// Converts a decimal number into a continued fraction and a rational
// approximation. Results are stored in the file-scope objects F and C rather
// than in the inherited Fraction members.
class fraction : public Fraction{
public:
fraction() {}
// Entry point: converts the given double (see the definition below).
void ctf(double &);
// Stores the truncated value in F.value, parses the whole and fractional digit
// strings, then builds the fraction.
//   w  digits before the decimal point
//   d  digits after the decimal point
//   n  the numeric value of the input
void get_fraction(std::string& w, std::string& d, long double& n) {
F.value = (long long )n;
set_whole_part(w);
set_fraction_part(d);
make_fraction();
}
// Parses w with std::stoll, stores it as the whole part and returns it.
long long set_whole_part(std::string& w) {
return whole = std::stoll(w);
}
// Parses d with std::stoll, stores it as the fractional digits and returns it.
long long set_fraction_part(std::string& d) {
return decimal = std::stoll(d);
}
void make_fraction();
bool cmpf(long long&, long long&, const long double& epsilon);
int Euclids_method(long long&, long long&);
// Returns the stored fractional digits.
long long get_f_part() { return decimal; };
void convert(std::vector<long long>&);
// Set by ctf() when the input text starts with '-'.
bool is_negative{ false };
friend std::ostream& operator<<(std::ostream& os, fraction& ff);
// NOTE(review): this declares a nested type fraction::get_sub_length; the
// get_sub_length defined later at namespace scope is a different type — confirm intent.
struct get_sub_length;
private:
long long whole{ 0 };   // digits before the decimal point
long long decimal{ 0 }; // digits after the decimal point, as an integer
};
#endif // rational_H
// definitions/source
// Remembers how many digits follow the decimal point of the current input.
struct get_sub_length {
    size_t sub_len{};
    // Stores n as the digit count and returns the stored value.
    size_t set_decimal_length(size_t& n) { return sub_len = n; }
    // Returns the stored digit count.
    size_t get_decimal_length() { return sub_len; }
}; get_sub_length slen;
// Holds the continued-fraction coefficients collected for the current input.
struct coefficient {
std::vector<long long>coef;
}; coefficient C; // file-scope instance used by the fraction member functions and operator<<
// Compares the candidate d1/n1 with the original fractional part of the input
// (the stored decimal digits divided by 10^digit-count). Returns true when the
// two differ by no more than epsilon, i.e. the candidate is accepted as the
// best approximation. Feel free to experiment with epsilon.
bool fraction::cmpf(long long& n1, long long& d1, const long double& epsilon = 0.0000005)
{
    const long double scale = pow(10, slen.get_decimal_length());
    // the original fractional part, used as the reference value
    const long double reference = (long double)get_f_part() / scale;
    const long double candidate = ((long double)d1 / (long double)n1);
    return fabs(reference - candidate) <= epsilon;
}
// Euclid's algorithm: appends the continued-fraction coefficients of
// exp / n_dec to C.coef by repeated division (fractional portion only).
// Example for 0.375, i.e. n_dec = 375 and exp = 1000:
//   1000 / 375 -> quotient 2, remainder 1000 - (2 * 375) = 250
//    375 / 250 -> quotient 1, remainder  375 - (1 * 250) = 125
//    250 / 125 -> quotient 2, remainder  250 - (2 * 125) = 0
// The coefficients are the quotients 2, 1, 2, generally written [0;2,1,2]
// where 0 is the whole number value.
// The arguments are not modified; the return value is always 0.
int fraction::Euclids_method(long long& n_dec, long long& exp)
{
    long long dividend = exp;
    long long divisor = n_dec;
    while ((dividend >= 1) && (divisor != 0)) {
        const long long quotient = dividend / divisor;
        C.coef.push_back(quotient);
        const long long remainder = dividend - (quotient * divisor);
        // next step divides the old divisor by the remainder
        dividend = divisor;
        divisor = remainder;
    }
    return 0;
}
// Evaluates the coefficients stored in C.coef as a simple continued fraction,
// which should give a good approximation of the original decimal number.
// Coefficients are folded in one at a time; once cmpf() accepts the current
// approximation the remaining coefficients no longer change it.
// The result is written to F.numerator and F.denominator.
// The coef parameter is kept for interface compatibility; the values are read
// from C.coef, as before.
void fraction::convert(std::vector<long long>& coef)
{
    (void)coef;
    long long n1 = 0;
    long long n2 = 1;
    long long d1 = 1;
    long long d2 = 0;
    // Elements are long long, so iterate with that type rather than size_t.
    for (const long long term : C.coef) {
        if (cmpf(n1, d1) == false) {
            F.numerator = (n1 * term) + n2;
            n2 = n1;
            n1 = F.numerator;
            F.denominator = (d1 * term) + d2;
            d2 = d1;
            d1 = F.denominator;
        }
    }
    //flip the fraction back over to format the correct output.
    F.numerator = d1;
    F.denominator = n1;
}
// Creates a fraction from the decimal component: collects the continued
// fraction coefficients of decimal / 10^digits and converts them.
void fraction::make_fraction() {
    const size_t count = slen.get_decimal_length();
    long long n_dec = decimal;
    // Build 10^count with integer arithmetic; converting the result of
    // floating-point pow() can end up one below the intended power of ten.
    long long exp = 1;
    for (size_t i = 0; i < count; ++i) {
        exp *= 10;
    }
    Euclids_method(n_dec, exp);
    convert(C.coef);
}
// Returns the digits in front of the decimal point of s.
// A string that starts with '.' yields "0"; a string without '.' is returned
// unchanged.
std::string get_w(const std::string& s)
{
    const std::string::size_type pos = s.find(".");
    if (pos == 0) {
        // nothing in front of the point
        return "0";
    }
    // pos == npos (no point found) makes substr return the whole string
    return s.substr(0, pos);
}
// Returns the digits after the decimal point of s and records their count in
// slen. A string without '.' yields "0" and leaves slen untouched.
std::string get_d(const std::string& s)
{
    const std::string::size_type dot = s.find(".");
    if (dot == std::string::npos)
        return "0";

    std::string digits = s.substr(dot + 1);
    size_t digit_count = digits.length();
    slen.set_decimal_length(digit_count);
    return digits;
}
// Converts nn to a fraction: formats it with 14 fixed decimals, splits the text
// into whole and fractional digits and hands them to get_fraction().
// Any exception raised while parsing is reported on std::cout.
void fraction::ctf(double& nn)
{
    // Format the value as text. (The former read from an empty istringstream
    // never changed nn and has been removed.)
    std::ostringstream os;
    os << std::fixed << std::setprecision(14) << nn;
    std::string s = os.str();
    is_negative = false; //reset for loops
    C.coef.clear(); //reset for loops
    //The whole number part will be separated from the decimal part leaving a pure fraction.
    //In such cases using Euclid's algorithm would take the reciprocal 1/(n/exp) or exp/n.
    //For pure continued fractions the cf must start with 0 + 1/(n+1/(n+...etc
    //So the vector is initialized with zero as its first element.
    C.coef.push_back(0);
    std::cout << '\n';
    // s is produced from a double above, so it can never be the text "q";
    // the former check for it was unreachable.
    if (s.front() == '-') { // flag negative values.
        is_negative = true; // represent negative in output
        s.erase(std::remove(s.begin(), s.end(), '-'), s.end()); // using abs
    }
    // w, d, separate the string components
    std::string w = get_w(s);
    std::string d = get_d(s);
    try
    {
        // std::stold throws when s is not numeric. A parsed value of zero is a
        // valid number and is converted like any other instead of being
        // rejected, which previously left the results of the last input in place.
        long double n = std::stold(s);
        get_fraction(w, d, n);
    }
    catch (const std::exception& e) {
        std::cout << e.what();
        std::cout <<'\n'<< std::endl;
    }
}
// Writes the coefficient list and the nested "+1/(" form of the continued
// fraction held in C.coef, each on its own line. sign is "-" or "".
static void write_cf_expansion(std::ostream& os, const char* sign, long long whole)
{
    os << "The coefficients are [" << sign << whole << ";";
    for (size_t i = 1; i < C.coef.size(); ++i) {
        os << C.coef[i] << ',';
    }
    os << "]" << '\n';
    os << "The cf is: " << sign << whole;
    for (size_t i = 1; i < C.coef.size(); ++i) {
        os << "+1/(" << C.coef[i];
    }
    for (size_t i = 1; i < C.coef.size(); ++i) {
        os << ')';
    }
    os << '\n';
}

// Formats the conversion result held in f, F and C. Everything is written to
// os; nothing goes to std::cout directly, so the output also works with
// streams other than std::cout.
// Note: the whole-number case stores |whole| in F.numerator before printing.
std::ostream& operator<<(std::ostream& os, fraction& f)
{
    os << '\n';
    if (f.is_negative == true) {
        write_cf_expansion(os, "-", f.whole);
        if (F.value >= 1 && F.numerator == 0 && F.denominator == 1) {
            // whole number
            F.numerator = std::abs(f.whole);
            os << '-' << F.numerator << '/' << F.denominator << '\n';
        }
        else if (F.value == 0 && F.numerator == 0 && F.denominator == 1) {
            // zero: printed without a sign
            os << F.numerator << '/' << F.denominator << '\n';
        }
        else if (F.value == 0 && F.numerator != 0 && F.denominator != 0) {
            // pure fraction
            os << '-' << std::abs(F.numerator) << '/' << std::abs(F.denominator) << '\n';
        }
        else if (F.numerator == 0 && F.denominator == 0) {
            os << '-' << f.whole << '\n';
        }
        else {
            // improper fraction
            os << '-' << (std::abs(f.whole) * std::abs(F.denominator) + std::abs(F.numerator)) << '/' << std::abs(F.denominator) << '\n';
        }
        return os;
    }

    write_cf_expansion(os, "", f.whole);
    if (F.value >= 1 && F.numerator == 0 && F.denominator == 1) {
        // whole number
        F.numerator = std::abs(f.whole);
        os << F.numerator << '/' << F.denominator << '\n';
        return os;
    }
    if (F.value == 0 && F.numerator != 0 && F.denominator != 0) {
        // pure fraction
        os << std::abs(F.numerator) << '/' << std::abs(F.denominator) << '\n';
        return os;
    }
    if (F.numerator == 0 && F.denominator == 0) {
        os << f.whole << '\n';
        return os;
    }
    // improper fraction, followed by the mixed-number form
    os << (std::abs(f.whole) * std::abs(F.denominator) + std::abs(F.numerator)) << '/' << std::abs(F.denominator) << '\n';
    os << f.whole << ' ' << F.numerator << '/' << F.denominator << '\n';
    return os;
}
// Interactive driver: converts every number read from standard input and
// prints the result.
int main()
{
fraction f;
double s = 0;
std::cout << "Enter a number to convert to a fraction\n";
std::cout << "Enter a \"q\" to quit\n";
// Loop version (active): runs until reading a double from std::cin fails.
while (std::cin >> s) {
f.ctf(s);
std::cout << f << std::endl;
}
// Single-shot version (inactive): use these lines instead of the loop above
// to convert just one number.
//std::cin >> s;
//f.ctf(s);
//std::cout << f << std::endl;
}

How to get a number from unsigned long long mask?

I wonder how to reverse something like this. So having a mask where auto mask = 1ULL << 20; how to get 20 out from mask?
Loop-free
Many years ago when I was writing a bit-wise arithmetic for a chess engine, I found a fast implementation which is useful for your requirement, it's loop-free. This method will return the position of the first 1-bit from right-to-left (Least Significant Bit):
// Returns the index (0 = least significant) of the lowest set bit of value,
// without a loop. For value == 0 the result is -1 converted to unsigned int.
inline unsigned int lsb(unsigned long long value)
{
    if (value == 0)
        return -1;

    // keep only the lowest set bit
    value &= -value;

    // Fold the upper 32 bits onto the lower 32; only one bit is set in total.
    const unsigned int upper = (unsigned) (value >> 32);
    const unsigned int folded = (unsigned) value | upper;

    // Each test decides one binary digit of the bit index.
    unsigned int index = 0;
    if (upper != 0)
        index |= 32u;
    if ((folded & 0xffff0000) != 0)
        index |= 16u;
    if ((folded & 0xff00ff00) != 0)
        index |= 8u;
    if ((folded & 0xf0f0f0f0) != 0)
        index |= 4u;
    if ((folded & 0xcccccccc) != 0)
        index |= 2u;
    if ((folded & 0xaaaaaaaa) != 0)
        index |= 1u;
    return index;
}
// Demo: prints the index of the lowest set bit of 1ULL << 20.
int main()
{
unsigned long long x = 1ULL<<20;
cout << lsb(x) << endl;
}
Output
20
I think, I had found it here.
Using log:
#include <iostream>
#include <cmath>
// Demo: recovers the shift count of a single-bit mask with log2().
int main() {
auto mask = 1ULL << 20;
std::cout << log2(mask) << std::endl;
// edit out: std::cout << log(mask) / log(2) << std::endl;
return 0;
}
or loop and shift:
#include <iostream>
// Demo: shifts the mask right one bit at a time and prints the loop counter at
// the step where the mask becomes zero.
int main() {
auto mask = 1ULL << 20;
// the "&& mask" condition ends the loop once the mask has been shifted out
for (unsigned int c = 0; c < sizeof(mask) * 8 && mask; c++) {
mask >>= 1;
if (mask == 0)
std::cout << c << std::endl;
}
return 0;
}
If it's a 64-bit mask, you can compute it modulo 67 and do a table lookup.
To wit:
// Lookup table indexed by (mask % 67); entries that hold -1 are not bit indices.
static int table[67] = {
    /*  0.. 9 */ -1,  0,  1, 39,  2, 15, 40, 23,  3, 12,
    /* 10..19 */ 16, 59, 41, 19, 24, 54,  4, -1, 13, 10,
    /* 20..29 */ 17, 62, 60, 28, 42, 30, 20, 51, 25, 44,
    /* 30..39 */ 55, 47,  5, 32, -1, 38, 14, 22, 11, 58,
    /* 40..49 */ 18, 53, 63,  9, 61, 27, 29, 50, 43, 46,
    /* 50..59 */ 31, 37, 21, 57, 52,  8, 26, 49, 45, 36,
    /* 60..66 */ 56,  7, 48, 35,  6, 34, 33
};

// Returns the table entry for ull modulo 67.
int unmask(unsigned long long ull) {
    const unsigned long long slot = ull % 67;
    return table[slot];
}
// First, to make sure only one bit is "on":
if ((mask & mask-1) != 0)
{
    //you have more than 1 bit "on", deal with it...
}
// Finding which bit is "on" can be achieved in a loop:
// shift the mask right until only bit 0 is left and count the shifts.
int count = 0;
while (mask > 1)
{
    mask>>=1;
    count++;
}
//At this point count will have the required value (20 in your example)
Option 1: iterate
// Shift right while bit 0 is clear, counting the shifts; the leading "mask &&" stops the loop for a zero mask.
while (mask && !(mask & 1)) { mask>>=1; count++; }
Option 2: iterate multiple bits at a time:
// Binary search for the set bit: a selects the low b bits of mask. If they are
// all zero, the bit lies above them, so skip b positions. Then halve the
// window: b is halved and the probe a is narrowed to the new width.
unsigned long long a=0xFFFFFFFFULL; int b=32;
while (mask>1 && b>0) {   // b>0 guarantees termination for masks with several bits set
    if (!(mask & a)) { count+=b; mask>>=b; }
    b>>=1; a>>=b;         // narrow the probe, not the mask
}
Option 3: Convert the mask to double or float and extract the exponent.
// Overlays a float with bit-fields so that exponent bits of (float) mask can be
// read back through u.s.
// NOTE(review): bit-field layout is implementation-defined, and reading a union
// member other than the one last written is not guaranteed to work in C++ —
// confirm on the target compiler.
// NOTE(review): IEEE-754 single precision has an 8-bit exponent, while exp is
// declared 7 bits wide here — verify that this and the "+ 1" are intended.
union {
struct {
int mantissa:23;
int exp:7;
int sign:1;
} s;
float f;
} u = { (float) mask };
return u.s.exp + 1;
A simple loop will be quite okay:
// Reports every bit position that is set in mask.
// The counter is unsigned so that it is compared with sizeof(mask) * 8 without
// mixing signed and unsigned values.
for (unsigned bit = 0; bit < sizeof(mask) * 8; bit++)
{
    if ((1ULL << bit) & mask)
        std::cout << "Bit " << bit << " is set in the mask\n";
}
How about a TMP solution:
#include <iostream>
// Compile-time bit index: MaskIndex<MASK>::i is one more than
// MaskIndex<MASK / 2>::i, with MaskIndex<1>::i == 0 ending the recursion.
// NOTE(review): MASK == 0 never reaches the MaskIndex<1> specialization —
// confirm callers only pass non-zero masks.
template < unsigned long long MASK >
struct MaskIndex
{
enum { i = MaskIndex < MASK / 2 >::i + 1 };
};
// Recursion anchor: the mask with only bit 0 set has index 0.
template <>
struct MaskIndex < 1 >
{
enum { i = 0 };
};
// Demo: prints MaskIndex<mask>::i for mask = 1ULL << 20.
int main()
{
const unsigned long long mask = 1ULL << 20;
std::cout << MaskIndex < mask >::i << std::endl;
return ( 0 );
}
You can try this..
// Tests one specific bit: the message is printed when bit 20 of mask is set.
if((1ULL<<20)&mask) {
cout << "20th bit is set";
}