How to Perform Tuckerman Rounding for Floating Point Square Root

How to Perform Tuckerman Rounding for Floating Point Square Root - c++

I am trying to perform a Tuckerman Rounding Test in order to determine the correctly rounded to nearest result.
I created a program in C++ to compare two solutions to a square root of a number and perform a tuckerman test on them. However, the C++ math library solution fails to pass the tuckerman test, so I'm wondering what could be wrong?
Here is my output:
Square root program started
Input value is 62a83003
===Tuckerman Test with MATLAB result===
Square root result from MATLAB = 5112b968
g*(g-ulp) = 62a83001
b = 62a83003
g*(g+ulp) = 62a83003
=====>Passes Tuckerman test
===Tuckerman Test with correct result===
Correct square root result = 5112b969
g*(g-ulp) = 62a83003
b = 62a83003
g*(g+ulp) = 62a83005
=====>Fails Tuckerman test
Here is my code (C++):
#include <iostream>
#include <cmath>
#include <fstream>
using namespace std;
union newfloat{
float f;
unsigned int i;
};
int main ()
{
// Declare new floating point numbers
newfloat input;
newfloat result, resultm1, resultp1;
newfloat correct_result, correct_resultm1, correct_resultp1;
newfloat resultm1_times_result, resultp1_times_result;
newfloat correct_resultm1_times_result, correct_resultp1_times_result;
// Print message at start of program
cout << "Square root program started" << endl;
input.i = 0x62A83003; // Input we are trying to find the square root of
cout << "Input value is " << hex << input.i << "\n" << endl; // Print input value
result.i = 0x5112B968; // Result from MATLAB
resultm1.i = result.i - 1; // value minus 1 ulp
resultp1.i = result.i + 1; // value plus 1 ulp
correct_result.f = sqrt(input.f); // Compute correct square root
correct_resultm1.i = correct_result.i - 1; // correct value minus 1 ulp
correct_resultp1.i = correct_result.i + 1; // correct value plus 1 ulp
resultm1_times_result.f = result.f * resultm1.f; // Compute g(g-ulp) for matlab result
resultp1_times_result.f = result.f * resultp1.f; // Compute g(g+ulp) for matlab result
correct_resultm1_times_result.f = correct_result.f * correct_resultm1.f; // Compute g*(g-ulp) for correct result
correct_resultp1_times_result.f = correct_result.f * correct_resultp1.f; // Compute g*(g+ulp) for correct result
// Print output from MATLAB algorithm and perform tuckerman test
cout << "===Tuckerman Test with MATLAB result===" << endl;
cout << "Square root result from MATLAB = " << result.i << endl;
cout << "g*(g-ulp) = " << hex << resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << resultp1_times_result.i << endl;
if ((resultm1_times_result.f < input.f) && (input.f <= resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
cout << "\n" << endl;
// Print output from C++ sqrt math library and perform tuckerman test
cout << "===Tuckerman Test with correct result===" << endl;
cout << "Correct square root result = " << hex << correct_result.i << endl;
cout << "g*(g-ulp) = " << hex << correct_resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << correct_resultp1_times_result.i << endl;
if ((correct_resultm1_times_result.f < input.f) && (input.f <= correct_resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
return 0;
}

The original publication that introduced Tuckerman rounding for the square root was:
Ramesh C. Agarwal, James W. Cooley, Fred G. Gustavson, James B. Shearer, Gordon Slishman, Bryant Tuckerman,
"New scalar and vector elementary functions for the IBM System/370", IBM J. Res. Develop., Vol. 30, No. 2, March 1986, pp. 126-144.
This paper specifically points out that the multiplications used to compute the products g*(g-ulp) and g*(g+ulp) are truncating, not rounding multiplications:
"However, these inequalities can be shown to be equivalent to
y- * y < x <= y * y+ ,
where * denotes System/360/370 multiplication (which truncates the result), so that the tests are easily carried out
without the need for extra precision. (Note the asymmetry: one <, one <=.) If the left inequality fails, y is too large; if the
right inequality fails, y is too small."
The following C99 code shows how Tuckerman rounding is successfully utilized to deliver correctly rounded results in a single-precision square root function.
#include <stdio.h>
#include <stdlib.h>
#include <fenv.h>
#include <math.h>
#pragma STDC FENV_ACCESS ON
float mul_fp32_rz (float a, float b)
{
float r;
int orig_rnd = fegetround();
fesetround (FE_TOWARDZERO);
r = a * b;
fesetround (orig_rnd);
return r;
}
float my_sqrtf (float a)
{
float b, r, v, w, p, s;
int e, t, f;
if ((a <= 0.0f) || isinff (a) || isnanf (a)) {
if (a < 0.0f) {
r = 0.0f / 0.0f;
} else {
r = a + a;
}
} else {
/* compute exponent adjustments */
b = frexpf (a, &e);
t = e - 2*512;
f = t / 2;
t = t - 2 * f;
f = f + 512;
/* map argument into the primary approximation interval [0.25,1) */
b = ldexpf (b, t);
/* initial approximation to reciprocal square root */
r = -6.10005470e+0f;
r = r * b + 2.28990124e+1f;
r = r * b - 3.48110069e+1f;
r = r * b + 2.76135244e+1f;
r = r * b - 1.24472151e+1f;
r = r * b + 3.84509158e+0f;
/* round rsqrt approximation to 11 bits */
r = rintf (r * 2048.0f);
r = r * (1.0f / 2048.0f);
/* Use A. Schoenhage's coupled iteration for the square root */
v = 0.5f * r;
w = b * r;
w = (w * -w + b) * v + w;
v = (r * -w + 1.0f) * v + v;
w = (w * -w + b) * v + w;
/* Tuckerman rounding: mul_rz (w, w-ulp) < b <= mul_rz (w, w+ulp) */
p = nextafterf (w, 0.0f);
s = nextafterf (w, 2.0f);
if (b <= mul_fp32_rz (w, p)) {
w = p;
} else if (b > mul_fp32_rz (w, s)) {
w = s;
}
/* map back from primary approximation interval by jamming exponent */
r = ldexpf (w, f);
}
return r;
}
int main (void)
{
volatile union {
float f;
unsigned int i;
} arg, res, ref;
arg.i = 0;
do {
res.f = my_sqrtf (arg.f);
ref.f = sqrtf (arg.f);
if (res.i != ref.i) {
printf ("!!!! error # arg=%08x: res=%08x ref=%08x\n",
arg.i, res.i, ref.i);
break;
}
arg.i++;
} while (arg.i);
return EXIT_SUCCESS;
}

Related

C++ Newton Raphson method is slower than bisection?

My task is to find the root of a function with both the Newton Raphson and the bisection method within an error margin of 10E-7.
The point of all that is, that we learn that the Newton Raphson method is faster and more effective.
Now for some reason I come to the opposite result. Although I am aware that the initial guess of the root in both methods strongly affects the number of necessary iterations. But I entered a similar guess in both algorithms and my fellow students dont get the result I do.
Bisection method:
#include <iostream>
#include <iomanip>
using namespace std;
//Declaring the given function
double func1(double x) {
return 0.00000000027 * (x - 10000000) - 0.16460351745 * (-1 + ((1000000000) / (x))) * 1 / (sqrt(x));
}
int main() {
std::fixed;
//Initial guess: root ist at 10 to the 7.
double x1 = 10000000;
double x2 = 1000000000;
double eps = 0.0000001;
int i = 0;
double x0[100000];
x0[0] =0;
//Exception handler
if (func1(x1) * func1(x2) > 0) {
cout << "Root is not inside the bracket.";
goto end;
}
goto start;
//Bisection Algorithm
while (abs(x0[i] - x0[i-1]) >= eps) {
start:
i = i + 1;
x0[i] = 0.5 * (x1 + x2);
if (func1(x1) * func1(x0[i]) < 0) {
x2 = x0[i];
}
else {
x1 = x0[i];
}
}
cout << endl << "Bisection Method: " << fixed << setprecision(10) << x0[i] << endl << "Iterations: " << i << endl << endl << endl << endl << endl;
end:
return 0;
}
}
Newton Raphson:
#include <iostream>
#include <iomanip>
using namespace std;
// Declaring the function and its derivative
double func1(double x) {
return 0.00000000027 * (x - 10000000) - 0.16460351745 * (-1 + ((1000000000) / (x))) * 1 / (sqrt(x));
}
double funcderiv1(double x) {
return 0.00000000027+((0.1646035174)/(2*x*x*sqrt(x)))*(30000000-x);
}
int main()
{
std::fixed;
double eps = 1;
double x_start = 10000000;
double c;
int i = 0;
while (eps >= 0.0000001) {
c = x_start - ((func1(x_start)) / (funcderiv1(x_start)));
eps = abs(func1(x_start) / funcderiv1(x_start));
x_start = c;
i = i + 1;
}
cout << fixed << setprecision(5) << "RESULT " << c << endl << " Iterations: " << i << endl;
}
The root is at 17903534.23630
Does anyone know why my bisection method needs 55 iterations while Newton Raphson takes like 82?

For the function
f(x) = A * (x - B) - C * (D / x - 1) / sqrt(x)
A = 0.00000000027
B = 10000000
C = 0.16460351745
D = 1000000000
the correct derivative is:
f'(x) = A - C (x - 3D) / (2 * x * x * sqrt(x))
Compare this with your expression:
g(x) = A - C (x - 3B) / (2 * x * x * sqrt(x))
After fixing the formula (by adding two zeros), your code makes 6 iterations:
RESULT 17903534.23630
Iterations: 6

Perceptron converging but returning odd results

I made a simple perceptron in c++ to study AI and even following a book(pt_br) i could not make my perceptron return an expected result, i tryed to debug and find the error but i didnt succeed.
My algorithm AND gate results (A and B = Y):
0 && 0 = 0
0 && 1 = 1
1 && 0 = 1
1 && 1 = 1
Basically its working as an OR gate or random.
I Tried to jump to Peter Norving and Russel book, but he goes fast over this and dont explain on depth one perceptron training.
I really want to learn every inch of this content, so i dont want to jump to Multilayer perceptron without making the simple one work, can you help?
The following code is the minimal code for operation with some explanations:
Sharp function:
int signal(float &sin){
if(sin < 0)
return 0;
if(sin > 1)
return 1;
return round(sin);
}
Perceptron Struct (W are Weights):
struct perceptron{
float w[3];
};
Perceptron training:
perceptron startTraining(){
//- Random factory generator
long int t = static_cast<long int>(time(NULL));
std::mt19937 gen;
gen.seed(std::random_device()() + t);
std::uniform_real_distribution<float> dist(0.0, 1.0);
//--
//-- Samples (-1 | x | y)
float t0[][3] = {{-1,0,0},
{-1,0,1},
{-1,1,0},
{-1,1,1}};
//-- Expected result
short d [] = {0,0,0,1};
perceptron per;
per.w[0] = dist(gen);
per.w[1] = dist(gen);
per.w[2] = dist(gen);
//-- print random numbers
cout <<"INIT "<< "W0: " << per.w[0] <<" W1: " << per.w[1] << " W2: " << per.w[2] << endl;
const float n = 0.1; // Lerning rate N
int saida =0; // Output Y
long int epo = 0; // Simple Couter
bool erro = true; // Loop control
while(erro){
erro = false;
for (int amost = 0; amost < 4; ++amost) { // Repeat for the number of samples x0=-1, x1,x2
float u=0; // Variable for the somatory
for (int entrad = 0; entrad < 3; ++entrad) { // repeat for every sinaptic weight W0=θ , W1, W2
u = u + (per.w[entrad] * t0[amost][entrad]);// U <- Weights * Inputs
}
// u=u-per.w[0]; // some references sau to take θ and subtract from U, i tried but without success
saida = signal(u); // returns 1 or 0
cout << d[amost] << " <- esperado | encontrado -> "<< saida<< endl;
if(saida != d[amost]){ // if the output is not equal to the expected value
for (int ajust = 0; ajust < 3; ++ajust) {
per.w[ajust] = per.w[ajust] + n * (d[amost] - saida) * t0[amost][ajust]; // W <- W + ɳ * ((d - y) x) where
erro = true; // W: Weights, ɳ: Learning rate
} // d: Desired outputs, y: outputs
} // x: samples
epo++;
}
}
cout << "Epocas(Loops): " << epo << endl;
return per;
}
Main with testing part:
int main()
{
perceptron per = startTraining();
cout << "fim" << endl;
cout << "W0: " << per.w[0] <<" W1: " << per.w[1] << " W2: " << per.w[2] << endl;
while(true){
int x,y;
cin >> x >> y;
float u=0;
u = (per.w[1] * x);
u = u + (per.w[2] * y);
//u=u-per.w[0];
cout << signal(u) << endl;
}
return 0;
}

In your main(), re-enable the line you commented out. Alternatively, you could write it like this to make it more illuminating:
float u = 0.0f;
u += (per.w[0] * float (-1));
u += (per.w[1] * float (x));
u += (per.w[2] * float (y));
The thing is that you trained the perceptron with three inputs, the first being hard-wired to a "-1" (making the first weight w[0] act like a constant "bias"). Accordingly, in your training function, your u is the sum of all THREE of those weight-input product.
However, in the main() you posted, you omit w[0] completely, thus producing a wrong result.

Function as parameter of a function, using bisection method C++

// Function as parameter of a function
#include <iostream>
#include <cmath>
#include <cassert>
using namespace std;
const double PI = 4 * atan(1.0); // tan^(-1)(1) == pi/4 then 4*(pi/4)== pi
typedef double(*FD2D)(double);
double root(FD2D, double, double); //abscissae of 2-points,
//For the algorithm to work, the function must assume values of opposite sign in these two points, check
// at point 8
double polyn(double x) { return 3 - x*(1 + x*(27 - x * 9)); }
int main() {
double r;
cout.precision(15);
r = root(sin, 3, 4);
cout << "sin: " << r << endl
<< "exactly: " << PI << endl << endl;
r = root(cos, -2, -1.5);
cout << "cos: " << r << endl
<< "exactly: " << -PI/2 << endl << endl;
r = root(polyn, 0, 1);
cout << "polyn: " << r << endl
<< "exactly: " << 1./3 << endl << endl;
/*
we look for the root of the function equivalent to function polyn
but this time defined as a lambda function
*/
r = root([](double x) -> double {
return 3 - x*(1 + x*(27 - x * 9));
}, 0, 1);
cout << "lambda: " << r << endl
<< "exactly: " << 1. / 3 << endl << endl;
return 0;
}
// Finding root of function using bisection.
// fun(a) and fun(b) must be of opposite sign
double root(FD2D fun, double a, double b) {
static const double EPS = 1e-15; // 1×10^(-15)
double f, s, h = b - a, f1 = fun(a), f2 = fun(b);
if (f1 == 0) return a;
if (f2 == 0) return b;
assert(f1*f2<0); // 8.- macro assert from header cassert.
do {
if ((f = fun((s = (a + b) / 2))) == 0) break;
if (f1*f2 < 0) {
f2 = f;
b = s;
}
else {
f1 = f;
a = s;
}
} while ((h /= 2) > EPS);
return (a + b) / 2;
}
Could somebody explain me how the loop in double root function works? I don't seem to understand 100%, I checked this bisection method online and try on paper, but I can't manage to figure it out from this example.
Thanks in advance!

It's a lot easier to understand if you split the "clever" line into multiple lines. Here's some modifications, plus comments:
double root(FD2D fun, double a, double b) {
static const double EPS = 1e-15; // 1×10^(-15)
double fa = fun(a), fb = fun(b);
// if either f(a) or f(b) are the root, return that
// nothing else to do
if (fa == 0) return a;
if (fb == 0) return b;
// this method only works if the signs of f(a) and f(b)
// are different. so just assert that
assert(fa * fb < 0); // 8.- macro assert from header cassert.
do {
// calculate fun at the midpoint of a,b
// if that's the root, we're done
// this line is awful, never write code like this...
//if ((f = fun((s = (a + b) / 2))) == 0) break;
// prefer:
double midpt = (a + b) / 2;
double fmid = fun(midpt);
if (fmid == 0) return midpt;
// adjust our bounds to either [a,midpt] or [midpt,b]
// based on where fmid ends up being. I'm pretty
// sure the code in the question is wrong, so I fixed it
if (fa * fmid < 0) { // fmid, not f1
fb = fmid;
b = midpt;
}
else {
fa = fmid;
a = midpt;
}
} while (b-a > EPS); // only loop while
// a and b are sufficiently far
// apart
return (a + b) / 2; // approximation
}

Vastly different output C++ monte carlo approximation

doing a C++ approximation of Pi using a random number generator, output works exactly as expected on my AMD 64 machine running Ubuntu, however on my school machine the second algorithm I've implemented is broken, and would love some insight as to why. Code is as follows:
#ifndef RANDOMNUMBER_H_
#define RANDOMNUMBER_H_
class RandomNumber {
public:
RandomNumber() {
x = time(NULL);
m = pow(2, 19); //some constant value
M = 65915 * 7915; //multiply of some simple numbers p and q
method = 1;
}
RandomNumber(int seed) {
x = ((seed > 0) ? seed : time(NULL));
m = pow(2, 19); //some constant value
method = 1; //method number
M = 6543 * 7915; //multiply of some simple numbers p and q
}
void setSeed(long int seed) {
x = seed; //set start value
}
void chooseMethod(int method) {
this->method = ((method > 0 && method <= 2) ? method : 1); //choose one of two method
}
long int linearCongruential() { //first generator, that uses linear congruential method
long int c = 0; // some constant
long int a = 69069; //some constant
x = (a * x + c) % m; //solution next value
return x;
}
long int BBS() { //algorithm Blum - Blum - Shub
x = (long int) (pow(x, 2)) % M;
return x;
}
double nextPoint() { //return random number in range (-1;1)
double point;
if (method == 1) //use first method
point = linearCongruential() / double(m);
else
point = BBS() / double(M);
return point;
}
private:
long int x; //current value
long int m; // some range for first method
long int M; //some range for second method
int method; //method number
};
#endif /* RANDOMNUMBER_H_ */
and test class:
#include <iostream>
#include <stdlib.h>
#include <math.h>
#include <iomanip>
#include "RandomNumber.h"
using namespace std;
int main(int argc, char* argv[]) {
cout.setf(ios::fixed);
cout.precision(6);
RandomNumber random;
random.setSeed(argc);
srand((unsigned) time(NULL));
cout << "---------------------------------" << endl;
cout << " Monte Carlo Pi Approximation" << endl;
cout << "---------------------------------" << endl;
cout << " Enter number of points: ";
long int k1;
cin >> k1;
cout << "Select generator number: ";
int method;
cin >> method;
random.chooseMethod(method);
cout << "---------------------------------" << endl;
long int k2 = 0;
double sumX = 0;
double sumY = 0;
for (long int i = 0; i < k1; i++) {
double x = pow(-1, int(random.nextPoint() * 10) % 2)
* random.nextPoint();
double y = pow(-1, int(random.nextPoint() * 10) % 2)
* random.nextPoint();
sumX += x;
sumY += y;
if ((pow(x, 2) + pow(y, 2)) <= 1)
k2++;
}
double pi = 4 * (double(k2) / k1);
cout << "M(X) = " << setw(10) << sumX / k1 << endl; //mathematical expectation of x
cout << "M(Y) = " << setw(10) << sumY / k1 << endl; //mathematical expectation of y
cout << endl << "Pi = " << pi << endl << endl; //approximate Pi
return 0;
}
The second method returns 4.000 consistently on my lab machine, yet returns a rather close approximation on my personal machine.

For one thing, the BBS generator as you're using it will always return 1.
Since your program takes no arguments, presumably its argc will be 1. You pass argc as the seed (why?), so the initial value of x is 1.
BBS() has the following logic:
x = (long int) (pow(x, 2)) % M;
Clearly, 1 squared modulo M gives 1, so x never changes.
When you run the simulation with such a generator, your program will always output 4.
P.S. Wikipedia has the following to say about the initial value x0 for Blum Blum Shub:
The seed x0 should be an integer that's co-prime to M (i.e. p and q are not factors of x0) and not 1 or 0.

self made pow() c++

I was reading through How can I write a power function myself? and the answer given by dan04 caught my attention mainly because I am not sure about the answer given by fortran, but I took that and implemented this:
#include <iostream>
using namespace std;
float pow(float base, float ex){
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// even exponenet
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main(){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ".5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ",2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ",3) = " << pow(ii, 3) << endl;
}
}
though I am not sure if I translated this right because all of the calls giving .5 as the exponent return 0. In the answer it states that it might need a log2(x) based on a^b = 2^(b * log2(a)), but I am unsure about putting that in as I am unsure where to put it, or if I am even thinking about this right.
NOTE: I know that this might be defined in a math library, but I don't need all the added expense of an entire math library for a few functions.
EDIT: does anyone know a floating-point implementation for fractional exponents? (I have seen a double implementation, but that was using a trick with registers, and I need floating-point, and adding a library just to do a trick I would be better off just including the math library)

I have looked at this paper here which describes how to approximate the exponential function for double precision. After a little research on Wikipedia about single precision floating point representation I have worked out the equivalent algorithms. They only implemented the exp function, so I found an inverse function for the log and then simply did
POW(a, b) = EXP(LOG(a) * b).
compiling this gcc4.6.2 yields a pow function almost 4 times faster than the standard library's implementation (compiling with O2).
Note: the code for EXP is copied almost verbatim from the paper I read and the LOG function is copied from here.
Here is the relevant code:
#define EXP_A 184
#define EXP_C 16249
float EXP(float y)
{
union
{
float d;
struct
{
#ifdef LITTLE_ENDIAN
short j, i;
#else
short i, j;
#endif
} n;
} eco;
eco.n.i = EXP_A*(y) + (EXP_C);
eco.n.j = 0;
return eco.d;
}
float LOG(float y)
{
int * nTemp = (int*)&y;
y = (*nTemp) >> 16;
return (y - EXP_C) / EXP_A;
}
float POW(float b, float p)
{
return EXP(LOG(b) * p);
}
There is still some optimization you can do here, or perhaps that is good enough.
This is a rough approximation but if you would have been satisfied with the errors introduced using the double representation, I imagine this will be satisfactory.

I think the algorithm you're looking for could be 'nth root'. With an initial guess of 1 (for k == 0):
#include <iostream>
using namespace std;
float pow(float base, float ex);
float nth_root(float A, int n) {
const int K = 6;
float x[K] = {1};
for (int k = 0; k < K - 1; k++)
x[k + 1] = (1.0 / n) * ((n - 1) * x[k] + A / pow(x[k], n - 1));
return x[K-1];
}
float pow(float base, float ex){
if (base == 0)
return 0;
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// fractional exponent
}else if (ex > 0 && ex < 1){
return nth_root(base, 1/ex);
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main_pow(int, char **){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ", .5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ", 2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ", 3) = " << pow(ii, 3) << endl;
}
return 0;
}
test:
pow(0, .5) = 0.03125
pow(0, 2) = 0
pow(0, 3) = 0
pow(1, .5) = 1
pow(1, 2) = 1
pow(1, 3) = 1
pow(2, .5) = 1.41421
pow(2, 2) = 4
pow(2, 3) = 8
pow(3, .5) = 1.73205
pow(3, 2) = 9
pow(3, 3) = 27
pow(4, .5) = 2
pow(4, 2) = 16
pow(4, 3) = 64
pow(5, .5) = 2.23607
pow(5, 2) = 25
pow(5, 3) = 125
pow(6, .5) = 2.44949
pow(6, 2) = 36
pow(6, 3) = 216
pow(7, .5) = 2.64575
pow(7, 2) = 49
pow(7, 3) = 343
pow(8, .5) = 2.82843
pow(8, 2) = 64
pow(8, 3) = 512
pow(9, .5) = 3
pow(9, 2) = 81
pow(9, 3) = 729

I think that you could try to solve it by using the Taylor's series,
check this.
http://en.wikipedia.org/wiki/Taylor_series
With the Taylor's series you can solve any difficult to solve calculation such as 3^3.8 by using the already known results such as 3^4. In this case you have
3^4 = 81 so
3^3.8 = 81 + 3.8*3( 3.8 - 4) +..+.. and so on depend on how big is your n you will get the closer solution of your problem.

I and my friend faced similar problem while we're on an OpenGL project and math.h didn't suffice in some cases. Our instructor also had the same problem and he told us to seperate power to integer and floating parts. For example, if you are to calculate x^11.5 you may calculate sqrt(x^115, 10) which may result more accurate result.

Reworked on #capellic answer, so that nth_root works with bigger values as well.
Without the limitation of an array that is allocated for no reason:
#include <iostream>
float pow(float base, float ex);
inline float fabs(float a) {
return a > 0 ? a : -a;
}
float nth_root(float A, int n, unsigned max_iterations = 500, float epsilon = std::numeric_limits<float>::epsilon()) {
if (n < 0)
throw "Invalid value";
if (n == 1 || A == 0)
return A;
float old_value = 1;
float value;
for (int k = 0; k < max_iterations; k++) {
value = (1.0 / n) * ((n - 1) * old_value + A / pow(old_value, n - 1));
if (fabs(old_value - value) < epsilon)
return value;
old_value = value;
}
return value;
}
float pow(float base, float ex) {
if (base == 0)
return 0;
if (ex == 0){
// power of 0
return 1;
} else if( ex < 0) {
// negative exponent
return 1 / pow(base, -ex);
} else if (ex > 0 && ex < 1) {
// fractional exponent
return nth_root(base, 1/ex);
} else if ((int)ex % 2 == 0) {
// even exponent
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
} else {
// integer exponent
return base * pow(base, ex - 1);
}
}
int main () {
for (int i = 0; i <= 128; i++) {
std::cout << "pow(" << i << ", .5) = " << pow(i, .5) << std::endl;
std::cout << "pow(" << i << ", .3) = " << pow(i, .3) << std::endl;
std::cout << "pow(" << i << ", 2) = " << pow(i, 2) << std::endl;
std::cout << "pow(" << i << ", 3) = " << pow(i, 3) << std::endl;
}
std::cout << "pow(" << 74088 << ", .3) = " << pow(74088, .3) << std::endl;
return 0;
}

This solution of MINE will be accepted upto O(n) time complexity
utpo input less then 2^30 or 10^8
IT will not accept more then these inputs
It WILL GIVE TIME LIMIT EXCEED warning
but easy understandable solution
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// static is important here
// other wise it will store same values while multiplying
double p = x;
double ans;
// as we multiple p it will multiply it with q which has the
//previous value of this ans latter we will update the q
// so that q has fresh value for further test cases here
static double q=1; // important
if(n==0){ ans = q; q=1; return ans;}
if(n>0)
{
p *= q;
// stored value got multiply by p
q=p;
// and again updated to q
p=x;
//to update the value to the same value of that number
// cout<<q<<" ";
recursive(p,n-1);
}
return ans;
}
class Solution {
public:
double myPow(double x, int n) {
// double q=x;double N=n;
// return pow(q,N);
// when both sides are double this function works
if(n==0)return 1;
x = recursive(x,abs(n));
if(n<0) return double(1/x);
// else
return x;
}
};
For More help you may try
LEETCODE QUESTION NUMBER 50

**NOW the Second most optimize code pow(x,n) **
logic is that we have to solve it in O(logN) so we devide the n by 2
when we have even power n=4 , 4/2 is 2 means we have to just square it (22)(22)
but when we have odd value of power like n=5, 5/2 here we have square it to get
also the the number itself to it like (22)(2*2)*2 to get 2^5 = 32
HOPE YOU UNDERSTAND FOR MORE YOU CAN VISIT
POW(x,n) question on leetcode
below the optimized code and above code was for O(n) only
*
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// recursive calls will return the whole value of the program at every calls
if(n==0){return 1;}
// 1 is multiplied when the last value we get as we don't have to multiply further
double store;
store = recursive(x,n/2);
// call the function after the base condtion you have given to it here
if(n%2==0)return store*store;
else
{
return store*store*x;
// odd power we have the perfect square multiply the value;
}
}
// main function or the function for indirect call to recursive function
double myPow(double x, int n) {
if(n==0)return 1;
x = recursive(x,abs(n));
// for negatives powers
if(n<0) return double(1/x);
// else for positves
return x;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

How to Perform Tuckerman Rounding for Floating Point Square Root - c++

Related

C++ Newton Raphson method is slower than bisection?

Perceptron converging but returning odd results

Function as parameter of a function, using bisection method C++

Vastly different output C++ monte carlo approximation

self made pow() c++

Categories

Resources