c++ dividing two floats results in an int

c++ dividing two floats results in an int - c++

I created a little program that is supposed to calculate pi using the first 26 iterations of the Leibniz formula in c++, just to see id it would work. When I ran the code, it outputted 4 instead of a floating point number. What is going on and how can I fix it? Here is the code:
#include <iostream>
#include <math.h>
using namespace std;
int main ()
{
float a = 1/1;
float b = 1/3;
float c = 1/5;
float d = 1/7;
float e = 1/9;
float f = 1/11;
float g = 1/13;
float h = 1/15;
float i = 1/17;
float j = 1/19;
float k = 1/21;
float l = 1/23;
float m = 1/25;
float n = 1/27;
float o = 1/29;
float p = 1/31;
float q = 1/33;
float r = 1/35;
float s = 1/37;
float t = 1/39;
float u = 1/41;
float v = 1/43;
float w = 1/45;
float x = 1/47;
float y = 1/49;
float z = 1/51;
float a1 = a-b+c-d+e-f+g-h+i-j+k-l+m-n+o-p+q-r+s-t+u-v+w-x+y-z;
float b1 = a1*4;
cout << b1;
}
Yes, I know there are much more simple ways to do this, but this is just a proof of concept.

When you use:
float b = 1/3;
the RHS of the assignment operator is evaluated using integer division, which results in 0. All other variables have the 0 value except a which has the value of 1.
In order to avoid that, use
float b = 1.0f/3;
or
float b = 1.0/3;
Make similar changes to all other statements.

Another way, use casting
float a = (float)1/1; // C-style cast
or
float a = float(1)/1;

Related

C++: Solution to this floating point error problem?

This is an example of my code:
float a = 0.f;
float b = 5.f;
float increment = 0.1f;
while(a != b)
a+=increment;
This will result in an infinite loop. Is there any solutions to it, or the only way to solve this is to set a tolerance?

Avoid using floating-point calculation when possible. In this case you can treat with the numbers as integer by multiplying them by 10 and dividing by 10 in the end.
float a, b, increment;
int a_i = 0;
int b_i = 50;
int increment_i = 1;
while(a_i != b_i)
a_i+=increment_i;
a = a_i / 10.f,
b = b_i / 10.f;
increment = increment_i / 10.f;

4th Order Adams-Moulton Method C++

I am trying to solve an ODE using 4th Order Runge Kutta and the 4th Order Adams-Moulton Method. I iterate over a couple of thousand timesteps and it seems to hold fine when the values are constant(first 50 or so) However, the very first iteration when a change in input value jumps(current(I) changes from 0 to 1) it won't converge.
In order to satisfy the 4th Order Adams-Moulton Method I have split each timestep in the main function into 5 smaller timesteps so the 5th inner step will equal the next time step (0.1s) in the main function.
I will be very grateful for any advice/help!!
The code is below
#include <iostream>
#include <cmath>
using namespace std;
double state_deriv(double Vc,double I, double OCV1, double C, double Rct)
{
double newVc = (OCV1 - Vc) / (C * Rct) - (I / C);
return newVc;
}
double predict(double Vc,double Vc_[],double dt,double I_[],double OCV1_[],double C_[],double Rct_[])
{
double fn[4];
fn[0] = state_deriv(Vc_[0], I_[0],OCV1_[0],C_[0],Rct_[0]);
fn[1] = state_deriv(Vc_[1], I_[1],OCV1_[1],C_[1],Rct_[1]);
fn[2] = state_deriv(Vc_[2], I_[2],OCV1_[2],C_[2],Rct_[2]);
fn[3] = state_deriv(Vc_[3], I_[3],OCV1_[3],C_[3],Rct_[3]);
// value of next y(predicted) is returned
double y1p = Vc_[3] + (dt/24)*(55*fn[3] - 59*fn[2] + 37 *fn[1] - 9 *fn[0]);
return y1p;
}
double correct(double Vc,double Vc_[],double Vc1,double dt,double I_[],double OCV1_[],double C_[],double Rct_[])
{
double e = 0.0001;
double Vc1c = Vc1;
double fn[4];
fn[1] = state_deriv(Vc_[1], I_[1],OCV1_[1],C_[1],Rct_[1]);
fn[2] = state_deriv(Vc_[2], I_[2],OCV1_[2],C_[2],Rct_[2]);
fn[3] = state_deriv(Vc_[3], I_[3],OCV1_[3],C_[3],Rct_[3]);
do {
Vc1 = Vc1c;
fn[4] = state_deriv(Vc1, I_[4],OCV1_[4],C_[4],Rct_[4]);
Vc1c = Vc_[3] + dt/24*(9*fn[4] + 19*fn[3] - 5*fn[2] + fn[1]);
} while (fabs(Vc1c - Vc1) > e);
// every iteration is correcting the value
// of state deriv using average slope
return Vc1c;
}
double predictor_corrector(double Vc, double dt, double I[], double* OCV1, double* C, double* Rct)
{
//double x = time[0];
//double xn = time[1];
double h = dt;
//4th order RKK TO predict 4 values
double I_[4],OCV1_[5],C_[5],Rct_[5];
I_[0] = I[0];
OCV1_[0] = OCV1[0];
C_[0] = *C;
Rct_[0] = *Rct;
I_[4] = I[1];
OCV1_[4] = OCV1[1];
C_[4] = *C;
Rct_[4] = *Rct;
double newdt;
for(int i = 1;i<4;i++){
newdt = i * dt/5;
I_[i] = I_[0] + (I[1]-I[0]) * (newdt/dt);
OCV1_[i] = OCV1_[0] + (OCV1[1] - OCV1[0])*(newdt/dt);
Rct_[i] = *Rct;
C_[i] = *C;
}
//now do rk4 on each step
double k1,k2,k3,k4;
double Vc_[4];
Vc_[0] = Vc;
for(int i = 1;i<4;i++) {
// 4th order runge kutta
k1 = dt/4 * state_deriv(Vc_[i-1], *I, *OCV1, *C, *Rct);
k2 = dt/4 * state_deriv(Vc_[i-1]+k1/2.0, I_[i], OCV1_[i], *C, *Rct);
k3 = dt/4 * state_deriv(Vc_[i-1]+k2/2.0, I_[i], OCV1_[i], *C, *Rct);
k4 = dt/4 * state_deriv(Vc_[i-1]+k3, I_[i], OCV1_[i], *C, *Rct);
Vc_[i] = Vc_[i-1] + (1.0 / 6.0) * (k1 + 2 * k2 + 2 * k3 + k4);
}
double y1p = predict(Vc,Vc_,dt/4,I_,OCV1_,C_,Rct_);
double y1c = correct(Vc,Vc_,y1p,dt/4,I_,OCV1_,C_,Rct_);
Vc = y1c;
cout << Vc << endl;
return Vc;
}
int main()
{
double R1 = 0.0315973;
double C = 0.00100284;
double Vc[5];
double OCV1[5];
double I[5];
Vc[0] = 4.15;
double dt = 0.1;
for(int i=0;i<3;i++){
OCV1[i] = 4.15;
I[i] = 0;
}
I[3]=I[4]=1;
OCV1[3] = 4.13;
OCV1[4] = 4.10;
double I_[2];
for(size_t i=1;i<4;i++){
Vc[i] = predictor_corrector(Vc[i-1],dt,&I[i-1],&OCV1[i-1],&C,&R1);
}
}

Changing the whole part of a number with the decimal part [duplicate]

I have a program in C++ (compiled using g++). I'm trying to apply two doubles as operands to the modulus function, but I get the following error:
error: invalid operands of types 'double' and 'double' to binary 'operator%'
Here's the code:
int main() {
double x = 6.3;
double y = 2;
double z = x % y;
}

The % operator is for integers. You're looking for the fmod() function.
#include <cmath>
int main()
{
double x = 6.3;
double y = 2.0;
double z = std::fmod(x,y);
}

fmod(x, y) is the function you use.

You can implement your own modulus function to do that for you:
double dmod(double x, double y) {
return x - (int)(x/y) * y;
}
Then you can simply use dmod(6.3, 2) to get the remainder, 0.3.

Use fmod() from <cmath>. If you do not want to include the C header file:
template<typename T, typename U>
constexpr double dmod (T x, U mod)
{
return !mod ? x : x - mod * static_cast<long long>(x / mod);
}
//Usage:
double z = dmod<double, unsigned int>(14.3, 4);
double z = dmod<long, float>(14, 4.6);
//This also works:
double z = dmod(14.7, 0.3);
double z = dmod(14.7, 0);
double z = dmod(0, 0.3f);
double z = dmod(myFirstVariable, someOtherVariable);

pseudo code for sqrt function

I managed to get my sqrt function to run perfectly, but I'm second guessing if I wrote this code correctly based on the pseudo code I was given.
Here is the pseudo code:
x = 1
repeat 10 times: x = (x + n / x) / 2
return x.
The code I wrote,
#include <iostream>
#include <math.h>
using namespace std;
double my_sqrt_1(double n)
{
double x= 1; x<10; ++x;
return (x+n/x)/2;
}

No, your code is not following your pseudo-code. For example, you're not repeating anything in your code. You need to add a loop to do that:
#include <iostream>
#include <math.h>
using namespace std;
double my_sqrt_1(double n)
{
double x = 1;
for(int i = 0; i < 10; ++i) // repeat 10 times
x = (x+n/x)/2;
return x;
}
Let's analyze your code:
double x = 1;
// Ok, x set to 1
x < 10;
// This is true, as 1 is less than 10, but it is not used anywhere
++x;
// Increment x - now x == 2
return (x + n / x) / 2
// return value is always (2 + n / 2) / 2
As you don't have any loop, function will always exit in the first "iteration" with the return value (2 + n / 2) / 2.

Just as another approach that you can use binary search or the another pretty elegant solution is to use the Newton's method.
Newton's method is a method for finding roots of a function, making use of a function's derivative. At each step, a value is calculated as: x(step) = x(step-1) - f(x(step-1))/f'(x(step-1)) Newton's_method
This might be faster than binary search.My implementation in C++:
double NewtonMethod(double x) {
double eps = 0.0001; //the precision
double x0 = 10;
while( fabs(x-x0) > eps) {
double a = x0*x0-n;
double r = a/(2*x0);
x = x0 - r;
x0 = x;
}
return x;
}

Since people are showing different approaches to calculating the square root, I couldn't resist ;)...
Below is the exact copy (with the original comments, but without preprocessor directives) of the inverse square root implementation from Quake III Arena:
float Q_rsqrt( float number )
{
long i;
float x2, y;
const float threehalfs = 1.5F;
x2 = number * 0.5F;
y = number;
i = * ( long * ) &y; // evil floating point bit level hacking
i = 0x5f3759df - ( i >> 1 ); // what the...?
y = * ( float * ) &i;
y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
// y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed
return y;
}

Round a double to the closest and greater float

I want to round big double number (>1e6) to the closest but bigger float using c/c++.
I tried this but I'm not sure it is always correct and there is maybe a fastest way to do that :
int main() {
// x is the double we want to round
double x = 100000000005.0;
double y = log10(x) - 7.0;
float a = pow(10.0, y);
float b = (float)x;
//c the closest round up float
float c = a + b;
printf("%.12f %.12f %.12f\n", c, b, x);
return 0;
}
Thank you.

Simply assigning a double to float and back should tell, if the float is larger. If it's not, one should simply increment the float by one unit. (for positive floats). If this doesn't still produce expected result, then the double is larger than supported by a float, in which case float should be assigned to Inf.
float next(double a) {
float b=a;
if ((double)b > a) return b;
return std::nextafter(b, std::numeric_limits<float>::infinity());
}
[Hack] C-version of next_after (on selected architectures would be)
float next_after(float a) {
*(int*)&a += a < 0 ? -1 : 1;
return a;
}
Better way to do it is:
float next_after(float a) {
union { float a; int b; } c = { .a = a };
c.b += a < 0 ? -1 : 1;
return c.a;
}
Both of these self-made hacks ignore Infs and NaNs (and work on non-negative floats only). The math is based on the fact, that the binary representations of floats are ordered. To get to next representable float, one simply increments the binary representation by one.

If you use c99, you can use the nextafterf function.
#include <stdio.h>
#include <math.h>
#include <float.h>
int main(){
// x is the double we want to round
double x=100000000005.0;
float c = x;
if ((double)c <= x)
c = nextafterf(c, FLT_MAX);
//c the closest round up float
printf("%.12f %.12f\n",c,x);
return 0;
}

C has a nice nextafter function which will help here;
float toBiggerFloat( const double a ) {
const float test = (float) a;
return ((double) test < a) ? nextafterf( test, INFINITY ) : test;
}
Here's a test script which shows it on all classes of number (positive/negative, normal/subnormal, infinite, nan, -0): http://codepad.org/BQ3aqbae (it works fine on anything is the result)

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

c++ dividing two floats results in an int - c++

Another way, use casting float a = (float)1/1; // C-style cast or float a = float(1)/1;

Related

C++: Solution to this floating point error problem?

4th Order Adams-Moulton Method C++

Changing the whole part of a number with the decimal part [duplicate]

pseudo code for sqrt function

Round a double to the closest and greater float

Categories

Resources