Getting Different Outputs with Recursive, Task-based, Parallel programming in TBB? - c++

I'm afraid this is a bit of a long code. I'm programming a parallel, recursive, task-based version of Euler's partition formula with Intel TBB and C++, and I don't think there's much problem with this program's logic, but I have a feeling the variables are being accessed wrongly and I might have declared them in the wrong place or something. I say this because inputting a number n should always give the same result, and it does below n = 11, but above that it gives different answers. Even stranger, adding lines of output to try and troubleshoot the program results in slightly more accurate answers (as if somehow padding the time each part of the calculation takes helps it). I have no idea how to avoid this problem or which variable exactly is causing it as the answer is usually fairly close, it's not just a random number. So this is a bit of a tricky one, I apologise, but if someone could help me I would so damn thankful, I've spent a number of hours on this problem.
Here's the parallel task:
class ParallelFormula : public task {
public:
int n;
int* pTot;
//Task constructor
ParallelFormula(int n_, int* pTot_) : n(n_), pTot(pTot_) {}
//Task definition
task* execute() {
//Iterating for formula to work
for (int k = 1; k > 0; k++) {
//Add fixed values to pTot for any case where 2 >= n >= 0
switch (n) {
case 0:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
return NULL;
case 1:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
return NULL;
case 2:
if (k % 2 != 0)
*pTot += 2;
else
*pTot -= 2;
return NULL;
}
//Calculate p numbers using section of Euler's formula (relies on iteration number)
p1 = (k*((3 * k) - 1)) / 2;
p2 = (k*((3 * k) + 1)) / 2;
if (n >= p2) {
//If n is more than p2, must call recursive tasks to break down problem to smaller n's, and adds result to total result pTot (i.e. p(n))
int x = 0;
int y = 0;
ParallelFormula& a = *new(allocate_child()) ParallelFormula(n - p1, &x);
ParallelFormula& b = *new(allocate_child()) ParallelFormula(n - p2, &y);
//Set ref_count to two children plus one for the wait
set_ref_count(3);
//Start b running
spawn(b);
//Start a running and wait for all children (a and b)
spawn_and_wait_for_all(a);
//Sum the total
if (k % 2 != 0)
*pTot += (x + y);
else
*pTot -= (x + y);
}
else if (n >= p1) {
//If n is more than p1, problem is small and therefore need not be parallelised, result added to pTot
if (k % 2 != 0)
*pTot += serialLoop(n - p1);
else
*pTot -= serialLoop(n - p1);
return NULL;
}
else
return NULL;
}
}
};
The method that calls the parallel task:
int parallelLoop(int n) {
int pTot = 0;
ParallelFormula& a = *new(task::allocate_root()) ParallelFormula(n, &pTot);
task::spawn_root_and_wait(a);
return pTot;
}
In case you want to look at the full code for all the context:
// Assignment2.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "iostream"
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_reduce.h"
#include "tbb/partitioner.h"
#include "tbb/blocked_range.h"
#include "tbb/tick_count.h"
#include "math.h"
using namespace tbb;
using namespace std;
int p, p1, p2;
int serialLoop(int n);
int n;
int m;
int serialFormula(int pTemp) {
switch (pTemp) {
case 0:
return 1;
case 1:
return 1;
case 2:
return 2;
}
//If p is any other value it is less than 0 and therefore has nothing to calculate - the current calculation is complete
return 0;
}
int serialLoop(int n) {
int pTot = 0;
for (int k = 1; k > 0; k++) {
//Checking whether k is even or odd to determine if adding or substracting value of p(x) to make p(n)
if (n == 0)
return pTot += 1;
else if (k % 2 != 0) {
//Calculate p number using section of Euler's formula
p = n - ((k*((3 * k) - 1)) / 2);
//If p is more than 2, must call recursive function to break down problem to smaller n's, and adds result to total result P (i.e. p(n))
if (p > 2) {
pTot += serialLoop(p);
}
else if (p >= 0) {
pTot += serialFormula(p);
}
else return pTot;
p = n - ((k*((3 * k) + 1)) / 2);
if (p > 2) {
pTot += serialLoop(p);
}
else if (p >= 0) {
pTot += serialFormula(p);
}
else return pTot;
}
else {
p = n - ((k*((3 * k) - 1)) / 2);
if (p > 2) {
pTot -= serialLoop(p);
}
else if (p >= 0) {
pTot -= serialFormula(p);
}
else return pTot;
p = n - ((k*((3 * k) + 1)) / 2);
if (p > 2) {
pTot -= serialLoop(p);
}
else if (p >= 0) {
pTot -= serialFormula(p);
}
else return pTot;
}
}
}
class ParallelFormula : public task {
public:
int n;
int* pTot;
//Task constructor
ParallelFormula(int n_, int* pTot_) : n(n_), pTot(pTot_) {}
//Task definition
task* execute() {
//Checking task is called
for (int k = 1; k > 0; k++) {
//Calculate p number using section of Euler's formula
switch (n) {
case 0:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
cout << "Case 0" << endl;
cout << *pTot << endl;
return NULL;
case 1:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
cout << "Case 1" << endl;
cout << *pTot << endl;
return NULL;
case 2:
if (k % 2 != 0)
*pTot += 2;
else
*pTot -= 2;
cout << "Case 2" << endl;
cout << *pTot << endl;
return NULL;
}
p1 = (k*((3 * k) - 1)) / 2;
p2 = (k*((3 * k) + 1)) / 2;
if (n >= p2) {
//If p is more than 2, must call recursive function to break down problem to smaller n's, and adds result to total result P (i.e. p(n))
int x = 0;
int y = 0;
ParallelFormula& a = *new(allocate_child()) ParallelFormula(n - p1, &x);
ParallelFormula& b = *new(allocate_child()) ParallelFormula(n - p2, &y);
//Set ref_count to two children plus one for the wait
set_ref_count(3);
//Start b running
spawn(b);
//Start a running and wait for all children (a and b)
spawn_and_wait_for_all(a);
//Sum the total
if (k % 2 != 0)
*pTot += (x + y);
else
*pTot -= (x + y);
cout << "Double p" << endl;
cout << *pTot << endl;
}
else if (n >= p1) {
if (k % 2 != 0)
*pTot += serialLoop(n - p1);
else
*pTot -= serialLoop(n - p1);
cout << "Single p" << endl;
cout << *pTot << endl;
return NULL;
}
else
return NULL;
}
}
};
int parallelLoop(int n) {
int pTot = 0;
ParallelFormula& a = *new(task::allocate_root()) ParallelFormula(n, &pTot);
task::spawn_root_and_wait(a);
return pTot;
}
int main()
{
//Take inputs n and m.
cout << "Enter partition number n:" << endl;
cin >> n;
cout << "Enter modulo m:" << endl;
cin >> m;
//Start timer for serial method
tick_count serial_start = tick_count::now();
//Serial method for computing partition function modulo m.
int sP = serialLoop(n);
int serialMod = sP % m;
//Finish timer for serial method
tick_count serial_end = tick_count::now();
//Output serial results
cout << "Serial result for p(n) is: " << sP << endl;
cout << "Serial result for p(n) mod m is: " << serialMod << endl;
cout << "Serial time (s): " << (serial_end - serial_start).seconds() << endl;
//Start timer for parallel method
tick_count parallel_start = tick_count::now();
//Parallel method for computing partition function
int pP = parallelLoop(n);
int parallelMod = pP % m;
//Finish timer for parallel method
tick_count parallel_end = tick_count::now();
//Output parallel results
cout << "Parallel result for p(n) is: " << pP << endl;
cout << "Parallel result for p(n) mod m is: " << parallelMod << endl;
cout << "Parallel time (s): " << (parallel_end - parallel_start).seconds() << endl;
//Acceleration achieved
cout << "Acceleration achieved was: " << (serial_end - serial_start).seconds() / (parallel_end - parallel_start).seconds() << endl;
return 0;
};
P.S. This was partly based off of the Fibonacci sequence example in the Intel TBB documentation, so if I've done something seriously dumb by following that example then I apologise for that too XD.

The variables p1 and p2 are global but you write to them in ParallelFormula::execute concurrently. Try to declare them inside the ParallelFormula::execute method, e.g.
int p1 = (k*((3 * k) - 1)) / 2;
int p2 = (k*((3 * k) + 1)) / 2;
Also do not forget about the p variable in int serialLoop(int n) since you call this function from ParallelFormula::execute.

Related

Prime-checking every element of a 10^6 array

My goal is to figure out whether each element of an array is a prime or not.
Example:
Input: int A[5]={1,2,3,4,5}
Output: bool P[5]={0,1,1,0,1}
The problem is the array size is up to 10^6. I tried the most efficient prime-checking algorithm
(code: http://cpp.sh/9ewxa) but just the "cin" and "prime_checking" take really long time. How should I solve this problem, Thanks.
Your "most efficient" prime test is actually horribly inefficient. Something like the Miller-Rabin primality test is much faster on a one by one basis. If your input are below 4.3 billion (i.e. uint32_t) then you only need to do 3 tests: a = 2, 7, and 61. For numbers in the uint64_t range it's 12 tests.
If you have a large array of integers then computing all primes up to some maximum might be faster than repeated tests. See Sieve of Eratosthenes for a good way to compute all primes fast. But it's impractical if your input numbers can be larger than 4 billion due to the memory required.
Here is some code that computes a Sieve up to UINT32_MAX+1 and then checks Miller-Rabin has the same results as the sieve: https://gist.github.com/mrvn/137fb0c8a5c78dbf92108b696ff82d92
#include <iostream>
#include <cstdint>
#include <array>
#include <ranges>
#include <cassert>
#include <bitset>
uint32_t pow_n(uint32_t a, uint32_t d, uint32_t n) {
if (d == 0) return 1;
if (d == 1) return a;
uint32_t t = pow_n(a, d / 2, n);
t = ((uint64_t)t * t) % n;
if (d % 2 == 0) {
return t;
} else {
return ((uint64_t)t * a) % n;
}
};
bool test(uint32_t n, unsigned s, uint32_t d, uint32_t a) {
//std::cout << "test(n = " << n << ", s = " << s << ", d = " << d << ", a = " << a << ")\n";
uint32_t x = pow_n(a ,d ,n);
//std::cout << " x = " << x << std::endl;
if (x == 1 || x == n - 1) return true;
for (unsigned i = 1; i < s; ++i) {
x = ((uint64_t)x * x) % n;
if (x == n - 1) return true;
}
return false;
}
bool is_prime(uint32_t n) {
static const std::array witnesses{2u, 3u, 5u, 7u, 11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u};
static const std::array bounds{
2'047llu, 1'373'653llu, 25'326'001llu, 3'215'031'751llu,
2'152'302'898'747llu, 3'474'749'660'383llu,
341'550'071'728'321llu, 341'550'071'728'321llu /* no bounds for 19 */,
3'825'123'056'546'413'051llu,
3'825'123'056'546'413'051llu /* no bound for 29 */,
3'825'123'056'546'413'051llu /* no bound for 31 */,
(unsigned long long)UINT64_MAX /* off by a bit but it's the last bounds */,
};
static_assert(witnesses.size() == bounds.size());
if (n == 2) return true; // 2 is prime
if (n % 2 == 0) return false; // other even numbers are not
if (n <= witnesses.back()) { // I know the first few primes
return (std::ranges::find(witnesses, n) != std::end(witnesses));
}
// write n = 2^s * d + 1 with d odd
unsigned s = 0;
uint32_t d = n - 1;
while (d % 2 == 0) {
++s;
d /= 2;
}
// test widtnesses until the bounds say it's a sure thing
auto it = bounds.cbegin();
for (auto a : witnesses) {
//std::cout << a << " ";
if (!test(n, s, d, a)) return false;
if (n < *it++) return true;
}
return true;
}
template<std::size_t N>
auto composite() {
std::bitset<N / 2 + 1> is_composite;
for (uint32_t i = 3; (uint64_t)i * i < N; i += 2) {
if (is_composite[i / 2]) continue;
for (uint64_t j = i * i; j < N; j += 2 * i) is_composite[j / 2] = true;
}
return is_composite;
}
bool slow_prime(uint32_t n) {
static const auto is_composite = composite<UINT32_MAX + 1llu>();
if (n < 2) return false;
if (n == 2) return true;
if (n % 2 == 0) return false;
return !is_composite.test(n / 2);
}
int main() {
/*
std::cout << "2047: ";
bool fast = is_prime(2047);
bool slow = slow_prime(2047);
std::cout << (fast ? "fast prime" : "");
std::cout << (slow ? "slow prime" : "");
std::cout << std::endl;
*/
//std::cout << "2: prime\n";
for (uint64_t i = 0; i <= UINT32_MAX; ++i) {
if (i % 1000000 == 1) { std::cout << "\r" << i << " "; std::cout.flush(); }
bool fast = is_prime(i);
bool slow = slow_prime(i);
if (fast != slow) std::cout << i << std::endl;
assert(fast == slow);
//std::cout << i << ": " << (is_prime(i) ? "prime" : "") << std::endl;
}
}
The sieve takes ~15s to compute and uses 256MB of memory, verifying Miller-Rabin takes ~12m45s or 765 times slower than the sieve. Which tells me that if you are testing more than 85 million 32bit numbers for primes then just compute them all with a sieve. Since the sieve is O(n^2) it only gets better if your maximum input is smaller.

Multithreading C++ longer execution than single thread

I'm trying to write a piece of code, which goes over a loop of 8^12 iterations, and in each iteration when some conditions are met I push_back to a vector (each thread has its own vector to push_back, which I combine after the loop). But it seems that my the execution is more time consuming the more threads are active. Here' the function (method of an object) passed to each thread:
void HamiltonianKH::mapping_kernel(ull_int start, ull_int stop, std::vector<ull_int>* map_threaded, int _id) {
int n = 1;
out << "A new thread joined tha party! from " << start << " to " << stop << endl;
for (ull_int j = start; j < stop; j++) {
int bSz = 0, fSz = 0, N_e = 0;
std::tie(bSz, fSz, N_e) = calculateSpinElements(this->L, j);
if ((bSz + fSz == this->Sz) && N_e == this->num_of_electrons)
map_threaded->push_back(j);
if (show_system_size_parameters == true && (j - start) % ull_int((stop - start) * n / 4) == 0 && j > 0) {
out << n << "-th quarter of " << _id << endl;
n++;
}
}
}
, here is the caulculate_spinelements function:
std::tuple<int, int, int> calculateSpinElements(int L, ull_int& j) {
int bSz = 0; //bosonic total spin - spin of upper orbital locked to n=1 filling
int fSz = 0; //fermionic total spin
int N_e = 0; // numer of electrons in given state
std::vector<int> temp = int_to_binary(j, L);
for (int k = 0; k < L; k++) {
if (temp[k] < 4) bSz += 1;
else bSz -= 1;
if (temp[k] % 4 == 1) {
fSz += 1;
N_e += 1;
}
else if (temp[k] % 4 == 2) {
fSz -= 1;
N_e += 1;
}
else if (temp[k] % 4 == 3)
N_e += 2;
}
return std::make_tuple(bSz, fSz, N_e);
}
and her is the separation to threads:
void HamiltonianKH::generate_mapping() {
ull_int start = 0, stop = std::pow(8, L);
//mapping_kernel(start, stop, mapping, L, Sz, num_of_electrons);
//Threaded
std::vector<std::vector<ull_int>*> map_threaded(num_of_threads);
std::vector<std::thread> threads;
threads.reserve(num_of_threads);
for (int t = 0; t < num_of_threads; t++) {
start = t * (ull_int)std::pow(8, L) / num_of_threads;
stop = ((t + 1) == num_of_threads ? (ull_int)std::pow(8, L) : (ull_int)std::pow(8, L) * (t + 1) / num_of_threads);
map_threaded[t] = new std::vector<ull_int>();
threads.emplace_back(&HamiltonianKH::mapping_kernel, this, start, stop, map_threaded[t], t);
}
for (auto& t : threads) t.join();
for (auto& t : threads) t.~thread();
ull_int size = 0;
for (auto& t : map_threaded) {
size += t->size();
}
out << "size = " << size << endl;
for (auto & t : map_threaded)
mapping->insert(mapping->end(), t->begin(), t->end());
//sort(mapping->begin(), mapping->end());
if (show_system_size_parameters == true) {
out << "Mapping generated with " << mapping->size() << " elements" << endl;
out << "Last element = " << mapping->at(mapping->size() - 1) << endl;
}
//out << mapping[0] << " " << mapping[mapping.size() - 1] << endl;
assert(mapping->size() > 0 && "Not possible number of electrons - no. of states < 1");
}
The variables: mapping, L, num_of_electrons and Sz are public fields in the object. The whole code has over 2000 lines, but the execution after the generate_mapping() call is irrelevant to the problem.
Do any of you guys have an idea, why this piece of code executes longer on more threads?
Thank you very much in advance.

Optimization of recursion in C++

I have a recursive function. And I want to optimize it as less as 10 sec. Higher use of Memory is not an issue.
Currently, it takes around 85 sec on Linux and three times more on Mac.
I could not understand in what way I can proceed?
Here is code:
using namespace std;
double get_wall_time(){
struct timeval time;
if (gettimeofday(&time,NULL)){
// Handle error
return 0;
}
return (double)time.tv_sec + (double)time.tv_usec * .000001;
}
int fun(long long n) {
if (n < 0) {
cout << "Please enter valid number";
return 0;
}
else if (n == 0 || n == 1)
return 1;
else if (n % 2 == 0)
return fun(n / 2);
else if (n % 2 == 1)
return fun(n / 2) + fun(n / 2 - 1);
else
return 0;
}
int main() {
double begin = get_wall_time();
cout << fun(123456789012345678) << std::endl;
double end = get_wall_time();
cout << "Time elapsed : " << double(end - begin);
return 0;
}
EDIT:
If n is even, return f(n/2)
If n is odd, return f(n/2)+f(n/2-1)
I would memoize the results using a std::map, like so:
std::map<long long, int>fun_results;
int fun(long long n) {
try {
return fun_results.at(n);
} catch(const std::out_of_range&) {
if (n < 0) {
cout << "Please enter valid number";
return 0;
}
else if (n == 0 || n == 1)
return fun_results[n] = 1;
else if (n % 2 == 0)
return fun_results[n] = fun(n / 2);
else if (n % 2 == 1)
return fun_results[n] = fun(n / 2) + fun(n / 2 - 1);
else
return 0;
}
}
Result:
1332403
Time elapsed : 0.000710011

Miller-Rabin Primality test FIPS 186-3 implementation

Im trying to implement the Miller-Rabin primality test according to the description in FIPS 186-3 C.3.1. No matter what I do, I cannot get it to work. The instructions are pretty specific, and I dont think I missed anything, and yet Im getting true for non-prime values.
What did I do wrong?
template <typename R, typename S, typename T>
T POW(R base, S exponent, const T mod){
T result = 1;
while (exponent){
if (exponent & 1)
result = (result * base) % mod;
exponent >>= 1;
base = (base * base) % mod;
}
return result;
}
// used uint64_t to prevent overflow, but only testing with small numbers for now
bool MillerRabin_FIPS186(uint64_t w, unsigned int iterations = 50){
srand(time(0));
unsigned int a = 0;
uint64_t W = w - 1; // dont want to keep calculating w - 1
uint64_t m = W;
while (!(m & 1)){
m >>= 1;
a++;
}
// skipped getting wlen
// when i had this function using my custom arbitrary precision integer class,
// and could get len(w), getting it and using it in an actual RBG
// made no difference
for(unsigned int i = 0; i < iterations; i++){
uint64_t b = (rand() % (W - 3)) + 2; // 2 <= b <= w - 2
uint64_t z = POW(b, m, w);
if ((z == 1) || (z == W))
continue;
else
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continue;
if (z == 1)
return 0;// Composite
}
}
return 1;// Probably Prime
}
this:
std::cout << MillerRabin_FIPS186(33) << std::endl;
std::cout << MillerRabin_FIPS186(35) << std::endl;
std::cout << MillerRabin_FIPS186(37) << std::endl;
std::cout << MillerRabin_FIPS186(39) << std::endl;
std::cout << MillerRabin_FIPS186(45) << std::endl;
std::cout << MillerRabin_FIPS186(49) << std::endl;
is giving me:
0
1
1
1
0
1
The only difference between your implementation and Wikipedia's is that you forgot the second return composite statement. You should have a return 0 at the end of the loop.
Edit: As pointed out by Daniel, there is a second difference. The continue is continuing the inner loop, rather than the outer loop like it's supposed to.
for(unsigned int i = 0; i < iterations; i++){
uint64_t b = (rand() % (W - 3)) + 2; // 2 <= b <= w - 2
uint64_t z = POW(b, m, w);
if ((z == 1) || (z == W))
continue;
else{
int continueOuter = 0;
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continueOuter = 1;
break;
if (z == 1)
return 0;// Composite
}
if (continueOuter) {continue;}
}
return 0; //This is the line you're missing.
}
return 1;// Probably Prime
Also, if the input is even, it will always return probably prime since a is 0. You should add an extra check at the start for that.
In the inner loop,
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continue;
if (z == 1)
return 0;// Composite
}
you should break; instead of continue; when z == W. By continueing, in the next iteration of that loop, if there is one, z will become 1 and the candidate is possibly wrongly declared composite. Here, that happens for 17, 41, 73, 89 and 97 among the primes less than 100.

self made pow() c++

I was reading through How can I write a power function myself? and the answer given by dan04 caught my attention mainly because I am not sure about the answer given by fortran, but I took that and implemented this:
#include <iostream>
using namespace std;
float pow(float base, float ex){
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// even exponenet
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main(){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ".5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ",2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ",3) = " << pow(ii, 3) << endl;
}
}
though I am not sure if I translated this right because all of the calls giving .5 as the exponent return 0. In the answer it states that it might need a log2(x) based on a^b = 2^(b * log2(a)), but I am unsure about putting that in as I am unsure where to put it, or if I am even thinking about this right.
NOTE: I know that this might be defined in a math library, but I don't need all the added expense of an entire math library for a few functions.
EDIT: does anyone know a floating-point implementation for fractional exponents? (I have seen a double implementation, but that was using a trick with registers, and I need floating-point, and adding a library just to do a trick I would be better off just including the math library)
I have looked at this paper here which describes how to approximate the exponential function for double precision. After a little research on Wikipedia about single precision floating point representation I have worked out the equivalent algorithms. They only implemented the exp function, so I found an inverse function for the log and then simply did
POW(a, b) = EXP(LOG(a) * b).
compiling this gcc4.6.2 yields a pow function almost 4 times faster than the standard library's implementation (compiling with O2).
Note: the code for EXP is copied almost verbatim from the paper I read and the LOG function is copied from here.
Here is the relevant code:
#define EXP_A 184
#define EXP_C 16249
float EXP(float y)
{
union
{
float d;
struct
{
#ifdef LITTLE_ENDIAN
short j, i;
#else
short i, j;
#endif
} n;
} eco;
eco.n.i = EXP_A*(y) + (EXP_C);
eco.n.j = 0;
return eco.d;
}
float LOG(float y)
{
int * nTemp = (int*)&y;
y = (*nTemp) >> 16;
return (y - EXP_C) / EXP_A;
}
float POW(float b, float p)
{
return EXP(LOG(b) * p);
}
There is still some optimization you can do here, or perhaps that is good enough.
This is a rough approximation but if you would have been satisfied with the errors introduced using the double representation, I imagine this will be satisfactory.
I think the algorithm you're looking for could be 'nth root'. With an initial guess of 1 (for k == 0):
#include <iostream>
using namespace std;
float pow(float base, float ex);
float nth_root(float A, int n) {
const int K = 6;
float x[K] = {1};
for (int k = 0; k < K - 1; k++)
x[k + 1] = (1.0 / n) * ((n - 1) * x[k] + A / pow(x[k], n - 1));
return x[K-1];
}
float pow(float base, float ex){
if (base == 0)
return 0;
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// fractional exponent
}else if (ex > 0 && ex < 1){
return nth_root(base, 1/ex);
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main_pow(int, char **){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ", .5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ", 2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ", 3) = " << pow(ii, 3) << endl;
}
return 0;
}
test:
pow(0, .5) = 0.03125
pow(0, 2) = 0
pow(0, 3) = 0
pow(1, .5) = 1
pow(1, 2) = 1
pow(1, 3) = 1
pow(2, .5) = 1.41421
pow(2, 2) = 4
pow(2, 3) = 8
pow(3, .5) = 1.73205
pow(3, 2) = 9
pow(3, 3) = 27
pow(4, .5) = 2
pow(4, 2) = 16
pow(4, 3) = 64
pow(5, .5) = 2.23607
pow(5, 2) = 25
pow(5, 3) = 125
pow(6, .5) = 2.44949
pow(6, 2) = 36
pow(6, 3) = 216
pow(7, .5) = 2.64575
pow(7, 2) = 49
pow(7, 3) = 343
pow(8, .5) = 2.82843
pow(8, 2) = 64
pow(8, 3) = 512
pow(9, .5) = 3
pow(9, 2) = 81
pow(9, 3) = 729
I think that you could try to solve it by using the Taylor's series,
check this.
http://en.wikipedia.org/wiki/Taylor_series
With the Taylor's series you can solve any difficult to solve calculation such as 3^3.8 by using the already known results such as 3^4. In this case you have
3^4 = 81 so
3^3.8 = 81 + 3.8*3( 3.8 - 4) +..+.. and so on depend on how big is your n you will get the closer solution of your problem.
I and my friend faced similar problem while we're on an OpenGL project and math.h didn't suffice in some cases. Our instructor also had the same problem and he told us to seperate power to integer and floating parts. For example, if you are to calculate x^11.5 you may calculate sqrt(x^115, 10) which may result more accurate result.
Reworked on #capellic answer, so that nth_root works with bigger values as well.
Without the limitation of an array that is allocated for no reason:
#include <iostream>
float pow(float base, float ex);
inline float fabs(float a) {
return a > 0 ? a : -a;
}
float nth_root(float A, int n, unsigned max_iterations = 500, float epsilon = std::numeric_limits<float>::epsilon()) {
if (n < 0)
throw "Invalid value";
if (n == 1 || A == 0)
return A;
float old_value = 1;
float value;
for (int k = 0; k < max_iterations; k++) {
value = (1.0 / n) * ((n - 1) * old_value + A / pow(old_value, n - 1));
if (fabs(old_value - value) < epsilon)
return value;
old_value = value;
}
return value;
}
float pow(float base, float ex) {
if (base == 0)
return 0;
if (ex == 0){
// power of 0
return 1;
} else if( ex < 0) {
// negative exponent
return 1 / pow(base, -ex);
} else if (ex > 0 && ex < 1) {
// fractional exponent
return nth_root(base, 1/ex);
} else if ((int)ex % 2 == 0) {
// even exponent
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
} else {
// integer exponent
return base * pow(base, ex - 1);
}
}
int main () {
for (int i = 0; i <= 128; i++) {
std::cout << "pow(" << i << ", .5) = " << pow(i, .5) << std::endl;
std::cout << "pow(" << i << ", .3) = " << pow(i, .3) << std::endl;
std::cout << "pow(" << i << ", 2) = " << pow(i, 2) << std::endl;
std::cout << "pow(" << i << ", 3) = " << pow(i, 3) << std::endl;
}
std::cout << "pow(" << 74088 << ", .3) = " << pow(74088, .3) << std::endl;
return 0;
}
This solution of MINE will be accepted upto O(n) time complexity
utpo input less then 2^30 or 10^8
IT will not accept more then these inputs
It WILL GIVE TIME LIMIT EXCEED warning
but easy understandable solution
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// static is important here
// other wise it will store same values while multiplying
double p = x;
double ans;
// as we multiple p it will multiply it with q which has the
//previous value of this ans latter we will update the q
// so that q has fresh value for further test cases here
static double q=1; // important
if(n==0){ ans = q; q=1; return ans;}
if(n>0)
{
p *= q;
// stored value got multiply by p
q=p;
// and again updated to q
p=x;
//to update the value to the same value of that number
// cout<<q<<" ";
recursive(p,n-1);
}
return ans;
}
class Solution {
public:
double myPow(double x, int n) {
// double q=x;double N=n;
// return pow(q,N);
// when both sides are double this function works
if(n==0)return 1;
x = recursive(x,abs(n));
if(n<0) return double(1/x);
// else
return x;
}
};
For More help you may try
LEETCODE QUESTION NUMBER 50
**NOW the Second most optimize code pow(x,n) **
logic is that we have to solve it in O(logN) so we devide the n by 2
when we have even power n=4 , 4/2 is 2 means we have to just square it (22)(22)
but when we have odd value of power like n=5, 5/2 here we have square it to get
also the the number itself to it like (22)(2*2)*2 to get 2^5 = 32
HOPE YOU UNDERSTAND FOR MORE YOU CAN VISIT
POW(x,n) question on leetcode
below the optimized code and above code was for O(n) only
*
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// recursive calls will return the whole value of the program at every calls
if(n==0){return 1;}
// 1 is multiplied when the last value we get as we don't have to multiply further
double store;
store = recursive(x,n/2);
// call the function after the base condtion you have given to it here
if(n%2==0)return store*store;
else
{
return store*store*x;
// odd power we have the perfect square multiply the value;
}
}
// main function or the function for indirect call to recursive function
double myPow(double x, int n) {
if(n==0)return 1;
x = recursive(x,abs(n));
// for negatives powers
if(n<0) return double(1/x);
// else for positves
return x;
}