I wrote a C++ routine to find nearest double element in sorted array. Is there a way to speed up?
There are two branches based on the value of boolean reversed, if reversed it is sorted in the decreasing order.
void findNearestNeighbourIndex_new(real_T value, real_T* x, int_T x_size, int_T& l_idx)
{
l_idx = -1;
bool reversed= (x[1] - x[0] < 0);
if ((!reversed&& value <= x[0])
|| (reversed&& value >= x[0])){
// Value is before first position in x
l_idx = 0;
}
else if ((!reversed&& value >= x[x_size - 1])
|| (reversed&& value <= x[x_size - 1])){
// Value is after last position in x
l_idx = x_size - 2;
}
else // All other cases
{
if (reversed)
{
for (int i = 0; i < x_size - 1; ++i)
{
if (value <= x[i] && value > x[i + 1])
{
l_idx = i;
break;
}
}
}
else{
for (int i = 0; i < x_size - 1; ++i)
{
if (value >= x[i] && value < x[i + 1])
{
l_idx = i;
break;
}
}
}
}
}
In this very case where array is sorted, I do not see a way to do better. So, with profiling i see that the comparison in if (value <= x[i] && value > x[i + 1]) is expensive.
EDIT
tried with lower_bound()
std::vector<real_T> x_vec(x, x + x_size);
l_idx = std::upper_bound(x_vec.begin(), x_vec.end(), value) - x_vec.begin() - 1;
You can use std::lower_bound to find a element equal or greater than requested, and then move iterator backwards and check preceding value too. This will use binary search and will cost O(log n), also this enables standard STL comparators and so on.
If you don't actually have an vector to use with upper_bound() you don't need to construct one as that is going to be an O(n) operation. upper_bound() will work with the array that you have. You can use:
l_idx = std::upper_bound(x, x + size, value) - x - 1;
Test case:
#include <iostream>
#include <functional>
#include <algorithm>
int main()
{
const int size = 9;
int x[9] = {1,2,3,4,5,6,7,8,9};
auto pos = std::upper_bound(x, x + size, 5) - x;
std::cout << "position: " << pos;
return 0;
}
Output:
5
As the result of upper_bound() points us to 6(live example).
The way is to substract 1 to size (to make work over higest value) and 0.5 to target value to make it accurate:
#include <iostream>
#include <functional>
#include <algorithm>
using namespace std;
int main()
{
float x[10] = { 2,3,4,5,6,7,8,9,10,11 },y;
int size = sizeof(x) / sizeof(x[0]),pos;
y = 4.1; pos = std::upper_bound(x, x + size - 1, y - 0.5) - x;
std::cout << "position: " << pos << " target value=" << y << " upper_bound=" << x[pos] << endl;
y = 4.9; pos = std::upper_bound(x, x + size - 1, y - 0.5) - x;
std::cout << "position: " << pos << " target value=" << y << " upper_bound=" << x[pos] << endl;
y = -0.5; pos = std::upper_bound(x, x + size - 1, y - 0.5) - x;
std::cout << "position: " << pos << " target value=" << y << " upper_bound=" << x[pos] << endl;
y = 100; pos = std::upper_bound(x, x + size - 1, y - 0.5) - x;
std::cout << "position: " << pos << " target value=" << y << " upper_bound=" << x[pos] << endl;
getchar();
return 0;
}
Implemented this helper routine
void findNearestNeighbourIndex_bin_search_new(real_T value, real_T* x,
int_T start, int_T stop, int_T& l_idx)
{
int_T mid = ( stop - start ) / 2;
if (value >= x[mid+1])
{
findNearestNeighbourIndex_bin_search_new(value, x, mid + 1, stop, l_idx);
}
else if (value < x[mid])
{
findNearestNeighbourIndex_bin_search_new(value, x, start, mid, l_idx);
}
else
{
l_idx = mid;
return;
}
}
Related
I'm trying to write a piece of code, which goes over a loop of 8^12 iterations, and in each iteration when some conditions are met I push_back to a vector (each thread has its own vector to push_back, which I combine after the loop). But it seems that my the execution is more time consuming the more threads are active. Here' the function (method of an object) passed to each thread:
void HamiltonianKH::mapping_kernel(ull_int start, ull_int stop, std::vector<ull_int>* map_threaded, int _id) {
int n = 1;
out << "A new thread joined tha party! from " << start << " to " << stop << endl;
for (ull_int j = start; j < stop; j++) {
int bSz = 0, fSz = 0, N_e = 0;
std::tie(bSz, fSz, N_e) = calculateSpinElements(this->L, j);
if ((bSz + fSz == this->Sz) && N_e == this->num_of_electrons)
map_threaded->push_back(j);
if (show_system_size_parameters == true && (j - start) % ull_int((stop - start) * n / 4) == 0 && j > 0) {
out << n << "-th quarter of " << _id << endl;
n++;
}
}
}
, here is the caulculate_spinelements function:
std::tuple<int, int, int> calculateSpinElements(int L, ull_int& j) {
int bSz = 0; //bosonic total spin - spin of upper orbital locked to n=1 filling
int fSz = 0; //fermionic total spin
int N_e = 0; // numer of electrons in given state
std::vector<int> temp = int_to_binary(j, L);
for (int k = 0; k < L; k++) {
if (temp[k] < 4) bSz += 1;
else bSz -= 1;
if (temp[k] % 4 == 1) {
fSz += 1;
N_e += 1;
}
else if (temp[k] % 4 == 2) {
fSz -= 1;
N_e += 1;
}
else if (temp[k] % 4 == 3)
N_e += 2;
}
return std::make_tuple(bSz, fSz, N_e);
}
and her is the separation to threads:
void HamiltonianKH::generate_mapping() {
ull_int start = 0, stop = std::pow(8, L);
//mapping_kernel(start, stop, mapping, L, Sz, num_of_electrons);
//Threaded
std::vector<std::vector<ull_int>*> map_threaded(num_of_threads);
std::vector<std::thread> threads;
threads.reserve(num_of_threads);
for (int t = 0; t < num_of_threads; t++) {
start = t * (ull_int)std::pow(8, L) / num_of_threads;
stop = ((t + 1) == num_of_threads ? (ull_int)std::pow(8, L) : (ull_int)std::pow(8, L) * (t + 1) / num_of_threads);
map_threaded[t] = new std::vector<ull_int>();
threads.emplace_back(&HamiltonianKH::mapping_kernel, this, start, stop, map_threaded[t], t);
}
for (auto& t : threads) t.join();
for (auto& t : threads) t.~thread();
ull_int size = 0;
for (auto& t : map_threaded) {
size += t->size();
}
out << "size = " << size << endl;
for (auto & t : map_threaded)
mapping->insert(mapping->end(), t->begin(), t->end());
//sort(mapping->begin(), mapping->end());
if (show_system_size_parameters == true) {
out << "Mapping generated with " << mapping->size() << " elements" << endl;
out << "Last element = " << mapping->at(mapping->size() - 1) << endl;
}
//out << mapping[0] << " " << mapping[mapping.size() - 1] << endl;
assert(mapping->size() > 0 && "Not possible number of electrons - no. of states < 1");
}
The variables: mapping, L, num_of_electrons and Sz are public fields in the object. The whole code has over 2000 lines, but the execution after the generate_mapping() call is irrelevant to the problem.
Do any of you guys have an idea, why this piece of code executes longer on more threads?
Thank you very much in advance.
I'm afraid this is a bit of a long code. I'm programming a parallel, recursive, task-based version of Euler's partition formula with Intel TBB and C++, and I don't think there's much problem with this program's logic, but I have a feeling the variables are being accessed wrongly and I might have declared them in the wrong place or something. I say this because inputting a number n should always give the same result, and it does below n = 11, but above that it gives different answers. Even stranger, adding lines of output to try and troubleshoot the program results in slightly more accurate answers (as if somehow padding the time each part of the calculation takes helps it). I have no idea how to avoid this problem or which variable exactly is causing it as the answer is usually fairly close, it's not just a random number. So this is a bit of a tricky one, I apologise, but if someone could help me I would so damn thankful, I've spent a number of hours on this problem.
Here's the parallel task:
class ParallelFormula : public task {
public:
int n;
int* pTot;
//Task constructor
ParallelFormula(int n_, int* pTot_) : n(n_), pTot(pTot_) {}
//Task definition
task* execute() {
//Iterating for formula to work
for (int k = 1; k > 0; k++) {
//Add fixed values to pTot for any case where 2 >= n >= 0
switch (n) {
case 0:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
return NULL;
case 1:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
return NULL;
case 2:
if (k % 2 != 0)
*pTot += 2;
else
*pTot -= 2;
return NULL;
}
//Calculate p numbers using section of Euler's formula (relies on iteration number)
p1 = (k*((3 * k) - 1)) / 2;
p2 = (k*((3 * k) + 1)) / 2;
if (n >= p2) {
//If n is more than p2, must call recursive tasks to break down problem to smaller n's, and adds result to total result pTot (i.e. p(n))
int x = 0;
int y = 0;
ParallelFormula& a = *new(allocate_child()) ParallelFormula(n - p1, &x);
ParallelFormula& b = *new(allocate_child()) ParallelFormula(n - p2, &y);
//Set ref_count to two children plus one for the wait
set_ref_count(3);
//Start b running
spawn(b);
//Start a running and wait for all children (a and b)
spawn_and_wait_for_all(a);
//Sum the total
if (k % 2 != 0)
*pTot += (x + y);
else
*pTot -= (x + y);
}
else if (n >= p1) {
//If n is more than p1, problem is small and therefore need not be parallelised, result added to pTot
if (k % 2 != 0)
*pTot += serialLoop(n - p1);
else
*pTot -= serialLoop(n - p1);
return NULL;
}
else
return NULL;
}
}
};
The method that calls the parallel task:
int parallelLoop(int n) {
int pTot = 0;
ParallelFormula& a = *new(task::allocate_root()) ParallelFormula(n, &pTot);
task::spawn_root_and_wait(a);
return pTot;
}
In case you want to look at the full code for all the context:
// Assignment2.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "iostream"
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_reduce.h"
#include "tbb/partitioner.h"
#include "tbb/blocked_range.h"
#include "tbb/tick_count.h"
#include "math.h"
using namespace tbb;
using namespace std;
int p, p1, p2;
int serialLoop(int n);
int n;
int m;
int serialFormula(int pTemp) {
switch (pTemp) {
case 0:
return 1;
case 1:
return 1;
case 2:
return 2;
}
//If p is any other value it is less than 0 and therefore has nothing to calculate - the current calculation is complete
return 0;
}
int serialLoop(int n) {
int pTot = 0;
for (int k = 1; k > 0; k++) {
//Checking whether k is even or odd to determine if adding or substracting value of p(x) to make p(n)
if (n == 0)
return pTot += 1;
else if (k % 2 != 0) {
//Calculate p number using section of Euler's formula
p = n - ((k*((3 * k) - 1)) / 2);
//If p is more than 2, must call recursive function to break down problem to smaller n's, and adds result to total result P (i.e. p(n))
if (p > 2) {
pTot += serialLoop(p);
}
else if (p >= 0) {
pTot += serialFormula(p);
}
else return pTot;
p = n - ((k*((3 * k) + 1)) / 2);
if (p > 2) {
pTot += serialLoop(p);
}
else if (p >= 0) {
pTot += serialFormula(p);
}
else return pTot;
}
else {
p = n - ((k*((3 * k) - 1)) / 2);
if (p > 2) {
pTot -= serialLoop(p);
}
else if (p >= 0) {
pTot -= serialFormula(p);
}
else return pTot;
p = n - ((k*((3 * k) + 1)) / 2);
if (p > 2) {
pTot -= serialLoop(p);
}
else if (p >= 0) {
pTot -= serialFormula(p);
}
else return pTot;
}
}
}
class ParallelFormula : public task {
public:
int n;
int* pTot;
//Task constructor
ParallelFormula(int n_, int* pTot_) : n(n_), pTot(pTot_) {}
//Task definition
task* execute() {
//Checking task is called
for (int k = 1; k > 0; k++) {
//Calculate p number using section of Euler's formula
switch (n) {
case 0:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
cout << "Case 0" << endl;
cout << *pTot << endl;
return NULL;
case 1:
if (k % 2 != 0)
*pTot += 1;
else
*pTot -= 1;
cout << "Case 1" << endl;
cout << *pTot << endl;
return NULL;
case 2:
if (k % 2 != 0)
*pTot += 2;
else
*pTot -= 2;
cout << "Case 2" << endl;
cout << *pTot << endl;
return NULL;
}
p1 = (k*((3 * k) - 1)) / 2;
p2 = (k*((3 * k) + 1)) / 2;
if (n >= p2) {
//If p is more than 2, must call recursive function to break down problem to smaller n's, and adds result to total result P (i.e. p(n))
int x = 0;
int y = 0;
ParallelFormula& a = *new(allocate_child()) ParallelFormula(n - p1, &x);
ParallelFormula& b = *new(allocate_child()) ParallelFormula(n - p2, &y);
//Set ref_count to two children plus one for the wait
set_ref_count(3);
//Start b running
spawn(b);
//Start a running and wait for all children (a and b)
spawn_and_wait_for_all(a);
//Sum the total
if (k % 2 != 0)
*pTot += (x + y);
else
*pTot -= (x + y);
cout << "Double p" << endl;
cout << *pTot << endl;
}
else if (n >= p1) {
if (k % 2 != 0)
*pTot += serialLoop(n - p1);
else
*pTot -= serialLoop(n - p1);
cout << "Single p" << endl;
cout << *pTot << endl;
return NULL;
}
else
return NULL;
}
}
};
int parallelLoop(int n) {
int pTot = 0;
ParallelFormula& a = *new(task::allocate_root()) ParallelFormula(n, &pTot);
task::spawn_root_and_wait(a);
return pTot;
}
int main()
{
//Take inputs n and m.
cout << "Enter partition number n:" << endl;
cin >> n;
cout << "Enter modulo m:" << endl;
cin >> m;
//Start timer for serial method
tick_count serial_start = tick_count::now();
//Serial method for computing partition function modulo m.
int sP = serialLoop(n);
int serialMod = sP % m;
//Finish timer for serial method
tick_count serial_end = tick_count::now();
//Output serial results
cout << "Serial result for p(n) is: " << sP << endl;
cout << "Serial result for p(n) mod m is: " << serialMod << endl;
cout << "Serial time (s): " << (serial_end - serial_start).seconds() << endl;
//Start timer for parallel method
tick_count parallel_start = tick_count::now();
//Parallel method for computing partition function
int pP = parallelLoop(n);
int parallelMod = pP % m;
//Finish timer for parallel method
tick_count parallel_end = tick_count::now();
//Output parallel results
cout << "Parallel result for p(n) is: " << pP << endl;
cout << "Parallel result for p(n) mod m is: " << parallelMod << endl;
cout << "Parallel time (s): " << (parallel_end - parallel_start).seconds() << endl;
//Acceleration achieved
cout << "Acceleration achieved was: " << (serial_end - serial_start).seconds() / (parallel_end - parallel_start).seconds() << endl;
return 0;
};
P.S. This was partly based off of the Fibonacci sequence example in the Intel TBB documentation, so if I've done something seriously dumb by following that example then I apologise for that too XD.
The variables p1 and p2 are global but you write to them in ParallelFormula::execute concurrently. Try to declare them inside the ParallelFormula::execute method, e.g.
int p1 = (k*((3 * k) - 1)) / 2;
int p2 = (k*((3 * k) + 1)) / 2;
Also do not forget about the p variable in int serialLoop(int n) since you call this function from ParallelFormula::execute.
I am very new to c++ programming language i just need to know how can i declare a set of line segments given their start and end points? is there anything like that in C++?
I have this code which reads the starting and ending points of the line segments from a text file and divides the input into 4 vectors : X_start, Y_start, X_end, Y_end.
I need to know how to use these vectors to define line segments? any help would be appreciated. Thanks in advance
#include <iostream>
#include <algorithm> // for std::copy#include <iostream>
#include <iterator>
#include <fstream>
#include <math.h>
#include <vector>
#include <algorithm> // for std::copy
using namespace std;
int main()
{
std::ifstream is("D:\\Task1.txt");
std::istream_iterator<double> start(is), end;
std::vector<double> numbers(start, end);
std::vector<double> X_start(start, end);
std::vector<double> Y_start(start, end);
std::vector<double> X_end(start, end);
std::vector<double> Y_end(start, end);
std::vector<double>::iterator i;
std::vector<double>::iterator j;
float left, top, right, bottom; // Bounding Box For Line Segments
left = 12;
top = 12;
right = 0;
bottom = 0;
std::cout << "Read " << numbers.size() << " numbers" << std::endl;
std::copy(numbers.begin(), numbers.end(),
std::ostream_iterator<double>(std::cout, " "));
std::cout << std::endl;
for (vector<double>::iterator i = numbers.begin();
i != numbers.end();
++i)
{
for(int j = 0; j < numbers.size(); j = j+4)
{
std::cout << "elemnts of X_start " << numbers[j] << " " <<std::endl;
X_start.push_back(numbers[j]);
}
for(int k = 1; k < numbers.size(); k = k+4)
{
std::cout << "elemnts of Y_start " << numbers[k] << " " <<std::endl;
Y_start.push_back(numbers[k]);
}
for(int l = 2; l < numbers.size(); l = l+4)
{
std::cout << "elemnts of X_end " << numbers[l] << " " <<std::endl;
X_end.push_back(numbers[l]);
}
for(int m = 3; m < numbers.size(); m = m+4)
{
std::cout << "elemnts of Y_end " << numbers[m] << " " <<std::endl;
Y_end.push_back(numbers[m]);
}
getchar();
}
}
"I am very new to c++ programming language"
How new to C++? Do you understand the maths involved in what you're trying to do first?
I use the following function. Just modify it to meet your requirements.
class Point
{
public:
float x,y;
};
class LineSegment
{
public:
Point top;
Point bottom;
};
Point* getIntersectionPoint(LineSegment line1,LineSegment line2)
{
float s1_x, s1_y, s2_x, s2_y;
float p1_x = line1.bottom.x,p1_y = line1.bottom.y;
float p0_x = line1.top.x,p0_y = line1.top.y;
float p3_x = line2.bottom.x,p3_y = line2.bottom.y;
float p2_x = line2.top.x,p2_y = line2.top.y;
s1_x = p1_x - p0_x; s1_y = p1_y - p0_y;
s2_x = p3_x - p2_x; s2_y = p3_y - p2_y;
float s, t;
s = (-s1_y * (p0_x - p2_x) + s1_x * (p0_y - p2_y)) / (-s2_x * s1_y + s1_x * s2_y);
t = ( s2_x * (p0_y - p2_y) - s2_y * (p0_x - p2_x)) / (-s2_x * s1_y + s1_x * s2_y);
if (s >= 0 && s <= 1 && t >= 0 && t <= 1)
{
Point *intsPoint = (Point *)malloc(sizeof(Point));
intsPoint->x = p0_x + (t * s1_x);
intsPoint->y = p0_y + (t * s1_y);
return intsPoint;
}
return NULL;
}
I need to generate 4 random numbers, each between [-45 +45] degrees. and if rand%2 = 0 then I want the result (the random number generated to be equal to -angle). Once the 4 random numbers are generated it is requires to scan these angles and find a lock (the point at which the angles meet). Also -3,-2,-1,... +3 in the loop in if statement indicate that the lock takes place within 6 degrees beamwidth. the code works. But can it be simplified? also The objective is to establish a lock between 2 points by scannin elevation and azimuth angles at both points.
#include <iostream>
#include <conio.h>
#include <time.h>
using namespace std;
class Cscan
{
public:
int gran, lockaz, lockel;
};
int main()
{
srand (time(NULL));
int az1, az2, el1, el2, j, k;
BS1.lockaz = rand() % 46;
BS1.lockel = rand() % 46;
BS2.lockaz = rand() % 46;
BS2.lockel = rand() % 46;
k = rand() % 2;
if(k == 0)
k = -1;
BS1.lockaz = k*BS1.lockaz;
k = rand() % 2;
if(k == 0)
k = -1;
BS1.lockel = k*BS1.lockel;
k = rand() % 2;
if(k == 0)
k = -1;
BS2.lockaz = k*BS2.lockaz;
k = rand() % 2;
if(k == 0)
k = -1;
BS2.lockel = k*BS2.lockel;
for(az1=-45; az1<=45; az1=az1+4)
{
for(el1=-45; el1<=45; el1=el1+4)
{
for(az2=-45; az2<=45; az2=az2+4)
{
for(el2=-45; el2<=45; el2=el2+4)
{
if((az1==BS1.lockaz-3||az1==BS1.lockaz-2||az1==BS1.lockaz-1||az1==BS1.lockaz||az1==BS1.lockaz+1||az1==BS1.lockaz+2||az1==BS1.lockaz+3)&&
(az2==BS2.lockaz-3||az2==BS2.lockaz-2||az2==BS2.lockaz-1||az2==BS2.lockaz||az2==BS2.lockaz+1||az2==BS2.lockaz+2||az2==BS2.lockaz+3)&&
(el1==BS1.lockel-3||el1==BS1.lockel-2||el1==BS1.lockel-1||el1==BS1.lockel||el1==BS1.lockel+1||el1==BS1.lockel+2||el1==BS1.lockel+3)&&
(el2==BS2.lockel-3||el2==BS2.lockel-2||el2==BS2.lockel-1||el2==BS2.lockel||el2==BS2.lockel+1||el2==BS2.lockel+2||el2==BS2.lockel+3))
{
cout << "locked \n" << BS1.lockaz << " " << BS1.lockel << " " << BS2.lockaz << " " << BS2.lockel <<endl
< az1 << " " << el1 << " " << az2 << " " << el2 << endl;
k = 1;
break;
}
if(k==1)
break;
}
if(k==1)
break;
}
if(k==1)
break;
}
if(k==1)
break;
}
_getch();
}
BS1.lockaz = rand() % 91 - 45;
BS1.lockel = rand() % 91 - 45;
BS2.lockaz = rand() % 91 - 45;
BS2.lockel = rand() % 91 - 45;
Integer angles in degrees? Very questionable. Something "physical" like an angle is normally best expressed as a floating-point number, so I'd first change
typedef double angle;
struct Cscan { // why class? This is clearly POD
int gran; //I don't know what gran is. Perhaps this should also be floating-point.
angle lockaz, lockel;
};
That seems to make it more difficult at first sight because neither the random-range-selection with % works anymore nor is it much use to compare floats for equality. Which is, however, a good thing, because all of this is in fact very bad practise.
If you want to keep using rand() as the random number generator (though I'd suggest std::uniform_real_distribution), write a function to do this:
const double pi = 3.141592653589793; // Let's use radians internally, not degrees.
const angle rightangle = pi/2.; // It's much handier for real calculations.
inline angle deg2rad(angle dg) {return dg * rightangle / 90.;}
angle random_in_sym_rightangle() {
return rightangle * ( ((double) rand()) / ((double) RAND_MAX) - .5 );
}
Now you'd just do
BS1.lockaz = random_in_sym_rightangle();
BS1.lockel = random_in_sym_rightangle();
BS2.lockaz = random_in_sym_rightangle();
BS2.lockel = random_in_sym_rightangle();
Then you need to do this range-checking. That's again something to put in a dedicated function
bool equal_in_margin(angle theta, angle phi, angle margin) {
return (theta > phi-margin && theta < phi+margin);
}
Then you do this exhaustive search for locks. This could definitely be done more efficiently, but that's an algorithm issue and has nothing to do with the language. Sticking to the for loops, you can still make them look much nicer by avoiding this explicit break checking. One way is good old goto, I'd propose here you just stick it in an extra function and return when you're done
#define TRAVERSE_SYM_RIGHTANGLE(phi) \
for ( angle phi = -pi/4.; phi < pi/4.; phi += deg2rad(4) )
int lock_k // better give this a more descriptive name
( const Cscan& BS1, const Cscan& BS2, int k ) {
TRAVERSE_SYM_RIGHTANGLE(az1) {
TRAVERSE_SYM_RIGHTANGLE(el1) {
TRAVERSE_SYM_RIGHTANGLE(az2) {
TRAVERSE_SYM_RIGHTANGLE(el2) {
if( equal_in_margin( az1, BS1.lockaz, deg2rad(6.) )
&& equal_in_margin( el1, BS1.lockel, deg2rad(6.) )
&& equal_in_margin( az2, BS1.lockaz, deg2rad(6.) )
&& equal_in_margin( el2, BS2.lockel, deg2rad(6.) ) ) {
std::cout << "locked \n" << BS1.lockaz << " " << BS1.lockel << " " << BS2.lockaz << " " << BS2.lockel << '\n'
<< az1 << " " << el1 << " " << az2 << " " << el2 << std::endl;
return 1;
}
}
}
}
}
return k;
}
I was reading through How can I write a power function myself? and the answer given by dan04 caught my attention mainly because I am not sure about the answer given by fortran, but I took that and implemented this:
#include <iostream>
using namespace std;
float pow(float base, float ex){
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// even exponenet
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main(){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ".5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ",2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ",3) = " << pow(ii, 3) << endl;
}
}
though I am not sure if I translated this right because all of the calls giving .5 as the exponent return 0. In the answer it states that it might need a log2(x) based on a^b = 2^(b * log2(a)), but I am unsure about putting that in as I am unsure where to put it, or if I am even thinking about this right.
NOTE: I know that this might be defined in a math library, but I don't need all the added expense of an entire math library for a few functions.
EDIT: does anyone know a floating-point implementation for fractional exponents? (I have seen a double implementation, but that was using a trick with registers, and I need floating-point, and adding a library just to do a trick I would be better off just including the math library)
I have looked at this paper here which describes how to approximate the exponential function for double precision. After a little research on Wikipedia about single precision floating point representation I have worked out the equivalent algorithms. They only implemented the exp function, so I found an inverse function for the log and then simply did
POW(a, b) = EXP(LOG(a) * b).
compiling this gcc4.6.2 yields a pow function almost 4 times faster than the standard library's implementation (compiling with O2).
Note: the code for EXP is copied almost verbatim from the paper I read and the LOG function is copied from here.
Here is the relevant code:
#define EXP_A 184
#define EXP_C 16249
float EXP(float y)
{
union
{
float d;
struct
{
#ifdef LITTLE_ENDIAN
short j, i;
#else
short i, j;
#endif
} n;
} eco;
eco.n.i = EXP_A*(y) + (EXP_C);
eco.n.j = 0;
return eco.d;
}
float LOG(float y)
{
int * nTemp = (int*)&y;
y = (*nTemp) >> 16;
return (y - EXP_C) / EXP_A;
}
float POW(float b, float p)
{
return EXP(LOG(b) * p);
}
There is still some optimization you can do here, or perhaps that is good enough.
This is a rough approximation but if you would have been satisfied with the errors introduced using the double representation, I imagine this will be satisfactory.
I think the algorithm you're looking for could be 'nth root'. With an initial guess of 1 (for k == 0):
#include <iostream>
using namespace std;
float pow(float base, float ex);
float nth_root(float A, int n) {
const int K = 6;
float x[K] = {1};
for (int k = 0; k < K - 1; k++)
x[k + 1] = (1.0 / n) * ((n - 1) * x[k] + A / pow(x[k], n - 1));
return x[K-1];
}
float pow(float base, float ex){
if (base == 0)
return 0;
// power of 0
if (ex == 0){
return 1;
// negative exponenet
}else if( ex < 0){
return 1 / pow(base, -ex);
// fractional exponent
}else if (ex > 0 && ex < 1){
return nth_root(base, 1/ex);
}else if ((int)ex % 2 == 0){
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
//integer exponenet
}else{
return base * pow(base, ex - 1);
}
}
int main_pow(int, char **){
for (int ii = 0; ii< 10; ii++){\
cout << "pow(" << ii << ", .5) = " << pow(ii, .5) << endl;
cout << "pow(" << ii << ", 2) = " << pow(ii, 2) << endl;
cout << "pow(" << ii << ", 3) = " << pow(ii, 3) << endl;
}
return 0;
}
test:
pow(0, .5) = 0.03125
pow(0, 2) = 0
pow(0, 3) = 0
pow(1, .5) = 1
pow(1, 2) = 1
pow(1, 3) = 1
pow(2, .5) = 1.41421
pow(2, 2) = 4
pow(2, 3) = 8
pow(3, .5) = 1.73205
pow(3, 2) = 9
pow(3, 3) = 27
pow(4, .5) = 2
pow(4, 2) = 16
pow(4, 3) = 64
pow(5, .5) = 2.23607
pow(5, 2) = 25
pow(5, 3) = 125
pow(6, .5) = 2.44949
pow(6, 2) = 36
pow(6, 3) = 216
pow(7, .5) = 2.64575
pow(7, 2) = 49
pow(7, 3) = 343
pow(8, .5) = 2.82843
pow(8, 2) = 64
pow(8, 3) = 512
pow(9, .5) = 3
pow(9, 2) = 81
pow(9, 3) = 729
I think that you could try to solve it by using the Taylor's series,
check this.
http://en.wikipedia.org/wiki/Taylor_series
With the Taylor's series you can solve any difficult to solve calculation such as 3^3.8 by using the already known results such as 3^4. In this case you have
3^4 = 81 so
3^3.8 = 81 + 3.8*3( 3.8 - 4) +..+.. and so on depend on how big is your n you will get the closer solution of your problem.
I and my friend faced similar problem while we're on an OpenGL project and math.h didn't suffice in some cases. Our instructor also had the same problem and he told us to seperate power to integer and floating parts. For example, if you are to calculate x^11.5 you may calculate sqrt(x^115, 10) which may result more accurate result.
Reworked on #capellic answer, so that nth_root works with bigger values as well.
Without the limitation of an array that is allocated for no reason:
#include <iostream>
float pow(float base, float ex);
inline float fabs(float a) {
return a > 0 ? a : -a;
}
float nth_root(float A, int n, unsigned max_iterations = 500, float epsilon = std::numeric_limits<float>::epsilon()) {
if (n < 0)
throw "Invalid value";
if (n == 1 || A == 0)
return A;
float old_value = 1;
float value;
for (int k = 0; k < max_iterations; k++) {
value = (1.0 / n) * ((n - 1) * old_value + A / pow(old_value, n - 1));
if (fabs(old_value - value) < epsilon)
return value;
old_value = value;
}
return value;
}
float pow(float base, float ex) {
if (base == 0)
return 0;
if (ex == 0){
// power of 0
return 1;
} else if( ex < 0) {
// negative exponent
return 1 / pow(base, -ex);
} else if (ex > 0 && ex < 1) {
// fractional exponent
return nth_root(base, 1/ex);
} else if ((int)ex % 2 == 0) {
// even exponent
float half_pow = pow(base, ex/2);
return half_pow * half_pow;
} else {
// integer exponent
return base * pow(base, ex - 1);
}
}
int main () {
for (int i = 0; i <= 128; i++) {
std::cout << "pow(" << i << ", .5) = " << pow(i, .5) << std::endl;
std::cout << "pow(" << i << ", .3) = " << pow(i, .3) << std::endl;
std::cout << "pow(" << i << ", 2) = " << pow(i, 2) << std::endl;
std::cout << "pow(" << i << ", 3) = " << pow(i, 3) << std::endl;
}
std::cout << "pow(" << 74088 << ", .3) = " << pow(74088, .3) << std::endl;
return 0;
}
This solution of MINE will be accepted upto O(n) time complexity
utpo input less then 2^30 or 10^8
IT will not accept more then these inputs
It WILL GIVE TIME LIMIT EXCEED warning
but easy understandable solution
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// static is important here
// other wise it will store same values while multiplying
double p = x;
double ans;
// as we multiple p it will multiply it with q which has the
//previous value of this ans latter we will update the q
// so that q has fresh value for further test cases here
static double q=1; // important
if(n==0){ ans = q; q=1; return ans;}
if(n>0)
{
p *= q;
// stored value got multiply by p
q=p;
// and again updated to q
p=x;
//to update the value to the same value of that number
// cout<<q<<" ";
recursive(p,n-1);
}
return ans;
}
class Solution {
public:
double myPow(double x, int n) {
// double q=x;double N=n;
// return pow(q,N);
// when both sides are double this function works
if(n==0)return 1;
x = recursive(x,abs(n));
if(n<0) return double(1/x);
// else
return x;
}
};
For More help you may try
LEETCODE QUESTION NUMBER 50
**NOW the Second most optimize code pow(x,n) **
logic is that we have to solve it in O(logN) so we devide the n by 2
when we have even power n=4 , 4/2 is 2 means we have to just square it (22)(22)
but when we have odd value of power like n=5, 5/2 here we have square it to get
also the the number itself to it like (22)(2*2)*2 to get 2^5 = 32
HOPE YOU UNDERSTAND FOR MORE YOU CAN VISIT
POW(x,n) question on leetcode
below the optimized code and above code was for O(n) only
*
#include<bits/stdc++.h>
using namespace std;
double recursive(double x,int n)
{
// recursive calls will return the whole value of the program at every calls
if(n==0){return 1;}
// 1 is multiplied when the last value we get as we don't have to multiply further
double store;
store = recursive(x,n/2);
// call the function after the base condtion you have given to it here
if(n%2==0)return store*store;
else
{
return store*store*x;
// odd power we have the perfect square multiply the value;
}
}
// main function or the function for indirect call to recursive function
double myPow(double x, int n) {
if(n==0)return 1;
x = recursive(x,abs(n));
// for negatives powers
if(n<0) return double(1/x);
// else for positves
return x;
}