Say that I have the following boost::odeint code:
#include <iostream>
#include <boost/array.hpp>
#include <boost/numeric/odeint.hpp>
using namespace std;
using namespace boost::numeric::odeint;
const double sigma = 10.0;
const double R = 28.0;
const double b = 8.0 / 3.0;
typedef boost::array< double , 3 > state_type;
void lorenz( const state_type &x , state_type &dxdt , double t ){
dxdt[0] = sigma * ( x[1] - x[0] );
dxdt[1] = R * x[0] - x[1] - x[0] * x[2];
dxdt[2] = -b * x[2] + x[0] * x[1];
}
void write_lorenz( const state_type &x , const double t ){
cout << t << '\t' << x[0] << '\t' << x[1] << '\t' << x[2] << endl;
}
int main(int argc, char **argv){
state_type x = { 10.0 , 1.0 , 1.0 }; // initial conditions
cout<<"Steps: "<<integrate( lorenz , x , 0.0 , 25.0 , 0.1 , write_lorenz )<<endl;
}
How can I modify the code so that integrate will break after a certain number of steps? I'm running a large number of integrations and want to avoid spending too much time on integrating any particular system.
I've thought of using integrate_n_steps(), but this may mean that the integration proceeds past the end time I am interested in.
there is no integrate routine for this task at the moment. Nevertheless, you have several options:
First, use an observer in integrate() and throw an exception there if you exceed the number of maximal steps. Of course, this is not very elegant:
struct write_lorenz_and_check_steps
{
size_t m_steps;
write_lorenz_and_check_steps( void ) : m_steps( 0 ) { }
void operator()( const state_type &x , const double t ) const {
cout << t << '\t' << x[0] << '\t' << x[1] << '\t' << x[2] << endl;
++m_steps;
if( m_steps > max_steps ) throw runtime_error( "Too much steps" );
}
};
// ...
size_t steps = 0;
try {
steps = integrate( lorenz , x , 0.0 , 25.0 , 0.1 , write_lorenz );
} catch( ... ) { steps = max_steps; }
cout << steps << endl;
Second, you can write the stepping loop yourself:
// Attention: the code has not been check to compile
double tmax = 25.0;
size_t imax = 1000;
size_t i = 0;
auto stepper = make_dense_output( 1.0e-6 , 1.0e-6 , runge_kutta_dopri5< state_type >() );
stepper.initialize( x , t , dt );
while ( ( stepper.current_time() < tmax ) && ( i < imax ) )
{
observer( stepper.current_state() , stepper.current_time() );
stepper.do_step( lorenz() );
++i;
}
x = stepper.current_state();
In this example you also work directly with stepper.current_state() and stepper.current_time() instead of calling the observer. Furthermore, if you compiler does not support auto, i.e. you have a C++03 compiler just use
typedef runge_kutta_dopri5< state_type > stepper_type;
result_of::make_dense_output< stepper_type >::type stepper =
make_dense_output( 1.0e-6 , 1.0e-6 , stepper_type() );
We are also developing a special integrate routine exactly for this task. But it will still take some weeks until it is finished. Furthermore, we develop ode iterators which could also be used and which will be ready very soon (I hope at the next of next week).
Related
I'm new in the world of C++ and I'm having some trouble with the boost library. In my problem I want to solve a ODE-System with 5 equations. It isn't a stiff problem. As iterative method I used both integreate(rhs, x0, t0, tf, size_step, write_output) and integreate_adaptive(stepper, sys, x0, t0, tf, size_step, write_output). Both these method actually integrate the equations but giving me non-sense results changing the size of the step from 0.001 to 5 almost randomly. The equations and data are correct. What can I do to fix this problem? Here is the code:
#include <iostream>
#include <vector>
#include <boost/numeric/odeint.hpp>
#include <fstream>
#include <boost/array.hpp>
using namespace std;
using namespace boost::numeric::odeint;
//DATA
double Lin = 20000; // kg/h
double Gdry = 15000; // kg/h
double P = 760; // mmHg
double TinH2O = 50; // °C
double ToutH2O = 25; // °C
double Tinair = 20; // °C
double Z = 0.5; // relative humidity
double Cu = 0.26; // kcal/kg*K
double CpL = 1; // kcal/kg*K
double DHev = 580; // kcal/kg
double hga = 4000; // kcal/m/h/K
double hla = 30000; // kcal/m/h/K
double A = -49.705; // Pev 1st coeff mmHg vs °C
double B = 2.71; // Pev 2nd coeff mmHg vs °C
double Usair = 0.62*(A + B*Tinair) / P;
double Uair = Z*Usair;
double Kua = hga / Cu;
double L0 = 19292; // kg/h
typedef vector< double > state_type;
vector <double> pack_height;
vector <double> Umidity;
vector <double> T_liquid;
vector <double> T_gas;
vector <double> Liquid_flow;
vector <double> Gas_flow;
void rhs(const state_type& x , state_type& dxdt , const double z )
{// U Tl Tg L G
double Ti = (hla*x[1] + hga*x[2] + Kua*DHev*(x[0] - 0.62*A / P)) / (hla + hga + Kua*DHev*0.62*B / P);
double Ui = 0.62*(A + B*Ti) / P;
dxdt[0] = Kua*(Ui - x[0]) / Gdry / 100;
dxdt[1] = hla*(x[1] - Ti) / x[3] / CpL / 100;
dxdt[2] = hga*(Ti - x[2]) / Gdry / Cu / 100;
dxdt[3] = Kua*(Ui - x[0]) / 100;
dxdt[4] = Kua*(Ui - x[0]) / 100;
}
void write_output(const state_type& x, const double z)
{
pack_height.push_back(z);
Umidity.push_back(x[0]);
T_liquid.push_back(x[1]);
T_gas.push_back(x[2]);
Liquid_flow.push_back(x[3]);
Gas_flow.push_back(x[4]);
cout << z << " " << x[0] << " " << x[1] << " " << x[2] << " " << x[3] << " " << x[4] << endl;
}
int main()
{
state_type x(5);
x[0] = Uair;
x[1] = ToutH2O;
x[2] = Tinair;
x[3] = L0;
x[4] = Gdry;
double z0 = 0.0;
double zf = 5.5;
double stepsize = 0.001;
integrate( rhs , x , z0 , zf , stepsize , write_output );
return 0;
}
And this is the final results that i get from the prompt:
0 0.00183349 25 20 19292 15000
0.001 0.00183356 25 20 19292 15000
0.0055 0.0018339 25.0002 20.0001 19292 15000
0.02575 0.00183542 25.001 20.0007 19292 15000
0.116875 0.00184228 25.0046 20.003 19292.1 15000.1
0.526938 0.00187312 25.0206 20.0135 19292.6 15000.6
2.37222 0.00201203 25.0928 20.0608 19294.7 15002.7
5.5 0.00224788 25.2155 20.142 19298.2 15006.2
Only the first iteration has the right-asked stepsize.. and obiviously the solution is not the right one.. what can i do? Thank you in advance. :)
If you read the documentation, then you will find that the constant step-size routines are integrate_const and integrate_n_steps, or possibly integrate_adaptive with a non-controlled stepper.
The short call to integrate uses the standard dopri5 stepper with adaptive step size, so that the changing step size is no surprise. You could possibly use the dense output of the stepper to interpolate values at equidistant times.
To describe the problem, I am trying to use objects in my code to stream line a three body problem. I have the following code for the object:
#include <stdlib.h>
#include <cstdio>
#include <iostream>
#include <cmath>
#include <vector>
#include "star.h"
using namespace std;
Star::Star( double m, double x_p, double y_p, double x_v, double y_v )
{
init( m, x_p, y_p, x_v, y_v);
}
void Star::init( double m, double x_p, double y_p, double x_v, double y_v )
{
Mass = m;
X_Position = x_p;
Y_Position = y_p;
X_Velocity = x_v;
Y_Velocity = y_v;
R_Position[0] = X_Position;
R_Position[1] = Y_Position;
R_Velocity[0] = X_Velocity;
R_Velocity[1] = Y_Velocity;
}
double Star::potential( Star star2, double dx, double dy )
{
double G = 3.0548e34;
double Potential;
double x_component = X_Position - star2.X_Position + dx;
double y_component = Y_Position - star2.Y_Position + dy;
double R = sqrt(x_component*x_component + y_component*y_component);
Potential = G* Mass* star2.Mass / R;
return Potential;
}
double * Star::compute_forces( Star star2 )
{
double h_x = ( X_Position - star2.X_Position )/1000;
double h_y = ( Y_Position - star2.Y_Position )/1000;
double *F = new double[2];
F[0] = ( potential( star2, h_x, 0.0 ) - potential( star2, -h_x, 0.0 ) )/2*h_x;
F[1] = ( potential( star2, 0.0, h_y ) - potential( star2, 0.0, -h_y ) )/2*h_y;
return F;
}
void Star::verlet( Star star2, double h )
{
double *Force = compute_forces( star2 );
X_Position += h*X_Velocity + 0.5*h*h*Force[ 0 ];
Y_Position += h*Y_Velocity + 0.5*h*h*Force[ 1 ];
double *Force_new = compute_forces( star2 );
X_Velocity += 0.5*h*(Force[ 0 ] + Force_new[ 0 ] );
Y_Velocity += 0.5*h*(Force[ 1 ] + Force_new[ 1 ] );
}
Now I believe that the velocity verlet algorithm is correct, but when I run the code using this main file:
#include <iostream>
#include <fstream>
#include <cmath>
#include <cstdio>
#include "star.h"
using namespace std;
int main()
{
Star star1( 50, 0.0, 0.0, 0.0, 0.0 );
Star star2( 1.00, 0.0, 1.0, -1.0, 1.0 );
Star star3( 1.00, 0.0, -1.0, 1.0, 1.0 );
Star arr[3] = { star1, star2, star3 };
double h = 10/1000;
//for ( double time = 0.0; time <= 10.0; )
//{
for ( int inst = 0 ; inst< 3; ++inst )
{
for ( int jnst = 0; jnst < 3; ++jnst )
{
if ( inst != jnst )
{
arr[ inst ].verlet( arr[ jnst ], h );
double *pos = arr[ inst ].get_positions();
cout << " " << pos[ 0 ] << " " << pos[ 1 ] << endl;
}
}
}
//time += h;
//}
return 0;
}
The values of members of the Star object are not updating :/. Is there something I am missing?
the out put of the cout is this:
0 0
0 0
0 1
0 1
0 -1
0 -1
Thank you in advance!
Edit:
I tried implementing a std::vector<double> for my forces, but I ended up with a segmentation fault.
Edit 2:
After checking my get_positions() method I noticed it was returning only the initialized values. So I tried implementing this:
std::vector<double> get_positions(){ std::vector<double> temp = { X_Position , Y_Position }; return temp; }
And it worked so i implemented the following into my main code.
std::vector<double> p1 = star1.get_positions();
std::vector<double> p2 = star2.get_positions();
std::vector<double> p3 = star3.get_positions();
cout << p1[ 0 ] << " " << p1[ 1 ] << " " << p2[ 0 ] << " " << p2[ 1 ] << " " << p3[ 0 ] << " " << p3[ 1 ] << endl;
However now I am stuck on a completely new problem... Now I am getting the following numbers for the algorithm updates!
5.66002e-320 2.31834e-316
1.132e-316 4.63669e-313
1.698e-319 6.95503e-316
1.132e-316 4.63669e-313
5.66002e-320 2.31834e-316
1.132e-316 4.63669e-313
1.698e-319 6.95503e-316
1.132e-316 4.63669e-313
5.66002e-320 2.31834e-316
1.132e-316 4.63669e-313
1.698e-319 6.95503e-316
1.132e-316 4.63669e-313
Which means some where I am multiplying by zeros somewhere in my code. The problem is I cant for the life of me see where. Thanks if there is any help!
Error
If you want to divide by 2*h_x, you need to write this as /(2*h_x), else you divide by 2 and multiply by h_x, giving miniscule values for forces and thus not moving the system by much.
To complement this, you defined the step size in the main program as
double h = 10/1000;
The value on the right is identified as result of an integer division, which is 0. With this step size nothing will change.
Style
Do not construct two data fields for the same value, you would have to ensure that these fields are always synchronized. Use getter methods to present data in a different format.
For science it would be better to use an established vector class that then also provides vector arithmetic, like the one of boost/Eigen.
Use initialization list syntax in the constructor, you do not need an init function to just assign the values.
Verlet
The Verlet method does not work this way. Even if everything goes right coding-wise, the result is a first order method that neither preserves energy nor momentum.
For using Verlet with gravity simulations, see N-Body Gravity Simulation in JavaScript.
The same in a slightly different context, Lennard-Jones potential simulation.
Perhaps also this discussion about general properties of the Verlet method, Velocity verlet algorithm not conserving energy, can be helpful.
The short version is, the stages of the Verlet method are the outer frame. In each stage, all computations have to be carried out for all objects before changing to the next stage. That is, all velocities change, then all positions chance, then all forces are computed and accumulated, then all velocities change with the new forces/accelerations for all objects.
Mixing these steps destroys the order of the method and all conservation properties. (The first two stages can be interleaved, as there is no interaction between objects.)
I implemented some of the suggested changes, using the data of the Pleiades IVP test suite example, as the provided data lead to a rapid explosion of the system.
The main program solarsystem.c with the main Verlet loop
#include <iostream>
#include <cstdio>
#include <vector>
#include "star.h"
using namespace std;
int main()
{
vector<Star> arr = {
Star( 1, 3.0, 3.0, 0.0, 0.0 ),
Star( 2, 3.0,-3.0, 0.0, 0.0 ),
Star( 3,-1.0, 2.0, 0.0, 0.0 ),
Star( 4,-3.0, 0.0, 0.0,-1.25 ),
Star( 5, 2.0, 0.0, 0.0, 1.0 ),
Star( 6,-2.0,-4.0, 1.75, 0.0 ),
Star( 7, 2.0, 4.0,-1.5, 0.0 )
};
int N = arr.size();
double dt = 0.001;
int count = 10;
for ( double time = 0.0; time <= 3.0; time += dt)
{
for ( int inst = 0 ; inst< N; ++inst ) {
arr[inst].Verlet_stage1(dt);
}
for ( int inst = 0 ; inst< N; ++inst ) {
for ( int jnst = inst+1; jnst < N; ++jnst ) {
arr[inst].acceleration(arr[jnst]);
}
}
for ( int inst = 0 ; inst< N; ++inst ) {
arr[inst].Verlet_stage2(dt);
}
if( 10 == count) {
count = 0;
for ( int inst = 0 ; inst< N; ++inst ) {
cout << " " << arr[inst].Position[1] << " " << arr[inst].Position[0];
}
cout << "\n";
}
count++;
}
return 0;
}
and the implementation of the Star class with header
#pragma once
#include <eigen3/Eigen/Dense>
typedef Eigen::Vector2d Vec2D;
const double G = 1;
class Star {
public:
Star( double m, double x_p, double y_p, double x_v, double y_v )
:Mass(m),Position(x_p,y_p),Velocity(x_v,y_v) {};
double Mass;
Vec2D Position, Velocity, Acceleration;
void Verlet_stage1(double dt);
void Verlet_stage2(double dt);
double potential(Star other);
void acceleration(Star &other);
};
and corpus
#include "star.h"
double Star::potential( Star other )
{
Vec2D diff = Position-other.Position;
double R = diff.norm();
return G * Mass * other.Mass / R;
}
void Star::acceleration( Star &other )
{
Vec2D diff = Position-other.Position;
double R = diff.norm();
Vec2D acc = (-G / (R*R*R)) * diff;
Acceleration += other.Mass * acc;
other.Acceleration -= Mass * acc;
}
void Star::Verlet_stage1( double dt )
{
Velocity += (0.5*dt) * Acceleration;
Position += dt*Velocity;
Acceleration *= 0;
}
void Star::Verlet_stage2( double dt )
{
Velocity += (0.5*dt) * Acceleration;
}
This results in the trajectories below. The picture is very depending on the step size dt as near singularities of the potential function, that is, if bodies come very close together, the promise of symplectic methods of near conservation of energy and momentums breaks apart.
I'm personally not against using raw pointers, but complications occur when they don't get managed properly. I have no idea what this code does, more so how it does! Nevertheless, I have tried ameliorating a few errors which I could observe, but evidently this code requires a serious overhaul. I suppose the shortcomings in this code are just due to inexperience, which can be understood.
https://gcc.godbolt.org/z/5zT5o9 Please do keep in mind that this code is still leaking due to usage(non-manage) of raw pointers in various function bodies.
After searching a long time, I still can't solve this problem.
I have two vectors: x = [a1,...,aN], y = [b1,...,bN].
And I want to compute their inner product: = a1*conj(b1) + ... + aN*conj(bN). (conj(.) means the complex conjugate operation)
I have tried cublasCdotu, and it just computes a1*b1 + ... + aN*bN.
And cublasCdotc returns conj(a1)*conj(b1) + ... + conj(aN)*conj(bN).
Finally, I tried thrust::inner_product, and it computes a1*b1 + ... + aN*bN too.
My thrust code is like the following:
typedef thrust::complex<float> comThr;
thrust::host_vector< comThr > x( vec_size );
thrust::generate(x.begin(), x.end(), rand);
thrust::host_vector< comThr > y( vec_size );
thrust::generate(y.begin(), y.end(), rand);
comThr z = thrust::inner_product(x.begin(), x.end(), y.begin(), comThr(0.0f,0.0f) );
Could you give me some advice on this problem? Thank you!
You can do this with thrust::inner_product. All that is required is a user defined binary function which implements a * conj(b), where conj is the complex conjugate. The thrust library includes all the complex operators required, so the implementation is a simple as an operator like this:
__host__ __device__
comThr operator()(comThr a, comThr b)
{
return a * thrust::conj(b);
};
A complete working example:
#include <iostream>
#include "thrust/host_vector.h"
#include "thrust/functional.h"
#include "thrust/complex.h"
#include "thrust/inner_product.h"
#include "thrust/random.h"
typedef thrust::complex<float> comThr;
struct a_dot_conj_b : public thrust::binary_function<comThr,comThr,comThr>
{
__host__ __device__
comThr operator()(comThr a, comThr b)
{
return a * thrust::conj(b);
};
};
__host__ static __inline__ comThr rand_comThr()
{
return comThr((float)rand()/RAND_MAX, (float)rand()/RAND_MAX);
}
int main()
{
const int vec_size = 16;
thrust::host_vector< comThr > x( vec_size );
thrust::generate(x.begin(), x.end(), rand_comThr);
thrust::host_vector< comThr > y( vec_size );
thrust::generate(y.begin(), y.end(), rand_comThr);
comThr z = thrust::inner_product(x.begin(), x.end(), y.begin(), comThr(0.0f,0.0f),
thrust::plus<comThr>(), a_dot_conj_b());
comThr zref(0.0,0.0);
for(int i=0; i<vec_size; i++) {
comThr val = x[i] * thrust::conj(y[i]);
std::cout << i << " " << x[i] << " op " << y[i] << " = " << val << std::endl;
zref += val;
}
std::cout << "z = " << z << " zref = " << zref << std::endl;
return 0;
}
which will compile and run like so:
$ nvcc -arch=sm_52 -o dotprod_thrust dotprod_thrust.cu
$ ./dotprod_thrust
0 (0.394383,0.840188) op (0.296032,0.61264) = (0.631482,0.00710744)
1 (0.79844,0.783099) op (0.524287,0.637552) = (0.917879,-0.0984784)
2 (0.197551,0.911647) op (0.972775,0.493583) = (0.642147,0.78932)
3 (0.76823,0.335223) op (0.771358,0.292517) = (0.690638,0.0338566)
4 (0.55397,0.277775) op (0.769914,0.526745) = (0.572826,-0.0779383)
5 (0.628871,0.477397) op (0.891529,0.400229) = (0.751725,0.173921)
6 (0.513401,0.364784) op (0.352458,0.283315) = (0.284301,-0.0168827)
7 (0.916195,0.95223) op (0.919026,0.807725) = (1.61115,0.135091)
8 (0.717297,0.635712) op (0.949327,0.0697553) = (0.725294,0.553463)
9 (0.606969,0.141603) op (0.0860558,0.525995) = (0.126716,-0.307077)
10 (0.242887,0.0163006) op (0.663227,0.192214) = (0.164222,-0.0358752)
11 (0.804177,0.137232) op (0.348893,0.890233) = (0.40274,-0.668025)
12 (0.400944,0.156679) op (0.020023,0.0641713) = (0.0180824,-0.0225919)
13 (0.108809,0.12979) op (0.0630958,0.457702) = (0.0662707,-0.0416127)
14 (0.218257,0.998924) op (0.970634,0.23828) = (0.449871,0.917584)
15 (0.839112,0.512932) op (0.85092,0.902208) = (1.17679,-0.32059)
z = (9.23213,1.02127) zref = (9.23213,1.02127)
I'm able to make function template parametrized by an other function, however, I don't know how to do it when I want to parametrize it by function with different number of arguments.
See this code:
#include <stdio.h>
#include <math.h>
template < double FUNC( double a ) >
void seq_op( int n, double * as ){
for (int i=0; i<n; i++){ printf( " %f \n", FUNC( as[i] ) ); }
}
template < double FUNC( double a, double b ) >
void seq_op_2( int n, double * as, double * bs ){
for (int i=0; i<n; i++){ printf( " %f \n", FUNC( as[i], bs[i] ) ); }
}
double a_plus_1 ( double a ){ return a + 1.0; }
double a_sq ( double a ){ return a*a; }
double a_plus_b ( double a, double b ){ return a + b; }
double a_times_b( double a, double b ){ return a * b; }
double as[5] = {1,2,3,4};
double bs[5] = {2,2,2,2};
// FUNCTION ====== main
int main(){
printf( "seq_op <a_plus_1> ( 5, as );\n"); seq_op <a_plus_1> ( 4, as );
printf( "seq_op <a_sq> ( 5, as );\n"); seq_op <a_sq> ( 4, as );
printf( "seq_op_2 <a_plus_b> ( 5, as, bs );\n"); seq_op_2 <a_plus_b> ( 4, as, bs );
printf( "seq_op_2 <a_times_b> ( 5, as, bs );\n"); seq_op_2 <a_times_b> ( 4, as, bs );
}
is there a way how to make common template for both cases?
Why I need such silly thing? A more practical example are this two functions which differs only in one line:
#define i3D( ix, iy, iz ) ( iz*nxy + iy*nx + ix )
void getLenardJonesFF( int natom, double * Rs_, double * C6, double * C12 ){
Vec3d * Rs = (Vec3d*) Rs_;
int nx = FF::n.x;
int ny = FF::n.y;
int nz = FF::n.z;
int nxy = ny * nx;
Vec3d rProbe; rProbe.set( 0.0, 0.0, 0.0 ); // we may shift here
for ( int ia=0; ia<nx; ia++ ){
printf( " ia %i \n", ia );
rProbe.add( FF::dCell.a );
for ( int ib=0; ib<ny; ib++ ){
rProbe.add( FF::dCell.b );
for ( int ic=0; ic<nz; ic++ ){
rProbe.add( FF::dCell.c );
Vec3d f; f.set(0.0,0.0,0.0);
for(int iatom=0; iatom<natom; iatom++){
// only this line differs
f.add( forceLJ( Rs[iatom] - rProbe, C6[iatom], C12[iatom] ) );
}
FF::grid[ i3D( ia, ib, ic ) ].add( f );
}
rProbe.add_mul( FF::dCell.c, -nz );
}
rProbe.add_mul( FF::dCell.b, -ny );
}
}
void getCoulombFF( int natom, double * Rs_, double * kQQs ){
Vec3d * Rs = (Vec3d*) Rs_;
int nx = FF::n.x;
int ny = FF::n.y;
int nz = FF::n.z;
int nxy = ny * nx;
Vec3d rProbe; rProbe.set( 0.0, 0.0, 0.0 ); // we may shift here
for ( int ia=0; ia<nx; ia++ ){
printf( " ia %i \n", ia );
rProbe.add( FF::dCell.a );
for ( int ib=0; ib<ny; ib++ ){
rProbe.add( FF::dCell.b );
for ( int ic=0; ic<nz; ic++ ){
rProbe.add( FF::dCell.c );
Vec3d f; f.set(0.0,0.0,0.0);
for(int iatom=0; iatom<natom; iatom++){
// only this line differs
f.add( forceCoulomb( Rs[iatom] - rProbe, kQQs[iatom] );
}
FF::grid[ i3D( ia, ib, ic ) ].add( f );
}
rProbe.add_mul( FF::dCell.c, -nz );
}
rProbe.add_mul( FF::dCell.b, -ny );
}
}
You should be able to combine the two functions using a combination of std::bind() and std::function() (see code on coliru):
#include <stdio.h>
#include <functional>
using namespace std::placeholders;
double getLJForceAtoms (int, int, double*, double*, double*)
{
printf("getLJForceAtoms\n");
return 0;
}
double getCoulombForceAtoms (int, int, double*, double*)
{
printf("getCoulombForceAtoms\n");
return 0;
}
void getFF (int natom, double* Rs_, std::function<double(int, int, double*)> GetForce)
{
int rProbe = 1;
double Force = GetForce(rProbe, natom, Rs_);
}
int main ()
{
double* C6 = nullptr;
double* C12 = nullptr;
double *kQQs = nullptr;
double* Rs_ = nullptr;
auto getLJForceFunc = std::bind(getLJForceAtoms, _1, _2, _3, C6, C12);
auto getCoulombForceFunc = std::bind(getCoulombForceAtoms, _1, _2, _3, kQQs);
getFF(1, Rs_, getLJForceFunc);
getFF(1, Rs_, getCoulombForceFunc);
return 0;
}
which outputs the expected:
getLJForceAtoms
getCoulombForceAtoms
Update -- On Performance
While it is natural to be concerned about performance of using std::function vs templates I would not omit a possible solution without first benchmarking and profiling it.
I can't compare the performance directly as I would need both your complete source code as well as input data set to make accurate benchmarks but I can do a very simple test to show you what it could look like. If I make the force functions do a little work:
double getLJForceAtoms (int x, int y, double* r1, double* r2, double* r3)
{
return cos(log2(abs(sin(log(pow(x, 2) + pow(y, 2))))));
}
and then have a very simple getFF() function call them 10 million times I can get a rough comparison between the various design methods (tests done on VS2013, release build, fast optimization flags):
Direct Call = 1900 ms
Switch = 1900 ms
If (flag) = 1900 ms
Virtual Function = 2400 ms
std::function = 2400 ms
So the std::function method is about 25% slower in this case but the switch and if methods are the same speed as the direct call case. Depending on how much work your actual force functions do you may get worse or better results. These days, the compiler optimizer and the CPU branch predictor are good enough to do a lot of things that may be surprising or even counter-intuitive, which is why actual testing must be done.
I would do a similar benchmark test with your exact code and data set and see what difference, if any, the various designs have. If you really only have two cases as shown in your question then the "if (flag)" method may be a good choice.
I'm fairly new with C++ and I'm trying to use it via Rcpp to speed up my R code.
The below code integrates from from t0 to t1- this is done in the "lorenz" function. Test4 integrates using "lorenz" "counts" number of times. However at time "t1" the state of the system is modified in "write_lorenz" before the system is rerun and this is where the problem is. If I run the same program over and over again by calling test4 from R, printing to the screen always produces the same result, however, my returned matrix "u" does not, and seems to eventually converge to whatever "t1" is which is the problem.
My input values don't change so I'm wondering if there a memory leak, or if something else is going on, how to fix it.
Also I'm wondering if my initialization of "u" is incorrect and I should be using the "new" command.
What I tried was
NumericMatrix* u = NULL;
*u = new NumericMatrix;
and then I tried accessing the elements of the matrix as *u(1,2) for instance, but accessing the elements this way caused an error saying u was not a function.
Any help would be greatly appreciated
I modified this code from the following site
http://headmyshoulder.github.io/odeint-v2/examples.html
so I could use it with Rcpp
//###################################R Code ###############################
library(Rcpp)
sourceCpp("test4.cpp")
sigma <- 10.0
R <-28.0
b <- 8.0 / 3.0
a2 <- c(10.0 , 1.0 , 1.0) #initial conditions X0,I0,Y0
L2 <- c(0.0 , 2.0 , 0.1) #initial time, kick time, error
counts <- 2
kick <-1.0; # kick size
pars <-c(sigma,R,b,kick)
test4(a=a,L2=L2,counts=counts,pars= pars)
// C ++ code
//[[Rcpp::depends(BH)]]
//[[Rcpp::depends(RcppEigen)]]
//[[Rcpp::plugins("cpp11")]]
#include <Rcpp.h>
#include <RcppEigen.h>
#include <math.h>
#include <boost/array.hpp>
#include <boost/numeric/odeint.hpp>
#include <boost/algorithm/string.hpp>
using namespace boost::numeric::odeint;
using namespace std;
using namespace Rcpp;
using namespace Eigen;
double sigma =0;
double e =0;
double b =0 ;
double t0 =0;
double t1 = 0;
double kick =0;
double X0 = 0;
double I0 =0;
double Y0 =0;
double N = 0;
int counter =0;
typedef boost::array< double , 3 > state_type;
NumericMatrix u(4,5);
void lorenz( const state_type &x , state_type &dxdt , double t )
{
dxdt[0] = sigma * ( x[1] - x[0] );
dxdt[1] = e * x[0] - x[1] - x[0] * x[2];
dxdt[2] = -b * x[2] + x[0] * x[1];
}
void write_lorenz( const state_type &x , const double t )
{
if(t==t1){
X0 = x[0];
I0 = x[1];
Y0 = x[2];
N = X0+I0+Y0;
X0 = X0 + exp(kick*N);
counter++;
//for some reason cout and u don't match for multiple runs of the
//program
cout << t << '\t' << X0 << '\t' << I0 << '\t' << Y0 << endl;
u(counter,0) = t;
u(counter,1) = X0;
u(counter,2) = I0;
u(counter,3) = Y0;
}
}
// [[Rcpp::export]]
NumericMatrix test4(NumericVector a, NumericVector L2,int counts,NumericVector pars)
{
double error; // control integration error
// initialize model parameters
//maybe these should be parenthesis?
sigma = pars[0]; //# average per capita birh rate 1-10(the mean is 4.7)
e = pars[1]; // # per capita average growth rate
b= pars[2];
kick = pars[3]; // kick size
//cout << sigma << R << b << kick << endl;
//myfile.open (ST);
X0 = a[0]; I0 =a[1]; Y0 = a[2];
int i = 0;
t0 = L2[0];
t1 = L2[1];
error = L2[2];
u(0,0) = t0;
u(0,1) = X0;
u(0,2) = I0;
u(0,3) = Y0;
// initial conditions
for(i=0;i<counts;i++)
{
state_type x = { X0 , I0 , Y0 };
integrate( lorenz , x , t0 , t1 , error , write_lorenz );
}
return u; // the variable I hope will be global
}
Here is a simple adaptation of the pure C++ file you linked to. We simply define a struct of three vectors into which we push the elements of each iterations--as opposed to printing them to standard output.
For data structures that grow, we prefer C++ standard library types (as our types have to be like R types, their internals do not match well to increasing one-by-one which is expensive for them). So at the end, we just copy into an R data.frame.
#include <boost/array.hpp>
#include <boost/numeric/odeint.hpp>
#include <Rcpp.h>
// [[Rcpp::depends(BH)]]
// [[Rcpp::plugins(cpp11)]]
using namespace std;
using namespace boost::numeric::odeint;
const double sigma = 10.0, r = 28.0, b = 8.0 / 3.0;
typedef boost::array< double , 3 > state_type;
void lorenz( const state_type &x , state_type &dxdt , double t ) {
dxdt[0] = sigma * ( x[1] - x[0] );
dxdt[1] = r * x[0] - x[1] - x[0] * x[2];
dxdt[2] = -b * x[2] + x[0] * x[1];
}
struct foo { std::vector<double> a, b, c; };
struct foo f;
void append_lorenz(const state_type &x , const double t ) {
f.a.push_back(x[0]);
f.b.push_back(x[1]);
f.c.push_back(x[2]);
}
using namespace Rcpp;
// [[Rcpp::export]]
DataFrame callMain() {
state_type x = { 10.0 , 1.0 , 1.0 }; // initial conditions
integrate( lorenz , x , 0.0 , 25.0 , 0.1 , append_lorenz );
return DataFrame::create(Named("a") = f.a,
Named("b") = f.b,
Named("c") = f.c);
}
/*** R
res <- callMain()
print(head(res))
*/
Here is the output of the R code (intentially limited to fist few rows):
R> sourceCpp("/tmp/lorenz.cpp")
R> res <- callMain()
R> print(head(res))
a b c
1 10.00000 1.00000 1.00000
2 9.40816 2.99719 1.12779
3 8.92164 5.35684 1.46991
4 8.68193 7.82671 2.05762
5 8.73730 10.42718 2.94783
6 9.11080 13.10452 4.18849
R>