I presume, or rather hope, that I have a single fixable problem; otherwise I have many smaller ones and should give up. Either way, I am relatively new to Rcpp, extremely uninformed about parallel computation, and can't find a solution online.
The problem is that R typically throws a 'fatal error', or gets stuck: something like 5 minutes for 10 iterations, when the non-parallel version will do roughly 5K iterations in the same time.
As this algorithm fits into a much larger project, I call on several other functions. These are all in Rcpp, and I rewrote them to use only 'arma' objects, as that seemed to help other people here. I also ran the optimization part with a 'heat map' optimizer I wrote in Rcpp, again exclusively in 'arma', without improvement. I should also point out that it returns an 'arma::vec'.
// [[Rcpp::depends("RcppArmadillo")]]
// [[Rcpp::depends("RcppParallel")]]
#include <RcppArmadillo.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace std;
using namespace arma;
using namespace RcppParallel;
struct Boot_Worker : public Worker {
//Generate Inputs
// Source vector to keep track of the number of bootstraps
const arma::vec Boot_reps;
// Initial non-linear theta parameter values
const arma::vec init_val;
// Decimal date vector
const arma::colvec T_series;
// Generate the price series observational vector
const arma::colvec Y_est;
const arma::colvec Y_res;
// Generate the optimization constants
const arma::mat U;
const arma::colvec C;
const int N;
// Generate Output Matrix
arma::mat Boots_out;
// Initialize with the proper input and output
Boot_Worker( const arma::vec Boot_reps, const arma::vec init_val, const arma::colvec T_series, const arma::colvec Y_est, const arma::colvec Y_res, const arma::mat U, const arma::colvec C, const int N, arma::mat Boots_out)
: Boot_reps(Boot_reps), init_val(init_val), T_series(T_series), Y_est(Y_est), Y_res(Y_res), U(U), C(C), N(N), Boots_out(Boots_out) {}
void operator()(std::size_t begin, std::size_t end){
// load the necessary R functions from the environment
Rcpp::Environment stats("package:stats");
Rcpp::Function constrOptim = stats["constrOptim"];
Rcpp::Function SDK_pred_mad( "SDK_pred_mad");
arma::mat fake_data(N,2);
arma::colvec index(N);
for(unsigned int i = begin; i < end; i ++){
// Need a nested loop to create and fill the fake data matrix
arma::vec pool = arma::regspace(0, N-1) ;
std::random_shuffle(pool.begin(), pool.end());
for(int k = 0; k <= N-1; k++){
fake_data(k, 0) = Y_est[k] + Y_res[ pool[k] ];
fake_data(k, 1) = T_series[k];
}
// Call the optimization
Rcpp::List opt_results = constrOptim(Rcpp::_["theta"] = init_val,
Rcpp::_["f"] = SDK_pred_mad,
Rcpp::_["data_in"] = fake_data,
Rcpp::_["grad"] = "NULL",
Rcpp::_["method"] = "Nelder-Mead",
Rcpp::_["ui"] = U,
Rcpp::_["ci"] = C );
/// fill the output matrix ///
// need to create a placeholder arma vector for the parameter output
arma::vec opt_param = Rcpp::as<arma::vec>(opt_results[0]);
Boots_out(i, 0) = opt_param[0];
Boots_out(i, 1) = opt_param[1];
Boots_out(i, 2) = opt_param[2];
// for the cost function value at optimization
arma::vec opt_value = Rcpp::as<arma::vec>(opt_results[1]);
Boots_out(i, 3) = opt_value[0];
// for the number of function calls (?)
arma::vec counts = Rcpp::as<arma::vec>(opt_results[2]);
Boots_out(i, 4) = counts[0];
// for the convergence code
arma::vec convergence = Rcpp::as<arma::vec>(opt_results[3]);
Boots_out(i, 5) = convergence[0];
}
}
};
// [[Rcpp::export]]
arma::mat SDK_boots_test(arma::vec init_val, arma::mat data_in, int boots_n){
//First establish theta_sp, estimate and residuals
const int N = arma::size(data_in)[0];
// Create the constraints for the constrained optimization
// Make a boundary condition matrix of the form Ui*theta - ci >= 0
arma::mat U(6, 3);
U(0, 0) = 1;
U(1, 0) = -1;
U(2, 0) = 0;
U(3, 0) = 0;
U(4, 0) = 0;
U(5, 0) = 0;
U(0, 1) = 0;
U(1, 1) = 0;
U(2, 1) = 1;
U(3, 1) = -1;
U(4, 1) = 0;
U(5, 1) = 0;
U(0, 2) = 0;
U(1, 2) = 0;
U(2, 2) = 0;
U(3, 2) = 0;
U(4, 2) = 1;
U(5, 2) = -1;
arma::colvec C(6);
C[0] = 0;
C[1] = -data_in(N-1, 9)-0.5;
C[2] = 0;
C[3] = -3;
C[4] = 0;
C[5] = -50;
Rcpp::Function SDK_est( "SDK_est");
Rcpp::Function SDK_res( "SDK_res");
arma::vec Y_est = as<arma::vec>(SDK_est(init_val, data_in));
arma::vec Y_res = as<arma::vec>(SDK_res(init_val, data_in));
// Generate feed items for the Bootstrap Worker
arma::vec T_series = data_in( span(0, N-1), 9);
arma::vec Boots_reps(boots_n+1);
// Allocate the output matrix
arma::mat Boots_out(boots_n, 6);
// Pass input and output the Bootstrap Worker
Boot_Worker Boot_Worker(Boots_reps, init_val, T_series, Y_est, Y_res, U, C, N, Boots_out);
// Now finally call the parallel for loop
parallelFor(0, Boots_reps.size(), Boot_Worker);
return Boots_out;
}
So I wrote my 'heat algorithm' back in to solve the optimization; it is entirely in Rcpp-Armadillo, which simplifies the code massively, as the constraints are written into the optimizer. Additionally, I removed the randomization, so it just has to solve the same optimization each time, to see if that was the only problem. Without fail, I am still getting the same 'fatal error'.
As it stands, here is the code:
// [[Rcpp::depends("RcppArmadillo")]]
// [[Rcpp::depends("RcppParallel")]]
#include <RcppArmadillo.h>
#include <RcppParallel.h>
#include <random>
using namespace Rcpp;
using namespace std;
using namespace arma;
using namespace RcppParallel;
struct Boot_Worker : public Worker {
//Generate Inputs
// Source vector to keep track of the number of bootstraps
const arma::vec Boot_reps;
// Initial non-linear theta parameter values
const arma::vec init_val;
// Decimal date vector
const arma::colvec T_series;
// Generate the price series observational vector
const arma::colvec Y_est;
const arma::colvec Y_res;
const int N;
// Generate Output Matrix
arma::mat Boots_out;
// Initialize with the proper input and output
Boot_Worker( const arma::vec Boot_reps, const arma::vec init_val, const arma::colvec T_series, const arma::colvec Y_est, const arma::colvec Y_res, const int N, arma::mat Boots_out)
: Boot_reps(Boot_reps), init_val(init_val), T_series(T_series), Y_est(Y_est), Y_res(Y_res), N(N), Boots_out(Boots_out) {}
void operator()(std::size_t begin, std::size_t end){
// load the necessary R functions from the environment
Rcpp::Function SDK_heat( "SDK_heat");
arma::mat fake_data(N,2);
arma::colvec index(N);
for(unsigned int i = begin; i < end; i ++){
// Need a nested loop to create and fill the fake data matrix
//arma::vec pool = arma::shuffle( arma::regspace(0, N-1) );
for(int k = 0; k <= N-1; k++){
fake_data(k, 0) = Y_est[k] + Y_res[ k ];
//fake_data(k, 0) = Y_est[k] + Y_res[ pool[k] ];
fake_data(k, 1) = T_series[k];
}
// Call the optimization
arma::vec opt_results = Rcpp::as<arma::vec>( SDK_heat(Rcpp::_["data_in"] = fake_data, Rcpp::_["tol"] = 0.1) );
/// fill the output matrix ///
// parameter estimates go in the first three columns
Boots_out(i, 0) = opt_results[0];
Boots_out(i, 1) = opt_results[1];
Boots_out(i, 2) = opt_results[2];
// for the cost function value at optimization
Boots_out(i, 3) = opt_results[3];
}
}
};
// [[Rcpp::export]]
arma::mat SDK_boots_test(arma::vec init_val, arma::mat data_in, int boots_n){
//First establish theta_sp, estimate and residuals
const int N = arma::size(data_in)[0];
Rcpp::Function SDK_est( "SDK_est");
Rcpp::Function SDK_res( "SDK_res");
const arma::vec Y_est = as<arma::vec>(SDK_est(init_val, data_in));
const arma::vec Y_res = as<arma::vec>(SDK_res(init_val, data_in));
// Generate feed items for the Bootstrap Worker
const arma::vec T_series = data_in( span(0, N-1), 9);
arma::vec Boots_reps(boots_n+1);
// Allocate the output matrix
arma::mat Boots_out(boots_n, 4);
// Pass input and output the Bootstrap Worker
Boot_Worker Boot_Worker(Boots_reps, init_val, T_series, Y_est, Y_res, N, Boots_out);
// Now finally call the parallel for loop
parallelFor(0, Boots_reps.size(), Boot_Worker);
return Boots_out;
}
Looking at your code I see the following:
struct Boot_Worker : public Worker {
[...]
void operator()(std::size_t begin, std::size_t end){
//load necessary stuffs from around
Rcpp::Environment stats("package:stats");
Rcpp::Function constrOptim = stats["constrOptim"];
Rcpp::Function SDK_pred_mad( "SDK_pred_mad");
[...]
// Call the optimization
Rcpp::List opt_results = constrOptim(Rcpp::_["theta"] = init_val,
Rcpp::_["f"] = SDK_pred_mad,
Rcpp::_["data_in"] = fake_data,
Rcpp::_["grad"] = "NULL",
Rcpp::_["method"] = "Nelder-Mead",
Rcpp::_["ui"] = U,
Rcpp::_["ci"] = C );
You are calling an R function from a multi-threaded C++ context. That is something you should not do: R is single-threaded, so this will lead to undefined behavior or crashes:
API Restrictions
The code that you write within parallel workers should not call the R or Rcpp API in any fashion. This is because R is single-threaded and concurrent interaction with its data structures can cause crashes and other undefined behavior. Here is the official guidance from Writing R Extensions:
Calling any of the R API from threaded code is ‘for experts only’: they will need to read the source code to determine if it is thread-safe. In particular, code which makes use of the stack-checking mechanism must not be called from threaded code.
Besides, calling back to R from C++ is not the best thing you can do for performance, even in a single-threaded context. It would be more efficient to use an optimization library that offers a direct C(++) interface. One possibility might be the development version of nlopt; c.f. this issue for a discussion and references to examples. In addition, std::random_shuffle is not only deprecated in C++14 and removed in C++17, but it is also not thread-safe.
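For the shuffling itself, a minimal thread-safe sketch using std::shuffle with one engine per worker invocation (the seeding scheme here is only illustrative; for reproducible, well-separated streams you would want something more careful):
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>
// inside operator()(std::size_t begin, std::size_t end):
std::mt19937 engine(static_cast<std::mt19937::result_type>(begin)); // per-chunk engine, no shared state
std::vector<int> pool(N);
std::iota(pool.begin(), pool.end(), 0);          // fill with 0, 1, ..., N-1
std::shuffle(pool.begin(), pool.end(), engine);  // thread-safe replacement for std::random_shuffle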
In your second example, you say that the function SDK_heat is actually implemented in C++. In that case you can call it directly:
Remove the import of the corresponding R function, i.e. the Rcpp::Function SDK_heat( "SDK_heat");
Make sure that the compiler knows the declaration of the C++ function and that the linker has the actual function:
Quick and dirty: copy the function definition into your cpp file before the definition of Boot_Worker (see the sketch after this list).
For a cleaner approach, see section "1.10 Sharing code" in the Rcpp attributes vignette
Call the function like any other C++ function, i.e. using positional arguments with types compatible with the function declaration.
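A minimal sketch of the quick-and-dirty variant, assuming SDK_heat has a signature along the lines of arma::vec SDK_heat(const arma::mat& data_in, double tol) (adjust to whatever your actual declaration is):
// definition pasted above Boot_Worker so the compiler sees it
arma::vec SDK_heat(const arma::mat& data_in, double tol) {
  // ... body copied from its original source file ...
}
struct Boot_Worker : public Worker {
  // ... members and constructor as before, but without Rcpp::Function ...
  void operator()(std::size_t begin, std::size_t end) {
    for (std::size_t i = begin; i < end; i++) {
      // ... build fake_data as before ...
      arma::vec opt_results = SDK_heat(fake_data, 0.1); // plain C++ call, no R API
      // ... fill Boots_out as before ...
    }
  }
};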
All this assumes you are using sourceCpp, as indicated by your usage of [[Rcpp::depends(...)]]. You are reaching a complexity that warrants building a package from this.
How does one use the varargs functions of the R language, such as the optim function?
Consider the code below where I want to maximize the log-likelihood function verossimilhanca:
#include <Rcpp.h>
#include <RInside.h>
using namespace Rcpp;
// [[Rcpp::export]]
double verossimilhanca(Function pdf, NumericVector par, NumericVector x){
NumericVector log_result = log(as<NumericVector>(pdf(par, x))); // coerce the R result before applying sugar log
double soma =0;
for(int i = 0; i < log_result.size(); i++){
soma += log_result[i];
}
return -1*soma;
}
// [[Rcpp::export]]
List bootC(NumericVector x, NumericVector init_val){
Rcpp::Environment stats("package:stats");
Rcpp::Function optim = stats["optim"];
R["my_objective_fn"] = Rcpp::InternalFunction(&verossimilhanca);
Rcpp::List opt_results = optim(Rcpp::_["par"] = init_val,
Rcpp::_["fn"] = Rcpp::InternalFunction(&verossimilhanca),
Rcpp::_["method"] = "BFGS", x);
return opt_results;
// x is a data vector.
}
In summary, I have a log-likelihood function and I want to maximize it, where x is my data set. I know that RInside allows me to create instances of R in C++, but I want to solve this problem using only the Rcpp.h library, without resorting to RInside.h.
Replace x with Rcpp::_["x"] = x in the arguments of the optim function.
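For concreteness, the corrected call would look like this (the pdf argument of verossimilhanca, if still needed, would be passed by name in the same way):
Rcpp::List opt_results = optim(Rcpp::_["par"] = init_val,
                               Rcpp::_["fn"] = Rcpp::InternalFunction(&verossimilhanca),
                               Rcpp::_["method"] = "BFGS",
                               Rcpp::_["x"] = x); // named, so optim forwards it to fn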
It bothered me too, until I found the answer from @coatless.
I have a three-dimensional array of positive definite matrices, and I would like to obtain an array of the same size containing the Cholesky factors of all the matrices. I am using the Armadillo library and the cube type, which has the convenient function each_slice that I am trying to use. But I am not getting the lambda expression to work correctly, so hopefully someone can help me and point out my mistake.
Here is a minimal example:
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
// [[Rcpp::export]]
arma::cube chol_array(arma::cube Sigma) {
arma::cube Sigma_chol = Sigma;
Sigma_chol.each_slice([](arma::mat X) {return arma::chol(X);});
return Sigma_chol;
}
// [[Rcpp::export]]
arma::cube chol_array2(arma::cube Sigma) {
arma::cube Sigma_chol(size(Sigma));
for (arma::uword i = 0; i < Sigma.n_slices; i++) {
Sigma_chol.slice(i) = arma::chol(Sigma.slice(i));
}
return Sigma_chol;
}
/*** R
Sigma <- array(crossprod(matrix(rnorm(9), 3, 3)), dim = c(3, 3, 2))
chol_array(Sigma)
chol_array2(Sigma)
*/
The function chol_array2 does the job, but chol_array just returns the original matrices. What am I missing?
The issue here is the lack of a reference in the .each_slice() call. Armadillo's use of lambda expressions requires a reference through which to update the object, not a return statement. In particular, we have:
For form 3:
apply the given lambda_function to each slice; the function must accept a reference to a Mat object with the same element type as the underlying cube
So, change:
Sigma_chol.each_slice([](arma::mat X) {return arma::chol(X);});
to:
Sigma_chol.each_slice([](arma::mat& X) {X = arma::chol(X);});
Fixed Code
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
// Enable lambda expressions....
// [[Rcpp::plugins(cpp11)]]
// [[Rcpp::export]]
arma::cube chol_array(arma::cube Sigma) {
arma::cube Sigma_chol = Sigma;
// NOTE: the '&' and saving _back_ into the object are crucial
Sigma_chol.each_slice( [](arma::mat& X) { X = arma::chol(X); } );
return Sigma_chol;
}
Test code
set.seed(1113)
Sigma = array(crossprod(matrix(rnorm(9), 3, 3)), dim = c(3, 3, 2))
all.equal(chol_array(Sigma), chol_array2(Sigma))
# [1] TRUE
// computing the matrix operation here
// resultEigen = Input matrix
// result1Eigen = hidden bias
// result2Eigen = visible bias
// result3Eigen = weight matrix
MatrixXd H;
MatrixXd V;
double well[36];
Map<MatrixXd>( well, H.rows(), H.cols() ) = H;
H = resultEigen * result3Eigen + result1Eigen;
mexPrintf("H is here\n");
for (int i=0; i<36; i++)
{
mexPrintf("%d\n",H);
}
mexPrintf("\n");
I need to build a reconstruction function for my RBM, and since direct matrix multiplication could get me a better result, I have been using the Eigen library to solve my issues, but I am facing some difficulties.
When running the above code I end up getting a single value for the H matrix, and I wonder why!
Moreover, the parameters used in the computation of H have been initialized as follows:
double *data1 = hbias;
Map<VectorXd>hidden_bias(data1,6,1);
VectorXd result1Eigen;
double result1[6];
result1Eigen = hidden_bias.transpose();
Map<VectorXd>(result1, result1Eigen.cols()) = result1Eigen;
// next param
double *data2 = vbias;
Map<VectorXd>visible_bias(data2,6,1);
VectorXd result2Eigen;
double result2[6];
result2Eigen = visible_bias.transpose();
Map<VectorXd>(result2, result2Eigen.cols()) = result2Eigen;
// next param
double *data3 = w;
Map<MatrixXd>weight_matrix(data3,n_visible,n_hidden);
MatrixXd result3Eigen;
// double result3[36];
mxArray * result3Matrix = mxCreateDoubleMatrix(n_visible, n_hidden, mxREAL );
double *result3=(double*)mxGetData(result3Matrix);
result3Eigen = weight_matrix.transpose();
Map<MatrixXd>(result3, result3Eigen.rows(), result3Eigen.cols()) = result3Eigen;
Lastly, I also face issues printing out data using std::cout from inside the mexFunction.
Thanks for any hints.
The problem is in the printing code, which should be:
mexPrintf("%d\n",H(i));
Then, there is no need to duplicate vectors and matrices. For instance, result1 is useless, as you can get a raw pointer to the data stored in result1Eigen using result1Eigen.data(). Likewise, you can directly assign weight_matrix.transpose() to Map<MatrixXd>(result3,...), and I don't see the purpose of well.
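A sketch of that simplification, reusing the names from the question (illustrative rather than drop-in):
// result1Eigen already owns its storage; no separate result1 buffer needed
double* result1_ptr = result1Eigen.data();
// assign the transposed weights straight into the MATLAB buffer,
// skipping the intermediate result3Eigen copy
Map<MatrixXd>(result3, n_hidden, n_visible) = weight_matrix.transpose();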
Finally, if the sizes are really known at compile time, then it is better to use Matrix<double,6,1> instead of a VectorXd and Matrix<double,6,6> instead of a MatrixXd. You can expect a significant speedup.
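For instance (sizes taken from the question; treat this as a sketch):
typedef Eigen::Matrix<double, 6, 1> Vector6d; // fixed size: no heap allocation
typedef Eigen::Matrix<double, 6, 6> Matrix6d;
Map<Vector6d> hidden_bias(data1);   // dimensions checked at compile time
Map<Matrix6d> weight_matrix(data3);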
I'm using the Noise++ library to generate noise in my program, or at least that's the aim.
I have it set up like one of the tests in order to try it out; however, no matter what parameters I give it, I only get 0 back.
If anyone has any experience with Noise++, it would be really helpful if you could check it over and see if I'm doing anything wrong.
//
// Defaults are
// Frequency = 1
// Octaves = 6
// Seed = 0
// Quality = 1
// Lacunarity = 2
// Persistence = 0.5
// Scale = 2.12
//
NoiseppNoise::NoiseppNoise( ) : mPipeline2d( 2 )
{
mThreadCount = noisepp::utils::System::getNumberOfCPUs ();
mPerlin.setSeed(4321);
if ( mThreadCount > 2 ) {
mPipeline2d = noisepp::ThreadedPipeline2D( mThreadCount );
}
mNoiseID2D = mPerlin.addToPipe ( mPipeline2d );
mCache2d = mPipeline2d.createCache();
}
double NoiseppNoise::Generate( double x, double y )
{
return mPipeline2d.getElement( mNoiseID2D )->getValue ( x, y, mCache2d );
}
I have added the following lines to your code to get it to compile (basically no changes except for cleaning the cache):
struct NoiseppNoise
{
NoiseppNoise();
double Generate( double x, double y );
noisepp::ThreadedPipeline2D mPipeline2d;
noisepp::ElementID mThreadCount;
noisepp::PerlinModule mPerlin;
noisepp::ElementID mNoiseID2D;
noisepp::Cache* mCache2d;
};
/* constructor as in the question */
double NoiseppNoise::Generate( double x, double y )
{
mPipeline2d.cleanCache (mCache2d); // clean the cache before calculating value
return mPipeline2d.getElement( mNoiseID2D )->getValue ( x, y, mCache2d );
}
Calling it with
NoiseppNoise np;
std::cout<<np.Generate(1.5,1)<<std::endl;
actually outputs a good value, 0.0909 for me.
However, if you call it with two integer-valued arguments (e.g. 3.0 and 5.0), the output will be 0, because at some point something similar to the following statement is executed:
const Real xs = Math::CubicCurve3 (x - Real(x0));
If the parameters are integers, then x and Real(x0) are always the same, because Real(x0) is basically the integer part of x, so xs will be set to 0. After this there are more calculations to get the actual value, but it comes out deterministically 0.
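You can see the effect in isolation (a minimal sketch, independent of Noise++):
#include <cmath>
double x  = 3.0;            // integer-valued input
double x0 = std::floor(x);  // integer part: 3.0
double xs = x - x0;         // 0.0, so the cubic fade curve maps it to 0
// with x = 3.5 instead, xs = 0.5 and the interpolation yields a non-zero value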