How can I make my C++ code run faster with the Eigen library?

I have written a parallelized C++ code which works as follows:
There are 75 'w' points, and each of them is sent to one processor.
For each 'w' point I define a matrix and then diagonalize it. I use the eigenvectors to compute a particular quantity by summing the fourth power of each of their elements, and then I average this quantity over 300 iterations of the matrix.
I am using the Eigen package for this calculation, and I compile the code with mpiCC -I eigen -Ofast filename.cpp. For a 512 x 512 matrix the whole procedure takes 2.5 hours. Now I need to do the same for a 2748 x 2748 matrix, and it is still running after approximately 12.5 hours. Is there any way I can make the code run faster?
The code is given here for reference:
#include <iostream>
#include <complex>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <Eigen/Dense>
#include <fstream>
#include <random>
#include "mpi.h"
#define pi 3.14159
using namespace std;
using namespace Eigen;
#define no_of_processor 75 // no of processors used for computing
#define no_of_disorder_av 300 //300 iterations for each w
#define A_ratio 1 //aspect ratio Ly/Lx
#define Lx 8
#define w_init 0.1 // initial value of potential strength
#define del_w 0.036 // increment of w in each loop
#define w_loop 75 // no of different w
#define alpha ((sqrt(5.0)-1.0)/2.0) // parenthesized so the macro expands safely inside expressions
double onsite_pot(int x,int y, int z, double phi, double alpha_0){
double B11=alpha;
double B12=alpha;
double B13=alpha;
double b1= (double)B11*x+(double)B12*y+(double)B13*z;
double c11= 1.0-cos(2*M_PI*b1+phi); //printf("%f\n",c1);
double c12= 1.0+(alpha_0*cos(2*M_PI*b1+phi));
double c1=c11/c12;
return c1;
}
int main(int argc, char *argv[])
{
clock_t begin = clock();
/*golden ratio----------------------------*/
char filename[200];
double t=1.0;
int i,j,k,l,m;//for loops
double alpha_0=0;
int Ly=A_ratio*Lx;
int Lz= A_ratio*Lx;
int A=Lx*Ly;
int V=A*Lz; //size of the matrix
int numtasks,rank,RC;
RC=MPI_Init(&argc,&argv);
if (RC != 0) {
printf ("Error starting MPI program. Terminating.\n");
MPI_Abort(MPI_COMM_WORLD, RC);
}
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
sprintf(filename,"IPR3D%dalpha%g.dat",rank+1,alpha_0);
ofstream myfile;
myfile.open(filename, ios::app); //preparing file to write in
int n = w_loop/no_of_processor;
double w=w_init+(double)(n*rank*del_w);
int var_w_loop = 0;
MatrixXcd H(V,V); // matrix getting defined here
MatrixXcd evec(V,V); // matrix for eigenvector
VectorXcd temp(V); // vector for a temporary space used later in calculation
double IPR[V], E_levels[V]; // for average value of the quantity and eigen values.
do{
for(i=0;i<V;i++)
{
IPR[i]=E_levels[i]=0.0;
}
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*----loop for disorder average---------------------------*/
for (l=0;l<no_of_disorder_av;l++){
H.setZero(); // reset the matrix; the original loops started at i=V, j=V and never executed
double phi=(2*M_PI*(double)l)/(double)no_of_disorder_av;
//matrix assignment starts
int z=0;
for (int plane=0; plane<Lz; plane++)
{
z += 1 ;
int y=0;
int indx1= plane*A ; //initial index of each plane
int indx2= indx1+A-1; // last index of each plane
for (int linchain=0; linchain<Ly; linchain++){
y += 1;
int x=0;
int indx3= indx1 + linchain*Lx ; //initial index of each chain
int indx4= indx3 + Lx-1 ; //last index of each chain
for (int latpoint=0; latpoint<Lx; latpoint++){
x += 1;
int indx5= indx3 +latpoint; //index of each lattice point
H(indx5,indx5)= 2*w*onsite_pot(x,y,z,phi,alpha_0); //onsite potential
if (indx5<indx4){ //hopping inside a chain
H(indx5,(indx5+1))= t; //printf("%d %d\n",indx5,indx5+1);
H((indx5+1),indx5)= t;
}
if (indx5<=(indx2-Lx)){ //hopping between different chain
H(indx5,(indx5+Lx))= t; //printf("%d %d\n",indx5,indx5+Lx);
H((indx5+Lx),indx5)= t;
printf("%d\n",indx5);
}
if (indx5<(V-A)){
H(indx5,(indx5+A))= t; //printf("%d %d\n",indx5,indx5+A);// hopping between different plane
H((indx5+A),indx5)= t;
}
} //latpoint loop
}//linchain loop
}//plane loop
//PB..............................................
for (int plane=0; plane<Lz; plane++){
int indx1= plane*A; //initial index of each plane
int indx2 = indx1+A-1 ;//last indx of each plane
//periodic boundary condition x
for (int linchain=0; linchain<Ly; linchain++){
int indx3 = indx1 + linchain*Lx; // initital index of each chain
int indx4=indx3+ Lx-1; //last index of each chain
H(indx3,indx4)= t; //printf("%d %d\n",indx3,indx4);
H(indx4,indx3)= t;
}//linchain loop
//periodic boundary condition y
for (int i=0; i<Lx; i++){
int indx5 = indx1+i;
int indx6 = indx5+(Ly-1)*Lx; //printf("%d %d\n",indx5,indx6);
H(indx5,indx6)=t;
H(indx6,indx5)=t;
}
}//plane loop
//periodic boundary condition in z
for (int i=0; i<A; i++){
int indx1=i ;
int indx2=(Lz-1)*A+i ;
H(indx1,indx2)= t; //printf("%d %d\n",indx1,indx2);
H(indx2,indx1)= t ;
}
//matrix assignment ends
/**-------------------------------------------------------*/
double Tr = abs(H.trace());
for(i=0;i<V;i++)
{
for(j=0;j<V;j++)
{
if(i==j)
{
H(i,j) = H(i,j)-(Tr/(double)V);
}
}
}
SelfAdjointEigenSolver<MatrixXcd> es(H); //defining the diagonalizing function
double *E = NULL;
E = new double[V]; // for the eigenvalues
for(i=0;i<V;i++)
{
E[i]=es.eigenvalues()[i];
//cout<<"original eigenvalues "<<E[i]<<"\n";
}
evec=es.eigenvectors();
double bandwidth = E[V-1] - E[0];
for(i=0;i<V;i++)
E[i]=E[i]/bandwidth;
for(i=0;i<V;i++)
{
E_levels[i] = E_levels[i]+E[i]; //summing over energies for each iteration
}
delete[] E;
E=NULL;
//main calculation process
for(i=0;i<V;i++)
{
temp = evec.col(i);
double num=0.0,denom=0.0;
for(j=0;j<V;j++)
{
num=num+pow(abs(temp(j)),4);
denom=denom+pow(abs(temp(j)),2);
}
IPR[i] = IPR[i]+(num/(denom*denom));
} //calculation ends
}//no_of_disorder_av loop (l)
for(i=0; i<V; i++)
{
myfile<<w<<"\t"<<(E_levels[i]/(double)no_of_disorder_av)<<"\t"
<<(IPR[i]/(double)no_of_disorder_av)<<"\n"; //taking output in file
}
var_w_loop++; // counts number of w loop
w+= del_w; // proceeds to next w
}while(var_w_loop<n) ; // w varying do while loop
MPI_Finalize();
clock_t end = clock();
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("time spent %f s\n\n",time_spent);
return 0;
}
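Two hedged observations, offered as a sketch rather than a tested fix: every entry assigned to H above is real (t is 1.0 and onsite_pot returns a double), so the same computation could use a real MatrixXd with SelfAdjointEigenSolver<MatrixXd>, which should cut the memory and the diagonalization work substantially; and the per-element pow() loops in the IPR step can be replaced by Eigen's vectorized array operations. A minimal sketch of that step, assuming evec holds the (already unit-norm) eigenvectors as columns:
// p(j,i) = |evec(j,i)|^2; the denominator is 1 for normalized eigenvectors,
// but it is kept here to mirror the original loop
MatrixXd p = evec.cwiseAbs2();
RowVectorXd num = p.array().square().matrix().colwise().sum(); // sum_j |psi_j|^4 per state
RowVectorXd denom = p.colwise().sum();                         // sum_j |psi_j|^2 per state
for (i=0; i<V; i++)
    IPR[i] += num(i)/(denom(i)*denom(i));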

Related

Optimize c++ Monte Carlo simulation with long dynamic arrays

This is my first post here and I am not that experienced, so please excuse my ignorance.
I am building a Monte Carlo simulation in C++ for my PhD and I need help optimizing its computational time and performance. I have a 3D cube repeated along each coordinate as the simulation volume, and inside every cube magnetic particles are generated in clusters. Then protons are created in the central cube; they move, and at each step they calculate the total magnetic field (among other things) that they feel from all the particles.
At the moment I define everything inside the main function, and because I need the positions of the particles for my calculations (I calculate the distance between the particles during their placement and also during the proton movement), I store them in dynamic arrays. I haven't used any class or function yet. This makes my simulations really slow, because I eventually have to use millions of particles and thousands of protons; even with hundreds it needs days. I also use a lot of for and while loops and read/write to .dat files.
I really need your help. I have spent weeks trying to optimize my code and my project is behind schedule. Do you have any suggestions? I need the arrays to store the positions of the particles. Do you think classes or functions would be more efficient? Any advice in general is helpful. Sorry if that was too long, but I am desperate...
OK, I edited my original post and I share my full script. I hope this will give you some insight into my simulation. Thank you.
Additionally, I add the two input files:
parametersDiffusion_spher_shel.txt
parametersIONP_spher_shel.txt
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <math.h>
#include <iomanip> //precision to output
//#include <time.h>
#include <ctime>
#include <cstdlib>
#include <algorithm>
#include <string>
//#include <complex>
#include <chrono> //random generator
#include <random>
using namespace std;
#define PI 3.14159265
#define tN 500000 //# of timepoints (steps) to define the arrays ONLY
#define D_const 3.0E-9 //diffusion constant (m^2/s)
#define Beq 0.16 // Tesla
#define gI 2.6752218744E8 //(sT)^-1
int main(){
//Mersenne Twister random engine
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
//uniform_int_distribution<int> intDist(0,1);
uniform_real_distribution<double> realDist(0.,1.);
//for(int i=1; i<100; i++){
//cout<<"R max: "<<Ragg-Rspm<<" r_spm: "<<(Ragg-Rspm)*sqrt(realDist(rng))<<endl;
//}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
//input files
double Rionp=1.0E-8, Ragg=2.0E-7, t_tot=2.0E-2, l_tot = 3.0E-4;
int ionpN=10, aggN=10,cubAxN=10, parN=1E5;
int temp_ionpN, temp_aggN, temp_cubAxN, temp_parN;
ifstream inIONP;
inIONP.open("parametersIONP_spher_shel.txt");
if(!inIONP){
cout << "Unable to open IONP parameters file";
exit(1); // terminate with error
}
while (inIONP>>Rionp>>Ragg>>temp_ionpN>>temp_aggN>>l_tot>>temp_cubAxN) {
ionpN = (int)temp_ionpN;
aggN = (int)temp_aggN;
cubAxN = (int)temp_cubAxN;
}
inIONP.close();
cout<<"Rionp: "<<Rionp<<" ionpN: "<<ionpN <<" aggN: "<<aggN<<endl;
cout<<"l_tot: "<<l_tot<<" cubAxN: "<<cubAxN<<endl;
ifstream indiff;
indiff.open("parametersDiffusion_spher_shel.txt");
if(!indiff){
cout << "Unable to open diffusion parameters file";
exit(1); // terminate with error
}
while (indiff>>temp_parN>>t_tot) {
parN = (int)temp_parN;
}
indiff.close();
cout<<"parN: "<<parN<<" t_tot: "<<t_tot<<endl;
/////////////////////////////////////////////////////////////////////////////////////////////////
int cubN = pow(cubAxN,3.); // total cubes
int Nionp_tot = ionpN*aggN*cubN; //total IONP
double f_tot = (double)Nionp_tot*(4.*PI*pow(Rionp,3.)/3.)/pow(l_tot,3.);//volume density
//central cube
double l_c = l_tot/(double)cubAxN;
int Nionp_c = ionpN*aggN; //SPM in central cube
double f_c = (double)Nionp_c*(4.*PI*pow(Rionp,3.)/3.)/pow(l_c,3.);
cout<<"f_tot: "<<f_tot<<" Nionp_tot: "<<Nionp_tot<<" l_tot "<<l_tot<<endl;
cout<<"f_c: "<<f_c<<" Nionp_c: "<<Nionp_c<<" l_c "<<l_c<<endl;
cout<<"Now IONP are generated..."<<endl;
//position of aggregate (spherical distribution IONP)
double *x1_ionp, *x2_ionp, *x3_ionp, *theta_ionp, *phi_ionp, *r_ionp, *x1_agg, *x2_agg, *x3_agg;
x1_ionp = new double [Nionp_tot];
x2_ionp = new double [Nionp_tot];
x3_ionp = new double [Nionp_tot];
theta_ionp = new double [Nionp_tot];
phi_ionp = new double [Nionp_tot];
r_ionp = new double [Nionp_tot];
x1_agg = new double [Nionp_tot];
x2_agg = new double [Nionp_tot];
x3_agg = new double [Nionp_tot];
int ionpCounter = 0;
int aggCounter = 0;
double x1_aggTemp=0., x2_aggTemp=0., x3_aggTemp=0.;
double ionpDist = 0.; //distance SPM-SPM
for(int a=0; a<cubAxN; a++){ //x1-filling cubes
for(int b=0; b<cubAxN; b++){ //x2-
for(int c=0; c<cubAxN; c++){ //x3-
bool far_ionp = true;
cout<<"cube: (a, b, c): ("<<a<<", "<<b<<", "<<c<<")"<<endl;
for(int i=0; i<aggN; i++){ //aggregate iterations
x1_aggTemp=realDist(rng)*l_c + l_c*a - l_tot/2.; //from neg to pos filling
x2_aggTemp=realDist(rng)*l_c + l_c*b - l_tot/2.;
x3_aggTemp=realDist(rng)*l_c + l_c*c - l_tot/2.;
for(int j=0; j<ionpN; j++){ //SPM iterations
// cout<<"SPM: "<<j<<" aggregate: "<<i<<" cube: (a, b, c): ("<<a<<", "<<b<<", "<<c<<")"<<endl;
x1_agg[ionpCounter]=x1_aggTemp;
x2_agg[ionpCounter]=x2_aggTemp;
x3_agg[ionpCounter]=x3_aggTemp;
//uniform 4pi distribution in sphere
while(true){
far_ionp = true; //must be updated!
theta_ionp[ionpCounter] = 2.*PI*realDist(rng);
phi_ionp[ionpCounter] = acos(1. - 2.*realDist(rng));
r_ionp[ionpCounter] = (Ragg-Rionp)*sqrt(realDist(rng)); // to have uniform distribution sqrt
x1_ionp[ionpCounter] = sin(phi_ionp[ionpCounter])*cos(theta_ionp[ionpCounter])*r_ionp[ionpCounter] + x1_agg[ionpCounter];
x2_ionp[ionpCounter] = sin(phi_ionp[ionpCounter])*sin(theta_ionp[ionpCounter])*r_ionp[ionpCounter] + x2_agg[ionpCounter];
x3_ionp[ionpCounter] = cos(phi_ionp[ionpCounter])*r_ionp[ionpCounter] + x3_agg[ionpCounter];
for(int m=0; m<ionpCounter; m++){ //impenetrable IONP to each other
ionpDist = sqrt(pow(x1_ionp[m]-x1_ionp[ionpCounter],2.)+pow(x2_ionp[m]-x2_ionp[ionpCounter],2.)+pow(x3_ionp[m]-x3_ionp[ionpCounter],2.));
//cout<<"spmDist: "<<spmDist<<endl;
if((j>0) && (ionpDist <= 2*Rionp)){
far_ionp = false;
cout<<"CLOSE ionp-ionp! Distanse ionp-ionp: "<<ionpDist<<endl;
}
}
if(far_ionp){
cout<<"IONP can break now! ionpCounter: "<<ionpCounter<<endl;
break;
}
}
cout<<"r_ionp: "<<r_ionp[ionpCounter]<<" x1_ionp: "<<x1_ionp[ionpCounter]<<" x2_ionp: "<<x2_ionp[ionpCounter]<<" x3_ionp: "<<x3_ionp[ionpCounter]<<endl;
cout<<"x1_agg: "<<x1_agg[ionpCounter]<<" x2_agg: "<<x2_agg[ionpCounter]<<" x3_agg: "<<x3_agg[ionpCounter]<<endl;
ionpCounter++;
}
aggCounter++;
}
}
}
}
cout<<"ionpCounter: "<<ionpCounter<<" aggCounter: "<<aggCounter<<endl;
//=====proton diffusion=============//
//outfile
//proton diffusion time-positionSPM_uniform
FILE *outP_tPos;
outP_tPos = fopen("V3_MAT_positionProtons_spherical.dat","wb+");
if(!outP_tPos){// file couldn't be opened
cerr << "Error: file could not be opened" << endl;
exit(1);
}
//proton diffusion time-positionSPM_uniform
FILE *outP_tB;
outP_tB = fopen("V3_MAT_positionB_spherical.dat","wb+");
if(!outP_tB){// file couldn't be opened
cerr << "Error: file could not be opened" << endl;
exit(1);
}
double *cosPhase_S, *sinPhase_S, *m_tot;
cosPhase_S = new double [tN];
sinPhase_S = new double [tN];
m_tot = new double [tN];
double tstep = 0.; // time of each step
int stepCounter = 0; // counter for the steps for each proton
int cnt_stpMin=0; //, cnt_stpMax=0; //counters for the step length conditions
for (int i=0; i<parN; i++){// repetition for all the protons in the sample
stepCounter = 0; //reset
cout<<"Now diffusion calculated for proton: "<<i<<endl;
double x0[3]={0.}, xt[3]={0.}, vt[3]={0.};
double tt=0.;
double stepL_min = Rionp/8.; //min step length
double stepL_max = Rionp; //max step length
double stepL = 0.;
double extraL = 0.; //extra length beyond central cube
bool hit_ionp = false;
double pIONPDist = 0.; // proton-IONP Distance (vector ||)
double pIONPCosTheta = 0.; //proton_IONP vector cosTheta with Z axis
double Bloc = 0.; //B 1 IONP
double Btot = 0.; //SUM B all IONP
double Dphase = 0.; //Delta phase for step 1p
double phase = 0.; //phase 1p
double theta_p=0., phi_p=0.;
//randomized initial position of the particle;
x0[0] = realDist(rng)*l_c - l_c/2.;
x0[1] = realDist(rng)*l_c - l_c/2.;
x0[2] = realDist(rng)*l_c - l_c/2.;
//for (int j=0; j<tN; j++){ //steps
bool diffTime = true; // flag protons are allowed to diffuse (tt<10ms)
while(diffTime){ // steps loop
//unit vector for 4p direction
theta_p = 2.*PI*realDist(rng);
phi_p = acos(1. - 2.*realDist(rng));
vt[0] = sin(phi_p)*cos(theta_p);
vt[1] = sin(phi_p)*sin(theta_p);
vt[2] = cos(phi_p);
//determine length of step
for(int k=0; k<ionpCounter; k++){
if(abs(sqrt(pow(x1_ionp[k]-x0[0],2.)+pow(x2_ionp[k]-x0[1],2.)+pow(x3_ionp[k]-x0[2],2.))-Rionp) <= 8*Rionp){
//spm closer than 8R
stepL = Rionp/8;
cnt_stpMin ++;
break;
}
else if(abs(sqrt(pow(x1_ionp[k]-x0[0],2.)+pow(x2_ionp[k]-x0[1],2.)+pow(x3_ionp[k]-x0[2],2.))-Rionp) > 8*Rionp){
stepL = Rionp;
}
else{
cout<<"sth wrong with the proton-IONP distance!"<<endl;
}
}
//determine Dt step duration
tstep = pow(stepL,2.)/(6.*D_const);
tt += tstep;
if(tt>t_tot){
diffTime = false; //proton is not allowed to diffuse any longer
cout<<"Proton id: "<<i<<" has reached diffusion time! -> Move to next one!"<<endl;
cout<<"stepCounter: "<<stepCounter<<" cnt_stpMin: "<<cnt_stpMin<<endl;
}
while(true){
xt[0]=x0[0]+vt[0]*stepL;
xt[1]=x0[1]+vt[1]*stepL;
xt[2]=x0[2]+vt[2]*stepL;
for(int m=0; m<3; m++){
if(abs(xt[m]) > l_c/2.){ //particle outside central cube,// reflected, elastic collision(no!)
//particle enters fron the other way, periodicity
// hit_cx[m] = true; //I don't need it yet
extraL = abs(xt[m]) - l_c/2.;
// xt[m]=-x0[m];
cout<<"proton outside! xt[m]: "<<xt[m]<<" extra lenght: "<<extraL<<endl;
xt[m] = xt[m]-l_c;
cout<<"Relocating => new x[t]: "<<xt[m]<<endl;
}
}
for(int k=0; k<ionpCounter; k++){//check if proton inside SPM
pIONPDist = sqrt(pow((x1_ionp[k]-xt[0]),2.)+pow((x2_ionp[k]-xt[1]),2.)+pow((x3_ionp[k]-xt[2]),2.)) - Rionp;
if(pIONPDist <= 0.){
cout<<"proton inside IONP => reposition! Distance: "<<pIONPDist<<" Rionp: "<<Rionp<<endl;
hit_ionp = true;
}
else if(pIONPDist > 0.){
hit_ionp=false; //with this I don't have to reset flag in the end
//calculations of Bloc for this position
pIONPCosTheta = (x3_ionp[k]-xt[2])/pIONPDist;
Bloc = pow(Rionp,3.)*Beq*(3.*pIONPCosTheta - 1.)/pow(pIONPDist,3.);
Btot += Bloc;
//cout<<"pSPMDist: "<<pSPMDist<<" pSPMCosTheta: "<<pSPMCosTheta<<" Bloc: "<<Bloc<<" Btot: "<<Btot<<endl;
}
else{
cout<<"Something wrong with the calculation of pIONPDist! "<<pIONPDist<<endl;
hit_ionp = true;
}
}
if(!hit_ionp){
// hit_spm=false; //reset flag (unnecessary, already false)
break;
}
}// end of while for new position -> the new position is determined, Btot calculated
// Dphase, phase
Dphase = gI*Btot*tstep;
phase += Dphase;
//store phase for this step
//filled for each proton at this timepoint (step)
cosPhase_S[stepCounter] += cos(phase);
sinPhase_S[stepCounter] += sin(phase);
//reset Btot
Btot = 0.;
stepCounter++;
} //end of for loop step
} //end of for loop particles
//-----calculate the <m> the total magnetization
for(int t=0; t<tN; t++){
m_tot[t] = sqrt(pow(cosPhase_S[t],2.) + pow(sinPhase_S[t],2.))/(double)parN;
//cout<<"m_tot[t]: "<<m_tot[t]<<endl;
}
fclose(outP_tPos); //proton time-position
fclose(outP_tB); //proton time-B
//====== outfile data=============//
//----- output data of SPM position---------//
FILE *outP_S;
outP_S = fopen("V3_MAT_positionSPM_spherical.dat","wb+");
if(!outP_S){// file couldn't be opened
cerr << "Error: file could not be opened" << endl;
exit(1);
}
for (int i=0; i<ionpCounter; ++i){
fprintf(outP_S,"%.10f \t %.10f \t %.10f\n",x1_ionp[i],x2_ionp[i],x3_ionp[i]);
}
fclose(outP_S);
FILE *outP_agg;
outP_agg = fopen("V3_MAT_positionAggreg_spherical.dat","wb+");
if(!outP_agg){// file couldn't be opened
cerr << "Error: file could not be opened" << endl;
exit(1);
}
for (int j=0; j<ionpCounter; ++j){
fprintf(outP_agg,"%.10f \t %.10f \t %.10f\n",x1_agg[j],x2_agg[j],x3_agg[j]);
}
fclose(outP_agg);
FILE *outSngl;
outSngl = fopen("V3_MAT_positionSingle_spherical.dat","wb+");
if(!outSngl){// file couldn't be opened
cerr << "Error: file could not be opened" << endl;
exit(1);
}
int findAgg = (int)(realDist(rng)*aggN);
int idxMin = findAgg*ionpN;
int idxMax = idxMin + ionpN;
for (int k=idxMin; k<idxMax; ++k){
fprintf(outSngl,"%.10f\t%.10f\t%.10f\t%.10f\t%.10f\t%.10f\n",x1_agg[k],x2_agg[k],x3_agg[k],x1_ionp[k],x2_ionp[k],x3_ionp[k]);
}
fclose(outSngl);
//delete new arrays
delete[] x1_ionp;
delete[] x2_ionp;
delete[] x3_ionp;
delete[] theta_ionp;
delete[] phi_ionp;
delete[] r_ionp;
delete[] x1_agg;
delete[] x2_agg;
delete[] x3_agg;
delete[] cosPhase_S;
delete[] sinPhase_S;
delete[] m_tot;
}
I tackled the problem in steps; the first thing I did was make the run reproducible:
mt19937 rng(127386261); //I want a deterministic seed
Then I created a script to compare the three output files generated by the program:
#!/bin/bash
diff V3_MAT_positionAggreg_spherical.dat V3_MAT_positionAggreg_spherical2.dat
diff V3_MAT_positionSingle_spherical.dat V3_MAT_positionSingle_spherical2.dat
diff V3_MAT_positionSPM_spherical.dat V3_MAT_positionSPM_spherical2.dat
Here the files ending in 2 are created by the optimized code and the others by your version.
I ran your version compiled with the -O3 flag and noted the time (for 20 magnetic particles and 10 protons it takes 79 seconds on my box; my exact architecture is not that important, because we are only going to compare differences).
Then I started refactoring step by step, rerunning after every small change and comparing the output files and the time. Here are all the iterations:
Remove redundant else if: gained 5 seconds (total run 74.0 s)
if(sqrt(pow(x1_ionp[k]-x0[0],2.)+pow(x2_ionp[k]-x0[1],2.)+pow(x3_ionp[k]-x0[2],2.)) <= 7*Rionp){
//spm closer than 8R
stepL = Rionp/8;
cnt_stpMin ++;
break;
}
else { //this was an else if and an else for error that will never happen
stepL = Rionp;
}
At this point I ran it under the profiler, and the pow function stood out.
Replace pow with square and cube: gained 61 seconds (total run 13.2 s)
Simply replacing pow(x,2.) with square(x) and pow(x,3.) with cube(x) cuts the run time by a factor of about six:
double square(double d)
{
return d*d;
}
double cube(double d)
{
return d*d*d;
}
From here the gain per change is much smaller, but still worth having.
Remove a redundant sqrt (total run 12.9 s)
double ionpDist = square(x1_ionp[m]-x1_ionp[ionpCounter])+square(x2_ionp[m]-x2_ionp[ionpCounter])+square(x3_ionp[m]-x3_ionp[ionpCounter]);
//cout<<"spmDist: "<<spmDist<<endl;
if((j>0) && (ionpDist <= 4*square_Rionp)){
Introduce the const variables square_Rionp and cube_Rionp (total run 12.7 s)
const double square_Rionp = square(Rionp);
const double cube_Rionp = cube(Rionp);
//replaced in the code like this
if((j>0) && (ionpDist <= 4*square_Rionp)){
Introduce constants for the multiples of pi (total run 12.6 s)
const double Two_PI = PI*2.0;
const double FourThird_PI = PI*4.0/3.0;
Remove another redundant else if (total run 11.9 s)
if(pIONPDist <= 0.){
cout<<"proton inside IONP => reposition! Distance: "<<pIONPDist<<" Rionp: "<<Rionp<<endl;
hit_ionp = true;
}
else { //this was an else if without any reason
hit_ionp=false; //with this I don't have to reset flag in the end
//calculations of Bloc for this position
pIONPCosTheta = (x3_ionp[k]-xt[2])/pIONPDist;
...
}
Remove another redundant square root (total run 11.2 s)
const double Seven_Rionp_squared = square(7*Rionp);
...
for(int k=0; k<ionpCounter; k++){
if(square(x1_ionp[k]-x0[0])+square(x2_ionp[k]-x0[1])+square(x3_ionp[k]-x0[2]) <= Seven_Rionp_squared){
//spm closer than 8R
stepL = stepL_min;
cnt_stpMin ++;
break;
}
I don't see many more obvious things to squeeze performance out of; further optimization would require some real thinking.
I did another comparison run with 50 magnetic particles and 10 protons, and I found that my version is 7 times faster than yours while producing exactly the same files.
I would do this exercise with the help of source control.
Your code is trivially parallelizable, but I would only go down that route once the single-threaded version is optimized; a sketch of what that could look like follows.
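As a hedged illustration only (not one of the measured steps above): each proton in the outer loop is independent, so with OpenMP 4.5 or later the loop could be split across threads, giving each thread its own generator and reducing the phase accumulators:
#include <omp.h>
// ...
#pragma omp parallel
{
    // per-thread RNG so the runs stay independent; the seed offset is illustrative
    mt19937 local_rng(127386261 + omp_get_thread_num());
    uniform_real_distribution<double> localDist(0., 1.);
    #pragma omp for reduction(+ : cosPhase_S[:tN], sinPhase_S[:tN])
    for (int i = 0; i < parN; i++) {
        // ... the proton walk exactly as in the serial loop,
        // using local_rng/localDist instead of the shared rng ...
    }
}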
EDIT
Change += to the = operator (total run 6.23 s)
I noticed that the += operator is used for no reason; substituting operator = is a substantial gain:
cosPhase_S[stepCounter] = cos(phase);
sinPhase_S[stepCounter] = sin(phase);

OpenMP implementation slower than serial implementation [duplicate]

This question already has an answer here:
OpenMP program is slower than sequential one
(1 answer)
Closed 5 years ago.
I am currently trying to get familiar with OpenMP. For practice I implemented a greedy "learning" algorithm with OpenMP. Then I measured the time with
time ./a.out
I compared it with my serial implementation, and no matter how many iterations my program does, the OpenMP one is always significantly slower.
Here is my code; the comments should hopefully explain everything:
#include <omp.h>
#include <iostream>
#include <vector>
#include <cstdlib>
#include <cmath>
#include <stdio.h>
#include <ctime>
#define THREADS 4
using namespace std;
struct TrainData {
double input;
double output;
};
//Long Term Memory struct
struct LTM {
double a; //parameter a of the polynomial
double b;
double c;
double score; //score to be minimized!
LTM()
{
a=0;
b=0;
c=0;
score=0;
}
//random LTM with parameters from low to high (including low and high)
LTM(int low, int high)
{
score=0;
a= rand() % high + low;
b= rand() % high + low;
c= rand() % high + low;
}
LTM(double _a, double _b, double _c)
{
a=_a;
b=_b;
c=_c;
}
void print()
{
cout<<"Score: "<<score<<endl;
cout<<"a: "<<a<<" b: "<<b<<" c: "<<c<<endl;
}
};
//the actual polynomial function, evaluated with the passed LTM
inline double evaluate(LTM &ltm, const double &x)
{
double ret;
ret = ltm.a*x*x + ltm.b*x + ltm.c;
return ret;
}
//scoring function calculates the Root Mean Square error (RMS)
inline double score_function(LTM &ltmnew, vector<TrainData> &td)
{
double score;
double val;
int tdsize=td.size();
score=0;
for(int i=0; i< tdsize; i++)
{
val = (td.at(i)).output - evaluate(ltmnew, (td.at(i)).input);
val *= val;
score += val;
}
score /= (double)tdsize;
score = sqrt(score);
return score;
}
LTM iterate(int iterations, vector<TrainData> td, int low, int high)
{
LTM fav = LTM(low,high);
fav.score = score_function(fav, td);
fav.print();
LTM favs[THREADS]; // array for collecting the favorites of each thread
#pragma omp parallel num_threads(THREADS) firstprivate(fav, low, high, td)
{
#pragma omp master
printf("Threads: %d\n", omp_get_num_threads());
LTM cand;
#pragma omp for private(cand)
for(int i=0; i<iterations; i++)
{
cand = LTM(low, high);
cand.score = score_function(cand, td);
if(cand.score < fav.score)
fav = cand;
}
//save the favorite before ending the parallel section
#pragma omp critical
favs[omp_get_thread_num()] = fav;
}
//search for the best one in the array
for(int i=0; i<THREADS; i++)
{
if(favs[i].score < fav.score)
fav=favs[i];
}
return fav;
}
//generate training data from -50 up to 50 with the train LTM
void generateTrainData(vector<TrainData> *td, LTM train)
{
#pragma omp parallel for schedule(dynamic, 25)
for(int i=-50; i< 50; i++)
{
struct TrainData d;
d.input = i;
d.output = evaluate(train, (double)i);
#pragma omp critical
td->push_back(d);
//cout<<"input: "<<d.input<<" -> "<<d.output<<endl;
}
}
int main(int argc, char *argv[])
{
int its= 10000000; //number of iterations
int a=2;
int b=4;
int c=6;
srand(time(NULL));
LTM pol = LTM(a,b,c); //original polynom parameters
vector<TrainData> td;
//first generate some training data and save it to td
generateTrainData(&td, pol);
//try to find the best solution
LTM fav = iterate( its, td, 1, 6);
printf("Final: a=%f b=%f c=%f score: %f\n", fav.a, fav.b, fav.c, fav.score);
return 0;
}
On my home PC this implementation took 12 s; the serial one only 6 s.
If I increase the number of iterations by a factor of 10, it is around 2 min / 1 min (OpenMP / serial).
Can anyone help me?
Okay, thanks to the comments on my initial question I could solve the performance issue.
As the comments said, the problem was the rand() function I was using: rand() keeps shared hidden state, so concurrent calls from several threads contend on it.
I replaced it with an appropriate thread-safe drand48_r(), like:
...
LTM(double low, double high, struct drand48_data *buff)
{
score=0;
double x;
drand48_r(buff,&x);
a= low + x * (high - low);
drand48_r(buff,&x);
b= low + x * (high - low);
drand48_r(buff,&x);
c= low + x * (high - low);
}
...
Now I get times under one second!
Thanks! :)
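As a hedged, portable alternative (not from the original post): the same fix can be written in standard C++11 by giving each thread its own engine, avoiding the glibc-specific drand48_r. The helper name random_in is illustrative:
#include <random>
#include <omp.h>
double random_in(double low, double high)
{
    // one engine per thread; seeding by thread id is illustrative
    thread_local std::mt19937 thread_rng(1234 + omp_get_thread_num());
    std::uniform_real_distribution<double> uniform(low, high);
    return uniform(thread_rng);
}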

Memory Overflow? std::badalloc

I have a program that solves 1D Brownian motion generally, using Euler's method.
Since it is a stochastic process, I want to average over many particles. But I find that as I ramp up the number of particles, it overloads and I get a std::bad_alloc error, which I understand is a memory error.
Here is my full code:
#include <iostream>
#include <vector>
#include <fstream>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <ctime>
using namespace std;
// Box-Muller Method to generate gaussian numbers
double generateGaussianNoise(double mu, double sigma) {
const double epsilon = std::numeric_limits<double>::min();
const double tau = 2.0 * 3.14159265358979323846;
static double z0, z1;
static bool generate;
generate = !generate;
if (!generate) return z1 * sigma + mu;
double u1, u2;
do {
u1 = rand() * (1.0 / RAND_MAX);
u2 = rand() * (1.0 / RAND_MAX);
} while (u1 <= epsilon);
z0 = sqrt(-2.0 * log(u1)) * cos(tau * u2);
z1 = sqrt(-2.0 * log(u1)) * sin(tau * u2);
return z0 * sigma + mu;
}
int main() {
// Initialize Variables
double gg; // Gaussian Number Picked from distribution
// Integrator
double t0 = 0; // Setting the Time Window
double tf = 10;
double n = 5000; // Number of Steps
double h = (tf - t0) / n; // Time Step Size
// Set Constants
const double pii = atan(1) * 4; // pi
const double eta = 1; // viscous constant
const double m = 1; // mass
const double aa = 1; // radius
const double Temp = 30; // Temperature in Kelvins
const double KB = 1; // Boltzmann Constant
const double alpha = (6 * pii * eta * aa);
// More Constants
const double mu = 0; // Gaussian Mean
const double sigma = 1; // Gaussian Std Deviation
const double ng = n; // No. of pts to generate for Gauss distribution
const double npart = 1000; // No. of Particles
// Initial Conditions
double x0 = 0;
double y0 = 0;
double t = t0;
// Vectors
vector<double> storX; // Vector that keeps displacement values
vector<double> storY; // Vector that keeps velocity values
vector<double> storT; // Vector to store time
vector<double> storeGaussian; // Vector to store Gaussian numbers generated
vector<double> holder; // Placeholder Vector for calculation operations
vector<double> mainstore; // Vector that holds the final value desired
storT.push_back(t0);
// Prepares mainstore
for (int z = 0; z < (n+1); z++) {
mainstore.push_back(0);
}
for (int NN = 0; NN < npart; NN++) {
holder.clear();
storX.clear();
storY.clear();
storT.clear();
storT.push_back(0);
// Prepares holder
for (int z = 0; z < (n+1); z++) {
holder.push_back(0);
storX.push_back(0);
storY.push_back(0);
}
// Gaussian Generator
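// note: srand below reseeds on every particle; time(NULL) changes only once
// per second, so many consecutive particles can get the same random sequence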
srand(time(NULL));
for (double iiii = 0; iiii < ng; iiii++) {
gg = generateGaussianNoise(0, 1); // generateGaussianNoise(mu,sigma)
storeGaussian.push_back(gg);
}
// Solver
for (int ii = 0; ii < n; ii++) {
storY[ii + 1] =
storY[ii] - (alpha / m) * storY[ii] * h +
(sqrt(2 * alpha * KB * Temp) / m) * sqrt(h) * storeGaussian[ii];
storX[ii + 1] = storX[ii] + storY[ii] * h;
holder[ii + 1] =
pow(storX[ii + 1], 2); // Finds the displacement squared
t = t + h;
storT.push_back(t);
}
// Updates the Main Storage
for (int z = 0; z < storX.size(); z++) {
mainstore[z] = mainstore[z] + holder[z];
}
}
// Average over the number of particles
for (int z = 0; z < storX.size(); z++) {
mainstore[z] = mainstore[z] / (npart);
}
// Outputs the data
ofstream fout("LangevinEulerTest.txt");
for (int jj = 0; jj < storX.size(); jj++) {
fout << storT[jj] << '\t' << mainstore[jj] << '\t' << storX[jj] << endl;
}
return 0;
}
As you can see, npart is the variable that I change to vary the number of particles. After each iteration I do clear my storage vectors like storX and storY, so on paper the number of particles should not affect memory usage; I am only repeating the same loop many more times and adding onto the main storage vector mainstore. I am running my code on a computer with 4 GB of RAM.
I would greatly appreciate it if anyone could point out my errors in logic or suggest improvements.
Edit: Currently the number of particles is set to npart = 1000, so when I try to ramp it up to npart = 20000 or npart = 50000, it gives me memory errors.
Edit2: I've edited the code to allocate an extra index to each of the storage vectors, but it does not seem to fix the memory overflow.
There is an out-of-bounds access in the solver part: storY has size n and you access index ii+1, where ii goes up to n-1. For the code as provided, storY has size 5000, so indices 0 through 4999 (inclusive) are valid, but you try to access index 5000. The same holds for storX, holder and mainstore.
Also, storeGaussian never gets cleared before new values are added, so it grows by n on every npart iteration, n * npart values in total, which grows without bound as you increase the particle count. You only access the first n values of it in the solver part anyway.
Please note that vector::clear removes all elements from the vector but does not necessarily change the vector's capacity (i.e. its storage array); see the documentation.
This won't cause a problem here, because you reuse the same array in the next runs, but it is something to be aware of when using vectors.
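A hedged fix sketch for the growth just described, using the names from the posted code:
// at the top of the particle (NN) loop, with the other clear() calls:
storeGaussian.clear(); // otherwise the vector grows by n every particle
for (int k = 0; k < (int)ng; k++) {
    storeGaussian.push_back(generateGaussianNoise(mu, sigma));
}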

Having problems with ctime, and working out function running time

I'm having trouble working out the running time of my two max-subarray functions (timed right at the bottom of the code).
The output it gives me:
Inputsize: 101 Time using Brute Force: 0 Time Using DivandCon: 12
is correct for the second clock() difference, but the first difference (diff1) just gives me 0 and I'm not sure why.
Edit: Revised Code.
Edit2: Added Output.
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <limits.h>
using namespace std;
int Kedane(int a[], int size)
{
int max_so_far = 0, max_ending_here = 0;
int i;
for(i = 0; i < size; i++)
{
max_ending_here = max_ending_here + a[i];
if(max_ending_here < 0)
max_ending_here = 0;
if(max_so_far < max_ending_here)
max_so_far = max_ending_here;
}
return max_so_far;
}
int BruteForce(int array[],int n)
{
int sum,ret=0;
for(int j=-1;j<=n-2;j++)
{
sum=0;
for(int k=j+1;k<=n-1;k++)
{
sum+=array[k];
if(sum>ret)
{
ret=sum;
}
}
}
return ret;
}
//------------------------------------------------------
// FUNCTION WHICH FINDS MAX OF 2 INTS
int max(int a, int b) { return (a > b)? a : b; }
// FUNCTION WHICH FINDS MAX OF 3 NUMBERS
// CALL MAX FUNCT FOR 2 VARIS TWICE!
int max(int a, int b, int c) { return max(max(a, b), c); }
// WORKS OUT FROM MIDDLE+1->RIGHT THE MAX SUM &
// THE MAX SUM FROM MIDDLE->LEFT + RETURNS SUM OF THESE
int maxCrossingSum(int arr[], int l, int m, int h)
{
int sum = 0; // LEFT OF MID
int LEFTsum = INT_MIN; // INITIALLISES SUM TO LOWEST POSSIBLE INT
for (int i = m; i >= l; i--)
{
sum = sum + arr[i];
if (sum > LEFTsum)
LEFTsum = sum;
}
sum = 0; // RIGHT OF MID
int RIGHTsum = INT_MIN;
for (int i = m+1; i <= h; i++)
{
sum = sum + arr[i];
if (sum > RIGHTsum)
RIGHTsum = sum;
}
// RETURN SUM OF BOTH LEFT AND RIGHT SIDE MAX'S
return LEFTsum + RIGHTsum;
}
// Returns sum of maxium sum subarray in aa[l..h]
int maxSubArraySum(int arr[], int l, int h)
{
// Base Case: Only one element
if (l == h)
return arr[l];
// Find middle point
int m = (l + h)/2;
/* Return maximum of following three possible cases
a) Maximum subarray sum in left half
b) Maximum subarray sum in right half
c) Maximum subarray sum such that the subarray crosses the midpoint */
return max(maxSubArraySum(arr, l, m),
maxSubArraySum(arr, m+1, h),
maxCrossingSum(arr, l, m, h));
}
// DRIVER
int main(void)
{
std::srand (time(NULL));
// CODE TO FILL ARRAY WITH RANDOMS [-50;50]
int size=30000;
int array[size];
for(int i=0;i<size;i++)
{
array[i]=(std::rand() % 100) -50;
}
// TIMING VARI'S
clock_t t1,t2;
clock_t A,B;
clock_t K1,K2;
volatile int mb, md, qq;
//VARYING ELEMENTS IN THE ARRAY
for(int n=101;n<size;n=n+100)
{
t1=clock();
mb=BruteForce(array,n);
t2=clock();
A=clock();
md=maxSubArraySum(array, 0, n-1) ;
B=clock();
K1=clock();
qq=Kedane(array, n);
K2=clock();
cout<< n << "," << (double)t2-(double)t1 << ","<<(double)B-(double)A << ","<<(double)K2-(double)K1<<endl;
}
return 0;
}
101,0,0,0
201,0,0,0
301,1,0,0
401,0,0,0
501,0,0,0
601,0,0,0
701,0,0,0
801,1,0,0
901,1,0,0
1001,0,0,0
1101,1,0,0
1201,1,0,0
1301,0,0,0
1401,1,0,0
1501,1,0,0
1601,2,0,0
1701,1,0,0
1801,2,0,0
1901,1,1,0
2001,1,0,0
2101,2,0,0
2201,3,0,0
2301,2,0,0
2401,3,0,0
2501,3,0,0
2601,3,0,0
2701,4,0,0
2801,4,0,0
2901,4,0,0
3001,4,0,0
3101,4,0,0
3201,5,0,0
3301,5,0,0
3401,6,0,0
3501,5,0,0
3601,6,0,0
3701,6,0,0
3801,8,0,0
3901,7,0,0
4001,8,0,0
4101,7,0,0
4201,10,1,0
4301,9,0,0
4401,8,0,0
4501,9,0,0
4601,10,0,0
4701,11,0,0
4801,11,0,0
4901,11,0,0
5001,12,0,1
5101,11,1,0
5201,13,0,0
5301,13,0,0
5401,15,0,0
5501,14,0,0
5601,16,0,0
5701,15,0,0
5801,15,1,0
5901,16,0,0
6001,17,0,0
6101,18,0,0
6201,18,0,0
6301,19,0,0
6401,21,0,0
6501,19,0,0
6601,21,1,0
6701,20,0,0
6801,22,0,0
6901,23,0,0
7001,22,0,0
7101,24,0,0
7201,26,0,0
7301,26,0,0
7401,24,1,0
7501,26,0,0
7601,27,0,0
7701,28,0,0
7801,28,0,0
7901,30,0,0
8001,29,0,0
8101,31,0,0
8201,31,1,0
8301,35,0,0
8401,33,0,0
8501,35,0,0
8601,35,1,0
8701,35,0,0
8801,36,1,0
8901,37,0,0
9001,38,0,0
9101,39,0,0
9201,41,1,0
9301,40,0,0
9401,41,0,0
9501,42,0,0
9601,45,0,0
9701,45,0,0
9801,44,0,0
9901,47,0,0
10001,47,0,0
10101,48,0,0
10201,50,0,0
10301,51,0,0
10401,50,0,0
10501,51,0,0
10601,53,0,0
10701,55,0,0
10801,54,0,0
10901,56,0,0
11001,57,0,0
11101,56,0,0
11201,60,0,0
11301,60,0,0
11401,61,1,0
11501,61,1,0
11601,63,0,0
11701,62,1,0
11801,66,1,0
11901,65,0,0
12001,68,1,0
12101,68,0,0
12201,70,0,0
12301,71,0,0
12401,72,0,0
12501,73,1,0
12601,73,1,0
12701,76,0,0
12801,77,0,0
12901,78,1,0
13001,79,1,0
13101,80,0,0
13201,83,0,0
13301,82,0,0
13401,86,0,0
13501,85,1,0
13601,86,0,0
13701,89,0,0
13801,90,0,1
13901,90,0,0
14001,91,0,0
14101,97,0,0
14201,93,0,0
14301,96,0,0
14401,99,0,0
14501,100,0,0
14601,101,0,0
14701,101,0,0
14801,103,1,0
14901,104,0,0
15001,107,0,0
15101,108,0,0
15201,109,0,0
15301,109,0,0
15401,114,0,0
15501,114,0,0
15601,115,0,0
15701,116,0,0
15801,119,0,0
15901,118,0,0
16001,124,0,0
16101,123,1,0
16201,123,1,0
16301,125,0,0
16401,127,1,0
16501,128,1,0
16601,131,0,0
16701,132,0,0
16801,134,0,0
16901,134,1,0
17001,135,1,0
17101,139,0,0
17201,139,0,0
17301,140,1,0
17401,143,0,0
17501,145,0,0
17601,147,0,0
17701,147,0,0
17801,150,1,0
17901,152,1,0
18001,153,0,0
18101,155,0,0
18201,157,0,0
18301,157,1,0
18401,160,0,0
18501,160,1,0
18601,163,1,0
18701,165,0,0
18801,169,0,0
18901,171,0,1
19001,170,1,0
19101,173,1,0
19201,178,0,0
19301,175,1,0
19401,176,1,0
19501,180,0,0
19601,180,1,0
19701,182,1,0
19801,184,0,0
19901,187,1,0
20001,188,1,0
20101,191,0,0
20201,192,1,0
20301,193,1,0
20401,195,0,0
20501,199,0,0
20601,200,0,0
20701,201,0,0
20801,209,1,0
20901,210,0,0
21001,206,0,0
21101,210,0,0
21201,210,0,0
21301,213,0,0
21401,215,1,0
21501,217,1,0
21601,218,1,0
21701,221,1,0
21801,222,1,0
21901,226,1,0
22001,225,1,0
22101,229,0,0
22201,232,0,0
22301,233,1,0
22401,234,1,0
22501,237,1,0
22601,238,0,1
22701,243,0,0
22801,242,1,0
22901,246,1,0
23001,246,0,0
23101,250,1,0
23201,250,1,0
23301,254,1,0
23401,254,0,0
23501,259,0,1
23601,260,1,0
23701,263,1,0
23801,268,0,0
23901,266,1,0
24001,271,0,0
24101,272,1,0
24201,274,1,0
24301,280,0,1
24401,279,0,0
24501,281,0,0
24601,285,0,0
24701,288,0,0
24801,289,0,0
24901,293,0,0
25001,295,1,0
25101,299,1,0
25201,299,1,0
25301,302,0,0
25401,305,1,0
25501,307,0,0
25601,310,1,0
25701,315,0,0
25801,312,1,0
25901,315,0,0
26001,320,1,0
26101,320,0,0
26201,322,0,0
26301,327,1,0
26401,329,0,0
26501,332,1,0
26601,339,1,0
26701,334,1,0
26801,337,0,0
26901,340,0,0
27001,341,1,0
27101,342,1,0
27201,347,0,0
27301,348,1,0
27401,351,1,0
27501,353,0,0
27601,356,1,0
27701,360,0,1
27801,361,1,0
27901,362,1,0
28001,366,1,0
28101,370,0,1
28201,372,0,0
28301,375,1,0
28401,377,1,0
28501,380,0,0
28601,384,1,0
28701,384,0,0
28801,388,1,0
28901,391,1,0
29001,392,1,0
29101,399,1,0
29201,399,0,0
29301,404,1,0
29401,405,0,0
29501,409,1,0
29601,412,2,0
29701,412,1,0
29801,422,1,0
29901,419,1,0
The return values from BruteForce and maxSubArraySum are never used, and this gives the compiler a lot of latitude when it comes to optimizing them.
On my machine, for example, clang -O3 reduces the call to BruteForce to a vector copy and nothing else.
One method for forcing the evaluation of these functions is to write their results to volatile variables:
volatile int mb, md;
// ...
mb = BruteForce(array, n);
// ...
md = maxSubArraySum(array, 0, n-1);
As the variables are volatile, the value given by the right-hand side of the assignments must be stored, despite the absence of any other side-effects, which prevents the compiler from optimising the computation away.
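One hedged side note beyond the volatile fix: clock() also has coarse granularity, so a very fast call can legitimately measure as 0 ticks even when it is evaluated. For sub-millisecond sections, std::chrono gives finer resolution, for example:
#include <chrono>
// ...
auto t_start = std::chrono::steady_clock::now();
mb = BruteForce(array, n);
auto t_end = std::chrono::steady_clock::now();
auto us = std::chrono::duration_cast<std::chrono::microseconds>(t_end - t_start).count();
std::cout << "BruteForce: " << us << " us\n";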

array "not used in this scope". Why?

I'm getting an error saying that 'adjacencymatrix' was not declared in this scope, right at the end of main (before the function makebond at the end), at the commented line 112 marked "BROKEN LINE". Why? Sorry about this being simple. I'm compiling with g++ ($ g++ a.c -o f).
Here's the code:
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
using namespace std;
#define PI 3.1415926535897932384626433832795
#define sqr(x) ((x)*(x))
#define count 500
double density;
double volume;
int N;
double beta = 0.1;
double R = 5;
double rob = 1;
int dimension = 2;
double eps=0.1; // Increase in density
double mindensity = 0; // Minimum density
double maxdensity = 8; // max.dens (scaled for the sake of ensuring int()
int makebond(double x);
int main(){
srand(time(0));
for (int rho=mindensity;rho<=(maxdensity/eps);density++){
N = floor(density*volume);
double nodepositions[N][dimension];
// Place nodes in volume (square side L, circle volume *R and obstacle *rob)
for (int i=0;i<N;i++){
int L = 5;
double distancefromorigin;
double x = (L*((double)rand()/RAND_MAX))-(L/2); // cast: integer division would almost always give 0
double y = (L*((double)rand()/RAND_MAX))-(L/2);
distancefromorigin = sqrt((x*x)+(y*y));
if(distancefromorigin<R){
if(distancefromorigin>rob){
nodepositions[i][0] = x;
nodepositions[i][1] = y;
}
}
}
double adjacencymatrix [N][N];
double itzhak; //distance of node 1 from the centre
double isaac; //distance of node 2 from the centre
double vivaldi; //distance between node 1 and node 2
double phi; // a function of the above 3 doubles (see later usage)
double rubicon; // maximum distance nodes within the icecream can be apart before becoming visually indepdendent
double maxtheta; // "in the icecream" means theta < maxtheta
double theta; // angular displacement of inner point from the line bisecting the icecream
// Create adjacency matrix (note alternative implementation using incidence lists)
for (int i=0;i<N;i++){
for (int j=0;j<N;j++){
double x0 = nodepositions[i][0];
double y0 = nodepositions[i][1];
double x1 = nodepositions[j][0];
double y1 = nodepositions[j][1];
itzhak = sqrt(sqr(x0) + sqr(y0));
isaac = sqrt(sqr(x1) + sqr(y1));
vivaldi = sqrt(sqr(x0-x1) + sqr(y0-y1));
phi = ((sqr(vivaldi)+sqr(itzhak)-sqr(isaac))/(2*vivaldi*itzhak));
rubicon = ((itzhak*phi) - sqrt((sqr(rob)) - ((sqr(itzhak))*(1-sqr(phi)))));
maxtheta = asin(rob/itzhak);
theta = acos(phi);
if (x0==x1 && y0==y1){
adjacencymatrix[i][j] = 0;
}
else{
if (isaac<itzhak && theta<maxtheta) {
if (vivaldi>rubicon){
adjacencymatrix[i][j] = 0;}
else {
adjacencymatrix[i][j] = makebond(vivaldi);}
}
else{adjacencymatrix[i][j] = makebond(vivaldi);}
}
}
}
}
FILE *datafc1;
datafc1 = fopen("matrix.dat", "w");
for (int ii = 0; ii<N; ii++){
for (int jj = 0; jj<N; jj++){
int aaa;
aaa = adjacencymatrix[ii][jj];///////////////*******BROKEN LINE******
fprintf(datafc1,"%i", aaa);
}
}
fclose(datafc1);
return 0;
}
/////////////////////////////
////////////////
/////// --End Main--
////////////////
////////////////////////////
int makebond(double x){
// This function takes in the euc. dist. between two nodes and draws a link with prob. H(r)
double randomnumber = ((double)rand()/RAND_MAX); // Random number between 0 and 1 (cast avoids integer division)
double hr = exp(-beta*sqr(x));// ***Connection function***
int a = 1; // Number to be put into adjacency matrix
if (randomnumber > hr){
a = 0;
}
return a; //Returns 0 or 1 depending on prob. dist.
}
adjacencymatrix is declared in your first for loop, so it's out of scope before the last spot you're using it, in the print-out loop at the bottom.
In addition, you have a useless using namespace std; line. Your code doesn't include any headers that contain std namespace symbols.
Your code in line 57:
double adjacencymatrix [N][N];
is inside a for loop; outside that loop, adjacencymatrix is undeclared.
Your matrix is defined in the for loop on line 11; therefore it is out of scope on line 112.
FILE *datafc1;
datafc1 = fopen("matrix.dat", "w");
for (int ii = 0; ii<N; ii++){
for (int jj = 0; jj<N; jj++){
int aaa;
//error adjacencymatrix is declared in your first for loop
aaa = adjacencymatrix[ii][jj];///////////////*******BROKEN LINE******
fprintf(datafc1,"%i", aaa);
}
}
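A hedged fix sketch (using std::vector instead of the non-standard variable-length array, since N is only known at run time): declare the matrix at function scope so the print-out loop can see it, and size it inside the loop:
#include <vector>
// at the top of main:
std::vector<std::vector<double> > adjacencymatrix;
// inside the density loop, once N is known:
adjacencymatrix.assign(N, std::vector<double>(N, 0.0));
// the print-out loop at the bottom can then read adjacencymatrix[ii][jj] as before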