OpenMP more threads drastically increases total time - c++

I am writing a Pi estimator for class. The point is to estimate Pi using OpenMP and to analyze the speedup provided by using three separate schedules, static, dynamic and guided. However my total time skyrockets when I add threads. My total time for 1 thread is around 21 seconds, and for 2 threads is roughly 145 seconds. I can't figure out why. Here is my code:
#include <stdio.h>
#include <time.h>
#include <ctime>
#include <omp.h>
#include <assert.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <stdlib.h>
using namespace std;
void computePi(int, int);

/**
 * Entry point of the Pi estimator.
 *
 * Usage: loop [number of iterations] [number of threads]
 *
 * Both arguments are optional; whichever one is missing keeps its default
 * (10000000 dart throws, 8 threads). Prints the requested configuration,
 * runs computePi() and reports the total wall-clock time.
 *
 * @return 0 on success, 1 when an argument is not a positive integer.
 */
int main(int argc, char *argv[])
{
    cout.precision(20);
    omp_set_dynamic(0);

    // Defaults, used for every argument that is not supplied.
    int n = 10000000;
    int p = 8;

    // Each argument is read only if it is actually present: the previous
    // "argc > 1" test also dereferenced argv[2], which is NULL when only the
    // iteration count is given.
    if (argc > 1)
    {
        n = atoi(argv[1]);
    }
    if (argc > 2)
    {
        p = atoi(argv[2]);
    }
    if (n <= 0 || p <= 0)
    {
        printf("Usage: %s [number of iterations > 0] [number of threads > 0]\n", argv[0]);
        return 1;
    }

    printf("Debug: dart throws = %d \n", n);
    printf("Debug: number of requested threads = %d\n", p);
    omp_set_num_threads(p);

    // Named "elapsed" rather than "time" so it does not shadow ::time().
    double elapsed = omp_get_wtime();
    computePi(n, p);
    elapsed = omp_get_wtime() - elapsed;
    printf("Total time = %f seconds \n ", elapsed);
    return 0;
}
void computePi(int n, int p) {
omp_set_num_threads(p);
int i;
int hits = 0;
srand(time(NULL));
double timeStatic = omp_get_wtime();
#pragma omp parallel for shared(i) schedule(static) reduction(+:hits)
for (i = 0; i<n; i++)
{
float r, x, xdiff, y, ydiff;
x = (float)rand() / (float)RAND_MAX;
y = (float)rand() / (float)RAND_MAX;
if (y > .50)
{
ydiff = y - .50;
}
else
{
ydiff = .50 - y;
}
if (x > .50)
{
xdiff = x - .50;
}
else
{
xdiff = .50 - x;
}
xdiff *= xdiff;
ydiff *= ydiff;
r = sqrt(ydiff + xdiff);
if (r <= .50)
{
hits += 1;
}
}
timeStatic = omp_get_wtime() - timeStatic;
float percentage;
percentage = (float)hits / (float)n;
cout << "Static Loop" << endl;
cout << "Hit Percentage: " << percentage * 100 << "%" << endl;
cout << "Pi Estimation: " << percentage * 4 << endl;
cout << "Time Taken: " << timeStatic << endl << endl;
hits = 0;
double timeDynamic = omp_get_wtime();
#pragma omp parallel for shared(i) schedule(dynamic) reduction(+:hits)
for (i = 0; i<n; i++)
{
float r, x, xdiff, y, ydiff;
x = (float)rand() / (float)RAND_MAX;
y = (float)rand() / (float)RAND_MAX;
if (y > .50)
{
ydiff = y - .50;
}
else
{
ydiff = .50 - y;
}
if (x > .50)
{
xdiff = x - .50;
}
else
{
xdiff = .50 - x;
}
xdiff *= xdiff;
ydiff *= ydiff;
r = sqrt(ydiff + xdiff);
if (r <= .50)
{
hits += 1;
}
}
timeDynamic = omp_get_wtime() - timeDynamic;
percentage = (float)hits / (float)n;
cout << "Dynamic Loop" << endl;
cout << "Hit Percentage: " << percentage * 100 << "%" << endl;
cout << "Pi Estimation: " << percentage * 4 << endl;
cout << "Time Taken: " << timeDynamic << endl << endl;
hits = 0;
double timeGuided = omp_get_wtime();
#pragma omp parallel for shared(i) schedule(guided) reduction(+:hits)
for (i = 0; i<n; i++)
{
float r, x, xdiff, y, ydiff;
x = (float)rand() / (float)RAND_MAX;
y = (float)rand() / (float)RAND_MAX;
if (y > .50)
{
ydiff = y - .50;
}
else
{
ydiff = .50 - y;
}
if (x > .50)
{
xdiff = x - .50;
}
else
{
xdiff = .50 - x;
}
xdiff *= xdiff;
ydiff *= ydiff;
r = sqrt(ydiff + xdiff);
if (r <= .50)
{
hits += 1;
}
}
timeGuided = omp_get_wtime() - timeGuided;
percentage = (float)hits / (float)n;
cout << "Guided Loop" << endl;
cout << "Hit Percentage: " << percentage * 100 << "%" << endl;
cout << "Pi Estimation: " << percentage * 4 << endl;
cout << "Time Taken: " << timeGuided << endl << endl;
return;
}
Any help would be appreciated.

Related

Logistic Regression Returning Wrong Prediction

I'm trying to implement logistic regression in C++, but the predictions I'm getting are not even close to what I am expecting. I'm not sure if there is an error in my understanding of logistic regression or the code.
I have reviewed the algorithms and messed with the learning rate, but the results are very inconsistent.
// Model parameters, all starting at zero.
double theta[4] = {0.0, 0.0, 0.0, 0.0};

// Training inputs: one row per sample, one column per feature.
double x[2][3] = {
    {1.0, 1.0, 1.0},
    {9.0, 9.0, 9.0},
};

// Training labels, one per row of x.
double y[2] = {0.0, 1.0};

// Inputs the trained model is asked to predict.
double test_x[1][3] = {
    {9.0, 9.0, 9.0},
};

// Sizes derived from the array extents above.
int m = sizeof(x) / sizeof(*x);                // number of training rows
int test_m = sizeof(test_x) / sizeof(*test_x); // number of prediction rows
int n = sizeof(theta) / sizeof(*theta);        // number of parameters
int xn = n - 1;                                // one less than the parameter count
struct Logistic
{
double sigmoid(double total)
{
double e = 2.71828;
double sigmoid_x = 1 / (1 + pow(e, -total));
return sigmoid_x;
}
double h(int x_row)
{
double total = theta[0] * 1;
for(int c1 = 0; c1 < xn; ++c1)
{
total += theta[c1 + 1] * x[x_row][c1];
}
double final_total = sigmoid(total);
//cout << "final total: " << final_total;
return final_total;
}
double cost()
{
double hyp;
double temp_y;
double error;
for(int c1 = 0; c1 < m; ++c1)
{
//passes row of x to h to calculate sigmoid(xi * thetai)
hyp = h(c1);
temp_y = y[c1];
error += temp_y * log(hyp) + (1 - temp_y) * log(1 - hyp);
}// 1 / m
double final_error = -.5 * error;
return final_error;
}
void gradient_descent()
{
double alpha = .01;
for(int c1 = 0; c1 < n; ++c1)
{
double error = cost();
cout << "final error: " << error << "\n";
theta[c1] = theta[c1] - alpha * error;
cout << "theta: " << c1 << " " << theta[c1] << "\n";
}
}
void train()
{
for(int epoch = 0; epoch <= 10; ++epoch)
{
gradient_descent();
cout << "epoch: " << epoch << "\n";
}
}
vector<double> predict()
{
double temp_total;
double total;
vector<double> final_total;
//hypothesis equivalent function
temp_total = theta[0] * 1;
for(int c1 = 0; c1 < test_m; ++c1)
{
for(int c2 = 0; c2 < xn; ++c2)
{
temp_total += theta[c2 + 1] * test_x[c1][c2];
}
total = sigmoid(temp_total);
//cout << "final total: " << final_total;
final_total.push_back(total);
}
return final_total;
}
};
int main()
{
Logistic test;
test.train();
vector<double> prediction = test.predict();
for(int c1 = 0; c1 < test_m; ++c1)
{
cout << "prediction: " << prediction[c1] << "\n";
}
}
Start with a very small learning rate and a larger number of iterations, then try again. I haven't tested your code, but I guess the cost/error/energy jumps from hump to hump.
Somewhat unrelated to your question, but rather than computing e^-total using pow, use exp instead (it's a hell of a lot faster!). Also there is no need to make the sigmoid function a member func, make it static or just a normal C func (it doesn't require any member variable from your struct).
// Logistic function: maps any real `total` to 1 / (1 + e^-total).
static double sigmoid(double total)
{
    const double denominator = 1.0 + exp(-total);
    return 1.0 / denominator;
}

C++/OpenMP error in rand

This is my code, which should compute Pi with the Monte Carlo method. The inputs are: thread_count - the number of processor threads, and n - the number of randomly generated points.
Here's my code below
using namespace std;
// Advances a per-thread xorshift32 state and returns a coordinate in [-1, 1).
// Portable stand-in for rand_r(), which is POSIX-only and is not declared by
// every toolchain. `state` must be non-zero.
static double nextCoordinate(unsigned &state)
{
    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 5;
    // The top 24 bits give a uniform value in [0, 1), then stretched to [-1, 1).
    return (double)(state >> 8) / 16777216.0 * 2 - 1;
}

/**
 * Monte Carlo estimation of Pi with OpenMP.
 *
 * Usage: ./pi <thread_count> <n>
 *   thread_count - number of threads, n - number of random points.
 * Prints the computation time in milliseconds.
 */
int main (int argc, char* argv[]) {
    /* Validate the arguments: thread_count - number of threads, n - number of points */
    if (argc != 3) {
        cout << "Co Ty piszesz!. Ma być: ./pi <thread_count> <n>" << endl;
        exit(-1);
    }
    /* Convert the arguments to integers (atol so that n is not narrowed to int) */
    int thread_count = atoi(argv[1]);
    long n = atol(argv[2]);
    /* Range check */
    if (thread_count <= 0 || n <= 0) {
        cout << "Co Ty piszesz!. Ma byc większe od 0" << endl;
        exit(-1);
    }
    long int Ustrzelone = 0; // points that landed inside the unit circle
    double Start = omp_get_wtime();
    #pragma omp parallel default(none) firstprivate(n) reduction(+:Ustrzelone) num_threads(thread_count)
    {
        // One generator state per thread; the seeds are distinct and non-zero.
        unsigned ziarno = 25231 + 16*omp_get_thread_num();
        // Discard a few outputs so that neighbouring seeds decorrelate.
        for (int warm = 0; warm < 8; warm++) {
            nextCoordinate(ziarno);
        }
        #pragma omp for schedule(dynamic)
        for(long i = 0; i < n; i++) { // exactly n points ("<=" threw n + 1)
            const double x = nextCoordinate(ziarno);
            const double y = nextCoordinate(ziarno);
            if ((x*x) + (y*y) <= 1.0) {
                Ustrzelone += 1;
            }
        }
    }
    double Stop = omp_get_wtime();
    double czas_obliczen = 1000 * (Stop - Start);
    /* Set the number of printed digits and show the result */
    cout.precision(15);
    //double pi = (double) 4*Ustrzelone/n;
    //cout << "Pi wynosi " << pi << endl;
    cout << czas_obliczen << endl;
    return 0;
}
In output I get error like that:
[Error] 'rand_r' was not declared in this scope
I don't have much time left for this, so I hope you can point me towards a way to fix it.
Thanks in advance.
well since you are using namespace std; (which is not recommended), perhaps try ::rand_r to help it find the right implementation...

Eigen remove for in plane line(s) intersection method

I'm trying to do a plane line(s) intersection method using Eigen. The code works, but I want to remove the for loop (used to process each point). The goal is to make this method run as fast as possible.
Input: planeNormal, planePoint, linesP0 (3xN), linesp1 (3xN)
Output: intersectionPoints (3xN)
Number of points is N
Complete code:
void PlaneLineIntersect(const Vector3f &planeNormal, const Vector3f &planeP0, const Matrix3Xf &linesP0, const Matrix3Xf &linesP1, Matrix3Xf &I, float &t)
{
Matrix3Xf p0l0(linesP0.rows(), linesP0.cols());
// ################################
// ## HOW TO REMOVE THIS FOR (colwise?) ##
for (int i = 0; i < linesP0.cols(); i++)
{
float denom = planeNormal.dot(linesP1.col(i) - linesP0.col(i));
if (denom > 1e-6 || denom < -1e-6) // há uma interseção
{
Vector3f p0l0 = planeP0 - linesP0.col(i);
t = p0l0.dot(planeNormal) / denom;
p0l0 = (t * (linesP1.col(i) - linesP0.col(i))) + linesP0.col(i);
I.col(i) = p0l0;
}
}
}
/**
 * Benchmark driver: builds n random lines, intersects them with a fixed
 * plane 100 times and prints the total time and the time per point.
 */
int main(cli::array<System::String ^> ^args)
{
    int n = 1000;
    Eigen::Vector3f planeNormal = Eigen::Vector3f(0, 0, -1);
    Eigen::Vector3f planeP0 = Eigen::Vector3f(0, 0, 7);
    Eigen::Matrix3Xf linesP0 = Matrix3Xf::Random(3, n);
    Eigen::Matrix3Xf linesP1 = Matrix3Xf::Random(3, n);
    Eigen::Matrix3Xf I = Matrix3Xf::Zero(3, n);
    float t = 0.0f; // initialised because firstprivate copies its value
    high_resolution_clock::time_point t1 = high_resolution_clock::now();
    // Every repetition writes the outputs I and t. Sharing them between
    // threads is a data race, so each thread works on its own copy. The
    // results are not read after the loop; only the elapsed time is reported.
#pragma omp parallel for firstprivate(I, t)
    for (int i = 0; i < 100; i++)
        PlaneLineIntersect(planeNormal, planeP0, linesP0, linesP1, I, t);
    high_resolution_clock::time_point t2 = high_resolution_clock::now();
    double duration = (double) (duration_cast<microseconds>(t2 - t1).count());
    cout << "\nTempo total = " << duration / 1000 << " ms" << endl;
    cout << "\nTempo / ponto = " << duration / (n * 100) << " us\n" << endl;
    system("pause");
    return 0;
}

Molecular dynamic, velocity verlet: Kinetic energy divergence

I'm trying to write a simple MD program in C/C++ (I'm used to C but I'm trying to learn C++, so my code is a bit of a mix... I know that this is suboptimal and I will move to full C++ as soon as I fully understand it).
Everything seems to run but I have divergences in Kinetic energy, the system does not thermalize and temperature (prop to K) goes from order(10°K) to order(10000°K) in a single step.
I'm working with a low time-step of 0.002 (total time of simulation: 30) so I should not have this enormous error...
This is my code, if something is not clear I can try to explain it better
// Molecular-dynamics driver. For each of PASSI time steps it accumulates the
// kinetic energy, the pair energy and the pressure term, moves every particle
// with a velocity-Verlet-style update, and writes one line per step
// (time, U, P, total energy, temperature) to the file OUTDATA.
// The "..." lines are parts of the program elided from this listing.
int main(){
...
int n, t, m, i;
double r, K, U, E,P, totalE, temperature, d, x,y,z, temp;
...
double data[5][PARTICELLE], vel[3][PARTICELLE],dataNew[3][PARTICELLE]; //rows 0,1,2 are x,y,z; rows 3 and 4 of data hold per-particle energy and pressure
double force[3][PARTICELLE], forceNew[3][PARTICELLE];
double velQ[PARTICELLE]; //squared velocity of each particle
ofstream out(OUTDATA);
//start of the MD loop
for(t=0; t<PASSI; t++){
//initialization of the per-step accumulators
K=0;
U=0;
E=0;
P=0;
fill(data[3], data[3]+PARTICELLE, 0); //E=0 for each particle
fill(data[4], data[4]+PARTICELLE, 0); //pressure term = 0 for each particle
fill(velQ, velQ+PARTICELLE, 0);
for(i=0; i<3; i++){
fill(force[i], force[i]+PARTICELLE, 0);
fill(forceNew[i], forceNew[i]+PARTICELLE, 0);
}
// NOTE(review): particle n is fully advanced (position and velocity) inside
// this loop, so particles handled later in the same step compute their forces
// from a mix of already-moved and not-yet-moved neighbours. Velocity Verlet
// is normally written as: move all positions, then recompute all forces, then
// update all velocities. Confirm whether the per-particle update is intended.
for(n=0; n<PARTICELLE; n++) { //loop over particle n. A step is a move of all PARTICELLE particles
for (i = 0; i < 3; i++) { //compute vSquare
velQ[n] += vel[i][n] * vel[i][n];
}
K += 0.5 * MASSA * velQ[n]; //compute Kinetic Energy
for(m=0; m<PARTICELLE; m++){ //loop on m!=n to compute F, E, P
if(m!=n){
r=0;
for(i=0; i<3; i++){ //calculation of radius and x,y,z
// d is the component of the vector from particle n to particle m,
// wrapped by NINT(d / LATO) * LATO (presumably the minimum-image convention
// for a periodic box of side LATO; confirm against the NINT definition)
d = data[i][m] - data[i][n];
d = d - (NINT(d / LATO) * LATO);
if(i==0)x=d;
if(i==1)y=d;
if(i==2)z=d;
r += d * d;
}
//if (t<2) cout << "x y z" << x << " " << y << " " << z << endl;
r=sqrt(r);
if (r < R) { //only pairs closer than the cutoff R contribute
data[3][n] += energy(r); //update Energy of n
for(i=0; i<3; i++){
if(i==0)temp=x;
if(i==1)temp=y;
if(i==2)temp=z;
// NOTE(review): temp points from n towards m, so a positive forza() pulls
// n towards m; verify the sign convention of forza() against the potential.
force[i][n]+=forza(r,temp); //compute force (cartesian components)
//if(t<2)cout << "force " <<n << " " << m << " "<< force[i][n] << endl;
}
if (m < n)data[4][n] += (-energy(r) * (1 + r)); //pressure; m < n counts each pair once
}
}
}
// NOTE(review): data[3][n] sums energy(r) over all m != n, so every pair is
// included once from each side when summed into U; check for double counting.
U+=data[3][n]; //total potential energy
P+=data[4][n]; //total pressure
for (i = 0; i < 3; i++) { //Verlet update, part 1: new position from the current velocity and force
dataNew[i][n] = data[i][n] + vel[i][n] * DeltaT + 0.5 * force[i][n] * DeltaT * DeltaT / MASSA;
}
for(m=0; m<PARTICELLE; m++){ //force on n at its new position (the other particles are read from data)
if(m!=n){
r=0;
for(i=0; i<3; i++){
d = data[i][m] - dataNew[i][n];
d = d - (NINT(d / LATO) * LATO);
if(i==0)x=d;
if(i==1)y=d;
if(i==2)z=d;
r += d * d;
}
r=sqrt(r);
if (r < R) {
for(i=0; i<3; i++) {
if (i == 0)temp = x;
if (i == 1)temp = y;
if (i == 2)temp = z;
forceNew[i][n] += forza(r, temp);
}
}
}
}
for(i=0; i<3; i++){ //new position and Verlet part 2: velocity from the average of old and new force
data[i][n]=dataNew[i][n];
vel[i][n]=vel[i][n] + DeltaT * 0.5*(forceNew[i][n] + force[i][n]) / MASSA;
}
}
totalE=U+K; //total energy
temperature = 2*K/(PARTICELLE*3); //2K divided by the 3*PARTICELLE velocity components
out << t*DeltaT << " " << U << " " << P << " " << totalE << " " << temperature << endl;
}
out.close();
return 0;
}
where my system is under a potential e^-r/r, so I have:
// Pair potential energy at separation r: A * SIGMA * exp(-r / SIGMA) / r.
double energy( double r){
    const double decay = exp(-r/SIGMA);
    return A*SIGMA*decay/r;
}
// Cartesian force component for the pair potential
//   U(r) = A * SIGMA * exp(-r / SIGMA) / r      (see energy()).
// Since -dU/dr = A * SIGMA * exp(-r / SIGMA) * (r / SIGMA + 1) / r^2, the
// component along one axis is that magnitude times h / r.
//
// r: distance between the two particles
// h: x, y or z component of the separation vector
//
// The previous body used exp(-r) and (r + 1), which only matches energy()
// when SIGMA == 1; for SIGMA == 1 the result is unchanged.
double forza(double r, double h){ //h is for x,y,z
    const double scaled = r / SIGMA;
    return (A*SIGMA*(exp(-scaled)*h*(scaled+1)/(r*r*r)));
}
Thanks for any help. I'm working on this code since April and still I have no solution...
edit: to be clearer: CAPITAL terms and DeltaT are values defined in DEFINE

Harmonic Oscillator not getting oscillations

I am writing a harmonic oscillator program , but it does an exponential decay instead and I am not sure why. I have the code below. I use F= -k x and F = m a
I assume m = 1 and k = 1, so a = -x
So my equations to find velocity and position are
v(t) = v(t-dt) - x(t-dt) * dt
x(t) = x(t-dt) + v(t-dt) * dt
I am not sure what I am doing wrong
The code associated with this is
#include <iostream>
#include <fstream>
using namespace std;
double updateX(double intialV, double intialX, double step);
double updateV(double intialV, double intialX, double step);
int main()
{
long double position[2];long double velocity[2];
position[0] = 0.0; velocity[0] = 1.0; double time = 0.0; double step = .1;
ofstream outputFile;
outputFile.open("velo1.dat");
outputFile << time << " " << position[0] << " " << velocity[0] << "\n";
for(int i = 1; i < 50; i++)
{
time = i * step;
velocity[1] = updateV(velocity[0], position[0], step);
position[1] = updateX(velocity[0], position[0], step);
outputFile << time << " " << position[1] << " " << velocity[1] << "\n";
velocity[0] = velocity[1];
position[0] = velocity[1];
}
return 0;
outputFile.close();
}
// One explicit Euler position step: x + v * dt.
double updateX(double intialV, double intialX, double step)
{
    const double displacement = intialV * step;
    return intialX + displacement;
}
// One explicit Euler velocity step for a = -x: v - x * dt.
double updateV(double intialV, double intialX, double step)
{
    const double velocityChange = intialX * step;
    return intialV - velocityChange;
}