Implementation of Euler Totient function in OpenMP - c++

I am new to OpenMP and I am trying to implement Euler Totient function in OpenMP.
This is my first attempt to code in OpenMP. My goal is to implement it in parallel (fastest, if possible) and then in sequential. After that I calculate the speedup and efficiency.
Below is my code:
/*
============================================================================
Name : 55390_ass3.cpp
Author : Kamil Kamili
Version :
Copyright : Your copyright notice
Description : Euler’s Totient function φ(n) in OpenMP
============================================================================
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
int eulerTotient(int number)
{
int sNumber = number;
for(int i = 2; i*i <= number; ++i)
{
if(number % i == 0)
{
while(number % i == 0)
{
number /= i;
}
}
}
if(number > 1)
{
sNumber -= sNumber / number;
}
return sNumber;
}
int main (int argc, char *argv[])
{
int nTheads = omp_get_num_threads();
int nMaxThreads = omp_get_max_threads();
printf("Parallel Execution: \n");
double startTimeForParallel = omp_get_wtime();
#pragma omp parallel for
for(int i = 2; i < 10000; i++)
{
printf("φ(%d) = %d\n", i, eulerTotient(i));
}
double parallelTime = omp_get_wtime() - startTimeForParallel;
printf("Sequential Execution: \n");
double startTimeForSequential = omp_get_wtime();
for(int i = 2; i < 10000; i++)
{
printf("φ(%d) = %d\n", i, eulerTotient(i));
}
double sequentialTime = omp_get_wtime() - startTimeForSequential;
double speedUp = sequentialTime/parallelTime;
printf("Parallel Execution Time: ",parallelTime);
printf("Sequential Execution Time: " << sequentialTime);
printf("SpeedUp: "<<speedUp);
return 0;
}
But it does not calculate 'Parallel Time'. Any idea where I am going wrong?
So I edited my code a bit here and there after doing some research. I think I am not using the function omp_get_wtime() correctly.
By Edited Code:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
double omp_get_wtime(void);
int eulerTotient(int number)
{
int sNumber = number;
for(int i = 2; i*i <= number; ++i)
{
if(number % i == 0)
{
while(number % i == 0)
{
number /= i;
}
}
}
if(number > 1)
{
sNumber -= sNumber / number;
}
return sNumber;
}
int main (int argc, char *argv[])
{
int nTheads = omp_get_num_threads();
int nMaxThreads = omp_get_max_threads();
printf("Parallel Execution: %f\n");
double parallelTime, endTimeForParallel;
double startTimeForParallel = omp_get_wtime();
#pragma omp parallel for
for(int i = 2; i < 10000; i++)
{
printf("O(%d) = %d\n", i, eulerTotient(i));
}
endTimeForParallel = omp_get_wtime();
parallelTime = startTimeForParallel = endTimeForParallel;
printf("Sequential Execution: \n");
double sequentialTime,endTimeForSequential,startTimeForSequential = omp_get_wtime();
for(int i = 2; i < 10000; i++)
{
printf("O(%d) = %d\n", i, eulerTotient(i));
}
endTimeForSequential = omp_get_wtime();
sequentialTime = startTimeForSequential - endTimeForSequential;
printf("Parallel Execution Time: %f seconds\n", parallelTime);
printf("Sequential Execution Time: %f seconds\n", sequentialTime);
double speedUp = sequentialTime/parallelTime;
printf("SpeedUp: %f\n\n",speedUp);
return 0;
}
The output is simply bizarre. Anyways, here it is:
Parallel Execution Time: 1477476938.765000 seconds
Sequential Execution Time: -0.099000 seconds
SpeedUp: -0.000000
So a very noob mistake: Fixed it
As stated by #Gilles. I fixed my code. Here it is:
/*
============================================================================
Name : 55390_ass3.c
Author : Kamil Kamili
Version :
Copyright : Your copyright notice
Description : Euler Totient's function in C
============================================================================
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
double omp_get_wtime(void);
int eulerTotient(int number)
{
int sNumber = number;
for(int i = 2; i*i <= number; ++i)
{
if(number % i == 0)
{
while(number % i == 0)
{
number /= i;
}
}
}
if(number > 1)
{
sNumber -= sNumber / number;
}
return sNumber;
}
int main (int argc, char *argv[])
{
int nTheads = omp_get_num_threads();
int nMaxThreads = omp_get_max_threads();
printf("Parallel Execution: %f\n");
double parallelTime, endTimeForParallel;
double startTimeForParallel = omp_get_wtime();
#pragma omp parallel for
for(int i = 2; i < 10000; i++)
{
printf("O(%d) = %d\n", i, eulerTotient(i));
}
endTimeForParallel = omp_get_wtime();
parallelTime = endTimeForParallel - startTimeForParallel;
printf("Sequential Execution: \n");
double sequentialTime,endTimeForSequential,startTimeForSequential = omp_get_wtime();
for(int i = 2; i < 10000; i++)
{
printf("O(%d) = %d\n", i, eulerTotient(i));
}
endTimeForSequential = omp_get_wtime();
sequentialTime = endTimeForSequential - startTimeForSequential;
printf("Parallel Execution Time: %f seconds\n", parallelTime);
printf("Sequential Execution Time: %f seconds\n", sequentialTime);
double speedUp = sequentialTime/parallelTime;
printf("SpeedUp: %f\n\n",speedUp);
return 0;
}
And the output:
Parallel Execution Time: 0.014000 seconds
Sequential Execution Time: 0.095000 seconds
SpeedUp: 6.785746
I still think something is wrong, unless someone tells me otherwise.

Related

Segmentation fault , caused by wrong interval (2D array)

I wanted to learn how threads work, and I tried to make a program, which would use 2 threads, to copy a picture (just to test my newly acquired threading skills) . But I bumped into an error, probably because my interval (created by the interval function) is only working ( I believe) with one dimensional arrays.How can I change my program , to correctly create intervals , which work on 2 dimensional arrays, such as pictures ?
#include <iostream>
#include <vector>
#include <time.h>
#include <thread>
#include <mutex>
#include <png++/png.hpp>
std::mutex my_mutex;
std::vector<int> interval(int max, int n_threads)
{
std::vector<int> intervallum;
int ugras = max / n_threads;
int maradek = max % n_threads;
int n1 = 0;
int n2;
intervallum.push_back(n1);
for (int i = 0; i < n_threads; i++)
{
n2 = n1 + ugras;
if (i == n_threads - 1)
n2 += maradek;
intervallum.push_back(n2);
n1 = n2;
}
return intervallum;
}
void create_image(png::image<png::rgb_pixel> image, png::image<png::rgb_pixel> new_image, int start, int end)
{
std::lock_guard<std::mutex> lock(my_mutex);
for (int i = start; i < end; i++)
for (int j = start; j < end; j++)
{
new_image[i][j].red = image[i][j].red;
new_image[i][j].blue = image[i][j].blue;
new_image[i][j].green = image[i][j].green;
}
}
int main()
{
png::image<png::rgb_pixel> png_image("mandel.png");
int image_size = png_image.get_width() * png_image.get_height();
png::image<png::rgb_pixel> new_image(png_image.get_width(), png_image.get_height());
time_t start, end;
time(&start);
int size = 2;
std::vector<std::thread> threads;
std::vector<int> stuff_interval = interval(image_size, size);
for (int i = 0; i < size-1; i++)
threads.push_back(std::thread(create_image, std::ref(png_image), std::ref(new_image), stuff_interval[i], stuff_interval[i + 1]));
for (auto& i : threads)
i.join();
create_image(png_image,new_image,stuff_interval[size-2],stuff_interval[size-1]);
new_image.write("test.png");
time(&end);
std::cout << (start - end) << std::endl;
return 0;
}
Okay , I found a way around it (this way I am not getting segmentation error, but it does not copy the image correctly, the new image is fully black, here is the code :
EDIT : seems like, I was passing wrong the pictures, that is the reason why the picture was black.
#include <iostream>
#include <vector>
#include <time.h>
#include <thread>
#include <mutex>
#include <png++/png.hpp>
std::mutex my_mutex;
std::vector<int> interval(int max, int n_threads)
{
std::vector<int> intervallum;
int ugras = max / n_threads;
int maradek = max % n_threads;
int n1 = 0;
int n2;
intervallum.push_back(n1);
for (int i = 0; i < n_threads; i++)
{
n2 = n1 + ugras;
if (i == n_threads - 1)
n2 += maradek;
intervallum.push_back(n2);
n1 = n2;
}
return intervallum;
}
void create_image(png::image<png::rgb_pixel>& image, png::image<png::rgb_pixel>& new_image, int start, int end)
{
std::lock_guard<std::mutex> lock(my_mutex);
for (int i = start; i < end; i++)
for (int j = 0; j < image.get_height(); j++)
{
new_image[i][j].red = image[i][j].red;
new_image[i][j].blue = image[i][j].blue;
new_image[i][j].green = image[i][j].green;
}
}
int main()
{
png::image<png::rgb_pixel> png_image("mandel.png");
int image_size = png_image.get_width() * png_image.get_height();
png::image<png::rgb_pixel> new_image(png_image.get_width(), png_image.get_height());
time_t start, end;
time(&start);
int size = 2;
std::vector<std::thread> threads;
std::vector<int> stuff_interval = interval(png_image.get_width(), size);
new_image.write("test2.png");
for (int i = 0; i < size - 1; i++)
threads.push_back(std::thread(create_image, std::ref(png_image), std::ref(new_image), stuff_interval[i], stuff_interval[i + 1]));
for (auto &i : threads)
i.join();
create_image(std::ref(png_image), std::ref(new_image), stuff_interval[size - 1], stuff_interval[size]);
new_image.write("test.png");
time(&end);
std::cout << (start - end) << std::endl;
return 0;
}

Weird crashing program while debug runs smoothly (Eclipse C++)

I'm writing a program for my algorithmic math class at university and I'm using Win 7 (x64), Eclipse Oxygen.1a Release (4.7.1a) with MinGW 6.3.0.
Whenever I build and run the program it crashes with windows claiming 'Abgabe3.exe stopped working' but when trying to find the problem using the debugger and breakpoints I step trough the whole program and it finishes without errors...
I stripped everything not used by the problematic function and copied everything into a seperate file and the exact problem occurs.
Maybe somebody has a clue what happened at my side. ^^
#include <math.h> /* pow, sqrt */
#include <iostream> /* cin, cout */
#include <new> /* new */
#include <string> /* string */
#include <stdlib.h> /* srand, rand */
#include <time.h> /* time */
using namespace std;
void NORM(double* res, double* x, int n){
res[0] = 0.0;
for(int i = 0; i < n; i++){
res[0] += pow(x[i], 2);
}
res[0] = sqrt(res[0]);
}
void initRand(double* x, int n){
srand (time(NULL) * rand());
for(int i = 0; i < n; i++){
x[i] = (((double) rand()) / ((double) RAND_MAX));
}
}
void createArray(double* &x, int n){
if (n > 0){
x = new double[n];
initRand(x, n);
}
}
void printArray(double* x, int n){
if (x != NULL){
cout<<"(\n";
for(int i = 0; i < n; i++){
if(i+1 == n) cout<<x[i];
else if ((i % 5) == 0) cout<<x[i];
else if ( ((i+1) % 5) == 0 ){
cout<<", "<<x[i]<<"\n";
}
else {
cout<<", "<<x[i];
}
}
cout<<"\n)\n";
}
else cout<<"\nError: pointer = NULL\n";
}
unsigned long long int bin(unsigned int n, unsigned int k){
unsigned long long res = 1;
if(k == 0) return 1;
else if( n >= k){
for(unsigned long long int i = 1; i <= k; i++){
res *= (n + 1 - i) / i;
}
}
else return 0;
return res;
}
void newArray(double** x, unsigned int v, unsigned int n){
for(unsigned int i = 0; i < v; i++){
double* ptr = x[i];
createArray(ptr,n);
x[i] = ptr;
}
}
void experiment(double** vektorArray){
unsigned int n = 10, v = 20;
cout<<"Dimension n = "<<n<<"\nAnzahl Versuche v = "<<v<<endl;
//Erstellen der Vektoren
cout<<"Erstellen - starte\n";
vektorArray = new double*[n];
newArray(vektorArray, v, n);
cout<<"Erstellen - fertig\n";
for(unsigned int i = 0; i < v; i++){
if(i%10 == 0) printArray(vektorArray[i], n);
}
}
int main(int argc, char** argv){
double** vektorArray = NULL;
experiment(vektorArray);
return 0;
}
vektorArray = new double*[n];
created an array of size n, but
void newArray(double** x, unsigned int v, unsigned int n)
{
for (unsigned int i = 0; i < v; i++)
{
double* ptr = x[i];
createArray(ptr, n);
x[i] = ptr;
}
}
and
for (unsigned int i = 0; i < v; i++)
{
if (i % 10 == 0)
printArray(vektorArray[i], n);
}
index that array with v. Looks like you got your variables crossed. Strongly recommend giving variables better, more descriptive names to help make this more obvious.

Declaring array as a shared variable in pragma parallel directive and stabilizing the code

I have been trying to parallelize computing the sum value of series using certain number of terms to the processors using block allocation.
In this program, I am generating arithmetic series and want to pass array as a shared variable in the pragma and trying to restructure the pragma parallel directive.
I am new to OPENMP-C. Kindly help me how to insert array value as a shared variable and stabilize the code. I am attaching the code below
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
int main (int argc, char *argv[])
{
int rank, comm_sz;
int number, i, first, difference, global_sum1, global_sum, nprocs, step, local_sum1, local_n;
int* a;
int BLOCK_LOW, BLOCK_HIGH;
double t0, t1;
comm_sz = atoi(argv[1]);
first = atoi(argv[2]);
difference = atoi(argv[3]);
number = atoi(argv[4]);
omp_set_num_threads (comm_sz);
rank = omp_get_thread_num();
a = (int*) malloc (n*sizeof(int));
printf("comm_sz=%d, first=%d, difference=%d, number of terms=%d\n",comm_sz, first, difference, number);
for(i=1; i <= number; i++){
a[i-1] = first + (i-1)*difference;
printf("a[%d]=%d\n",i-1,a[i]);
}
for(i=0; i < number; i++){
printf("a[%d]=%d\n",i,a[i]);}
t0 = omp_get_wtime();
#pragma omp parallel omp_set_num_threads(comm_sz, number, comm_sz, first, difference, global_sum1)
{
BLOCK_LOW = (rank * number)/comm_sz;
BLOCK_HIGH = ((rank+1) * number)/comm_sz;
#pragma omp parallel while private(i, local_sum1)
//int local_sum1 = 0;
i=BLOCK_LOW;
while( i < BLOCK_HIGH )
{
printf("%d, %d\n",BLOCK_LOW,BLOCK_HIGH);
local_sum1 = local_sum1 + a[i];
i++;
}
//global_sum1 = global_sum1 + local_sum1;
#pragma omp while reduction(+:sum1)
i=0;
for (i < comm_sz) {
global_sum1 = global_sum1 + local_sum1;
i++;
}
}
step = 2*first + (n-1)*difference;
sum = 0.5*n*step;
printf("sum is %d\n", global_sum );
t1 = omp_get_wtime();
printf("Estimate of pi: %7.5f\n", global_sum1);
printf("Time: %7.2f\n", t1-t0);
}
There are several mistakes in your code. I've tried to infer what you would like to do. So, I have rewritten your code according to my understanding.
Here is my suggestion:
int main (int argc, char *argv[])
{
int comm_sz, number, i, first, difference, global_sum, step;
int* a;
double t0, t1, sum;
comm_sz = atoi(argv[1]);
first = atoi(argv[2]);
difference = atoi(argv[3]);
number = atoi(argv[4]);
omp_set_num_threads (comm_sz);
a = (int*) malloc (number*sizeof(int));
printf("comm_sz=%d, first=%d, difference=%d, number of terms=%d\n",comm_sz, first, difference, number);
for(i=0; i < number; i++){
a[i] = first + (i)*difference;
printf("a[%d]=%d\n",i,a[i]);
}
t0 = omp_get_wtime();
global_sum = 0;
#pragma omp parallel for private(i) reduction(+:global_sum)
for (i=0; i < number; i++){
global_sum += a[i];
}
step = 2*first + (number-1)*difference;
sum = 0.5*number*step;
t1 = omp_get_wtime();
printf("sum is %d\n", global_sum);
printf("Estimate of pi: %7.5f\n", sum);
printf("Time: %7.2f\n", t1-t0);
}

CPU TIME OF THREAD

How I calculate the time in each thread ? the CPU_time not work in this case , because If the process is multithreaded, the CPU time is the sum for all threads.
Pseudocode example:
PROGRAM MAIN
implicit none
REAL Times_thread1_Started,Times_thread2_Started,....
REAL Times_thread1_finiched
!$OMP PARALLEL
!$OMP DO !for each thread do :
call CPU_TIME_thread1(Times_thread1_Started)
call CPU_TIME_thread2(Times_thread2_Started)
..........
..........
!$OMP END DO
......................
......................
processing multithread
............
............
!$OMP PARALLEL
!$OMP DO !for each thread do :
call CPU_TIME_thread1(Times_thread1_finiched)
write(*,*) 'Thread1 times:',Times_thread1_finiched-Times_thread1_Started
call CPU_TIMEE_thread2(Times_thread2)
write(*,*) 'Thread1 times:',Times_thread1_finiched-Times_thread1_Started
..........
..........
!$OMP END DO
!$OMP END PARALLEL
END
in c++:
#include <stdio.h>
#include <tchar.h>
#include <windows.h>
#include <omp.h>
//---------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
extern void UseTiming1();
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
struct Thread_time {
double* Thread;
};
class Timing {
public:
Timing();
~Timing();
void StartTiming();
void StopTiming();
Thread_time GetUserSeconds() const {
for (int i = 0; i < nthreads; i++){
time.Thread[i]=double(m_userTime[i])/ 10000000.0;
}
//delete[] m_userTime;
return (time);
}
private:
__int64* GetUserTime() const;
__int64* m_userTime;
Thread_time time;
int nthreads;
};
Timing::Timing(){
#pragma omp parallel
{
nthreads=omp_get_num_threads();
}
printf("numbzer thread = %d\n",nthreads);
m_userTime=new __int64[nthreads];
time.Thread=new double[nthreads];
}
Timing::~Timing(){
delete[] m_userTime;
delete[] time.Thread;
}
__int64* Timing::GetUserTime() const {
FILETIME creationTime;
FILETIME exitTime;
FILETIME kernelTime;
FILETIME userTime;
__int64 *CurTime;
CurTime=new __int64[nthreads];
#pragma omp parallel for private(creationTime,exitTime,kernelTime,userTime)
for (int i = 0; i < nthreads; i++){
GetThreadTimes(GetCurrentThread(),
&creationTime, &exitTime,
&kernelTime, &userTime);
CurTime[i] = userTime.dwHighDateTime;
CurTime[i] <<= 32;
CurTime[i] += userTime.dwLowDateTime;
}
return CurTime;
}
void Timing::StartTiming() {
m_userTime = GetUserTime();
}
void Timing::StopTiming() {
//for (int i = 0; i < Number_Thread; i++)
__int64 *curUserTime;
curUserTime = GetUserTime();
for (int i = 0; i < nthreads; i++){
m_userTime[i] = curUserTime[i] - m_userTime[i];
}
}
//---------------------------------------------------------
void Calc()
{
unsigned sum = 0;
// #pragma omp parallel for reduction(+:sum) num_threads(2)
for (int i = 0; i < 1000000; i++)
{
char str[1000];
for (int j = 0; j < 999; j++)
str[j] = char(((i + j) % 254) + 1);
str[999] = 0;
for (char c = 'a'; c <= 'z'; c++)
if (strchr(str, c) != NULL)
sum += 1;
}
printf("sum = %u\n", sum);
}
void UseTiming1()
{
Timing t;
t.StartTiming();
Calc();
t.StopTiming();
for (int i = 0; i < 2; i++)
printf("Thread %d Timing: %.3G seconds.\n", i,t.GetUserSeconds().Thread[i]);
}

measured runtime from c++ "time.h" is double than real

I am running this pthread-c++ program (gauss elimination) on my laptop to measure its runtime.
The program runs about 10 seconds in real but my output shows about 20 seconds. What is wrong with this program?
I used
g++ -pthread main.c
./a.out 32 2048
to run
#include <stdio.h>
#include <stdlib.h>
#include <ctime>
#include <cstdlib>
#include <pthread.h>
#include <iostream>
typedef float Type;
void mat_rand (Type**, int, int);
Type** mat_aloc (int, int);
void mat_free (Type**);
void mat_print (Type**, int, int);
void* eliminate(void*);
unsigned int n, max_threads, active_threads, thread_length;
Type** A;
int current_row;
struct args
{
int start;
int end;
};
typedef struct args argument;
void *print_message_function( void *ptr );
int main(int argc, char *argv[])
{
if (argc < 3)
{
printf ("Error!. Please Enter The Matrix Dimension and No. of Threads!\n");
return 0;
} else
{
n = atoi(argv[2]);
max_threads = atoi(argv[1]);
if (n > 4096)
{
printf ("The maximum allowed size is 4096!\n");
return 0;
}
if (max_threads > 32)
{
printf ("The maximum allowed Threads Count is 32!\n");
return 0;
}
}
A = mat_aloc(n , n+1);
mat_rand (A, n, n+1);
//mat_print (A, n, n+1);
std::clock_t start;
double exe_time;
start = std::clock();
pthread_attr_t attr;
pthread_attr_init(&attr);
argument* thread_args = new argument[max_threads];
pthread_t* thread = new pthread_t[max_threads];
for (int i=0; i<n-1; i++)
{
current_row = i;
if (max_threads >= n-i)
active_threads = n-i-1;
else
active_threads = max_threads;
thread_length = (n-i-1)/active_threads;
for (int j=0; j<active_threads-1; j++)
{
thread_args[j].start = i+1+j*thread_length;
thread_args[j].end = i+1+(j+1)*thread_length;
pthread_create( &thread[j], &attr, eliminate, (void*) &thread_args[j]);
}
thread_args[active_threads-1].start = i+1+(active_threads-1)*thread_length;
thread_args[active_threads-1].end = n-1;
pthread_create(&thread[active_threads-1], &attr, eliminate, (void*) &thread_args[active_threads-1]);
for (int j=0; j<active_threads; j++)
{
pthread_join(thread[j], NULL);
}
}
exe_time = (clock() - start) / (double) CLOCKS_PER_SEC;
printf("Execution time for Matrix of size %i: %f\n", n, exe_time);
//mat_print (A, n, n+1);
return 0;
}
void* eliminate(void* arg)
{
Type k, row_constant;
argument* info = (argument*) arg;
row_constant = A[current_row][current_row];
for (int i=info->start; i<=info->end; i++)
{
k = A[i][current_row] / row_constant;
A[i][current_row] = 0;
for (int j=current_row+1; j<n+1; j++)
{
A[i][j] -= k*A[current_row][j];
}
}
}
// matrix random values
void mat_rand (Type** matrix, int row, int column)
{
for (int i=0; i<row; i++)
for (int j=0; j<column; j++)
{
matrix[i][j] = (float)(1) + ((float)rand()/(float)RAND_MAX)*256;
}
}
// allocates a 2d matrix
Type** mat_aloc (int row, int column)
{
Type* temp = new Type [row*column];
if (temp == NULL)
{
delete [] temp;
return 0;
}
Type** mat = new Type* [row];
if (temp == NULL)
{
delete [] mat;
return 0;
}
for (int i=0; i<row; i++)
{
mat[i] = temp + i*column;
}
return mat;
}
// free memory of matrix
void mat_free (Type** matrix)
{
delete[] (*matrix);
delete[] matrix;
}
// print matrix
void mat_print (Type** matrix, int row, int column)
{
for (int i=0; i<row; i++)
{
for (int j=0; j<column; j++)
{
std::cout<< matrix[i][j] << "\t\t";
}
printf("\n");
}
printf(".................\n");
}
clock reports CPU time used. If you have 2 CPUs and run a thread on each one for 10 seconds, clock will report 20 seconds.