I'm trying to make a parallel version of "Harmonic Progression Sum" problem using MPI.
But I'm new with MPI and I don't know how to run this method with MPI, because it isn't work.
Parallel Program:
//#include "stdafx.h"
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <mpi.h>
#define d 10 //Numbers of Digits (Example: 5 => 0,xxxxx)
#define n 1000 //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5)
using namespace std;
int numProcess, rank, msg, source, dest, tag, qtd_elemento;
int escravo(long unsigned int *digits, int ValueEnd)
{
MPI_Status status;
MPI_Recv(digits, (d + 11), MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
for (int i = 1; i <= ValueEnd; ++i) {
long unsigned int remainder = 1;
for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit) {
long unsigned int div = remainder / i;
long unsigned int mod = remainder % i;
digits[digit] += div;
remainder = mod * 10;
}
}
MPI_Send(&digits, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
}
void HPSSeguencial(char* output) {
long unsigned int digits[d + 11];
int DivN = n / 4; //Limiting slave.
for (int digit = 0; digit < d + 11; ++digit)
digits[digit] = 0;
if (rank != 0){
escravo(digits, (DivN * 1 ) );
escravo(digits, (DivN * 2 ) );
escravo(digits, (DivN * 3 ) );
escravo(digits, (DivN * 4 ) );
}
for (int i = d + 11 - 1; i > 0; --i) {
digits[i - 1] += digits[i] / 10;
digits[i] %= 10;
}
if (digits[d + 1] >= 5) {
++digits[d];
}
for (int i = d; i > 0; --i) {
digits[i - 1] += digits[i] / 10;
digits[i] %= 10;
}
stringstream stringstreamA;
stringstreamA << digits[0] << ",";
for (int i = 1; i <= d; ++i) {
stringstreamA << digits[i];
}
string stringA = stringstreamA.str();
stringA.copy(output, stringA.size());
}
int main() {
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numProcess);
char output[d + 10];
HPSSeguencial(output);
cout << output << endl;
MPI_Finalize();
system("PAUSE");
return 0;
}
Original Code
#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <time.h>
#define d 10 //Numbers of Digits (Example: 5 => 0,xxxxx)
#define n 1000 //Value of N (Example: 5 => 1/1 + 1/2 + 1/3 + 1/4 + 1/5)
using namespace std;
void HPS(char* output) {
long unsigned int digits[d + 11];
for (int digit = 0; digit < d + 11; ++digit)
digits[digit] = 0;
for (int i = 1; i <= n; ++i) {
long unsigned int remainder = 1;
for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit) {
long unsigned int div = remainder / i;
long unsigned int mod = remainder % i;
digits[digit] += div;
remainder = mod * 10;
}
}
for (int i = d + 11 - 1; i > 0; --i) {
digits[i - 1] += digits[i] / 10;
digits[i] %= 10;
}
if (digits[d + 1] >= 5) {
++digits[d];
}
for (int i = d; i > 0; --i) {
digits[i - 1] += digits[i] / 10;
digits[i] %= 10;
}
stringstream stringstreamA;
stringstreamA << digits[0] << ",";
for (int i = 1; i <= d; ++i) {
stringstreamA << digits[i];
}
string stringA = stringstreamA.str();
stringA.copy(output, stringA.size());
}
int main() {
char output[d + 10];
HPS(output);
cout << output<< endl;
system("PAUSE");
return 0;
}
Examples:
Input:
#define d 10
#define n 1000
Output:
7,4854708606╠╠╠╠╠╠╠╠╠╠╠╠
Input:
#define d 12
#define n 7
Output:
2,592857142857╠╠╠╠╠╠╠╠╠╠╠╠╠╠ÀÂ♂ü─¨#
Regards
Original Code
http://regulus.pcs.usp.br/marathon/current/warmup.pdf
I am assuming that you want to parallelize this part:
for (int i = 1; i <= ValueEnd; ++i)
{
long unsigned int remainder = 1;
for (long unsigned int digit = 0; digit < d + 11 && remainder; ++digit)
{
long unsigned int div = remainder / i;
long unsigned int mod = remainder % i;
digits[digit] += div;
remainder = mod * 10;
}
}
You can divide each for iteration by each MPI process:
int idP = getProcessId(), numP = numberProcess();
for (int i = idP; i <= ValueEnd; i+=numP)
{
...
}
The getProcessId() gives you the process ID and numberProcess() gives you the number of process:
int getProcessId(){
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
return rank;
}
// Get number of process
int numberProcess(){
int numProc;
MPI_Comm_size(MPI_COMM_WORLD, &numProc);
return numProc;
}
Each process will have a copy of the array digits; After the parallel for, the master process collects the results from all slaves process using a MPI_reduce. Or if you want to combine the values from all processes and distribute the result back to all processes you can use MPI_Allreduce.
long unsigned int digits[d + 11];
int DivN = n / 4; //Limiting slave.
for (int digit = 0; digit < d + 11; ++digit)
digits[digit] = 0;
if (rank != 0){
escravo(digits, (DivN * 1 ) );
escravo(digits, (DivN * 2 ) );
escravo(digits, (DivN * 3 ) );
escravo(digits, (DivN * 4 ) );
}
According to the code above process 0 will not execute the method escravo. Furthermore, you are not distributing correctly the work among processes. Process 1 will execute the out for loop inside the method escravo from 1 until n/4, but then process 2 will execute from 1 until 2n/4... Thus, you have different processes executing the same iterations, when what you really want is to divide these iterations among process.
Related
I am now trying to make a program to find the Absolute Euler Pseudoprimes ('AEPSP' in short, not Euler-Jacobi Pseudoprimes), with the definition that n is an AEPSP if
a(n-1)/2 ≡ ±1 (mod n)
for all positive integers a satisfying that the GCD of a and n is 1.
I used a C++ code to generate AEPSPs, which is based on a code to generate Carmichael numbers:
#include <iostream>
#include <cmath>
#include <algorithm>
#include <numeric>
using namespace std;
unsigned int bm(unsigned int a, unsigned int n, unsigned int p){
unsigned long long b;
switch (n) {
case 0:
return 1;
case 1:
return a % p;
default:
b = bm(a,n/2,p);
b = (b*b) % p;
if (n % 2 == 1) b = (b*a) % p;
return b;
}
}
int numc(unsigned int n){
int a, s;
int found = 0;
if (n % 2 == 0) return 0;
s = sqrt(n);
a = 2;
while (a < n) {
if (a > s && !found) {
return 0;
}
if (gcd(a, n) > 1) {
found = 1;
}
else {
if (bm(a, (n-1)/2, n) != 1) {
return 0;
}
}
a++;
}
return 1;
}
int main(void) {
unsigned int n;
for (n = 3; n < 1e9; n += 2){
if (numc(n)) printf("%u\n",n);
}
return 0;
}
Yet, the program is very slow. It generates AEPSPs up to 1.5e6 in 20 minutes. Does anyone have any ideas on optimizing the program?
Any help is most appreciated. :)
I've come up with a different algorithm, based on sieving for primes upfront while simultaneously marking off non-squarefree numbers. I've applied a few optimizations to pack the information into memory a bit tighter, to help with cache-friendliness as well. Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define PRIME_BIT (1UL << 31)
#define SQUARE_BIT (1UL << 30)
#define FACTOR_MASK (SQUARE_BIT - 1)
void sieve(uint64_t size, uint32_t *buffer) {
for (uint64_t i = 3; i * i < size; i += 2) {
if (buffer[i/2] & PRIME_BIT) {
for (uint64_t j = i * i; j < size; j += 2 * i) {
buffer[j/2] &= SQUARE_BIT;
buffer[j/2] |= i;
}
for (uint64_t j = i * i; j < size; j += 2 * i * i) {
buffer[j/2] |= SQUARE_BIT;
}
}
}
}
int main(int argc, char **argv) {
if (argc < 2) {
printf("Usage: prog LIMIT\n");
return 1;
}
uint64_t size = atoi(argv[1]);
uint32_t *buffer = malloc(size * sizeof(uint32_t) / 2);
memset(buffer, 0x80, size * sizeof(uint32_t) / 2);
sieve(size, buffer);
for (uint64_t i = 5; i < size; i += 4) {
if (buffer[i/2] & PRIME_BIT)
continue;
if (buffer[i/2] & SQUARE_BIT)
continue;
uint64_t num = i;
uint64_t factor;
while (num > 1) {
if (buffer[num/2] & PRIME_BIT)
factor = num;
else
factor = buffer[num/2] & FACTOR_MASK;
if ((i / 2) % (factor - 1) != 0) {
break;
}
num /= factor;
}
if (num == 1)
printf("Found pseudo-prime: %ld\n", i);
}
}
This produces the pseudo-primes up to 1.5e6 in about 8ms on my machine, and for 2e9 it takes 1.8sec.
The time complexity of the solution is O(n log n) - the sieve is O(n log n), and for each number we either do constant time checks or do a divisibility test for each of its factors, which there are at most log n. So, the main loop is also O(n log n), resulting in O(n log n) overall.
I am creating a script that calculates the sum of first n Fibonacci numbers and returns the last digit of the sum.
The python script works perfect but the C++ script does not and the logic is same.
Any help will be appreciated.
Python Code
def calc_fib(n):
f = [0, 1]
for i in range(2, 61):
f.insert(i, (f[i-1] + f[i-2]) % 10)
rem = n % 60
quotient = (n - rem) / 60
return int((sum(f) * quotient + sum(f[0: rem+1])) % 10)
n = int(input())
print(calc_fib(n))
C++ Code
#include <iostream>
#include <vector>
#include <numeric>
using namespace std;
long long fibonacci_sum_last_digit(long long n) {
vector<int> f(n + 1);
f[0] = 0;
f[1] = 1;
for (int i = 2; i <= 60; i++){
f[i] = (f[i-1] + f[i-2]) % 10;
}
int rem = n % 60;
int quotient = (n - rem) / 60;
return (accumulate(f.begin(), f.end(), 0) * quotient + accumulate(f.begin(), f.begin() + rem + 1, 0)) % 10;
}
int main() {
int n;
cin >> n;
if (n <= 1)
cout << n;
else
cout << fibonacci_sum_last_digit(n) << '\n';
return 0;
}
vector<int> f(n + 1);
f[0] = 0;
f[1] = 1;
for (int i = 2; i <= 60; i++){
f[i] = (f[i-1] + f[i-2]) % 10;
}
The vector is size n+1 and you access until 60 => it's a bug
This should fix :
vector<int> f(60 + 1);
Or
vector<int> f;
f.push_back(0);
f.push_back(1);
for (int i = 2; i <= 60; i++){
f.push_back((f[i-1] + f[i-2]) % 10);
}
I want to write a function to reverse one of two parts of number :
Input is: num = 1234567; part = 2
and output is: 1234765
So here is part that can be only 1 or 2
Now I know how to get part 1
int firstPartOfInt(int num) {
int ret = num;
digits = 1, halfDig = 10;
while (num > 9) {
ret = ret / 10;
digits++;
}
halfDigits = digits / 2;
for (int i = 1; i < halfDigits; i++) {
halfDigits *= 10;
}
ret = num;
while (num > halfDigits) {
ret = ret / 10;
}
return ret;
}
But I don't know how to get part 2 and reverse the number. If you post code here please do not use vector<> and other C++ feature not compatible with C
One way is to calculate the total number of digits in the number and then calculate a new number extracting digits from the original number in a certain order, complexity O(number-of-digits):
#include <stdio.h>
#include <stdlib.h>
unsigned reverse_decimal_half(unsigned n, unsigned half) {
unsigned char digits[sizeof(n) * 3];
unsigned digits10 = 0;
do digits[digits10++] = n % 10;
while(n /= 10);
unsigned result = 0;
switch(half) {
case 1:
for(unsigned digit = digits10 / 2; digit < digits10; ++digit)
result = result * 10 + digits[digit];
for(unsigned digit = digits10 / 2; digit--;)
result = result * 10 + digits[digit];
break;
case 2:
for(unsigned digit = digits10; digit-- > digits10 / 2;)
result = result * 10 + digits[digit];
for(unsigned digit = 0; digit < digits10 / 2; ++digit)
result = result * 10 + digits[digit];
break;
default:
abort();
}
return result;
}
int main() {
printf("%u %u %u\n", 0, 1, reverse_decimal_half(0, 1));
printf("%u %u %u\n", 12345678, 1, reverse_decimal_half(12345678, 1));
printf("%u %u %u\n", 12345678, 2, reverse_decimal_half(12345678, 2));
printf("%u %u %u\n", 123456789, 1, reverse_decimal_half(123456789, 1));
printf("%u %u %u\n", 123456789, 2, reverse_decimal_half(123456789, 2));
}
Outputs:
0 1 0
12345678 1 43215678
12345678 2 12348765
123456789 1 543216789
123456789 2 123459876
if understand this question well you need to reverse half of the decimal number. If the number has odd number of digits I assume that the first part is longer (for example 12345 - the first part is 123 the second 45). Because reverse is artihmetic the reverse the part 1 of 52001234 is 521234.
https://godbolt.org/z/frXvCM
(some numbers when reversed may wrap around - it is not checked)
int getndigits(unsigned number)
{
int ndigits = 0;
while(number)
{
ndigits++;
number /= 10;
}
return ndigits;
}
unsigned reverse(unsigned val, int ndigits)
{
unsigned left = 1, right = 1, result = 0;
while(--ndigits) left *= 10;
while(left)
{
result += (val / left) * right;
right *= 10;
val = val % left;
left /= 10;
}
return result;
}
unsigned reversehalf(unsigned val, int part)
{
int ndigits = getndigits(val);
unsigned parts[2], digits[2], left = 1;
if(ndigits < 3 || (ndigits == 3 && part == 2))
{
return val;
}
digits[0] = digits[1] = ndigits / 2;
if(digits[0] + digits[1] < ndigits) digits[0]++;
for(int dig = 0; dig < digits[1]; dig++) left *= 10;
parts[0] = val / left;
parts[1] = val % left;
parts[part - 1] = reverse(parts[part - 1], digits[part - 1]);
val = parts[0] * left + parts[1];
return val;
}
int main()
{
for(int number = 0; number < 40; number++)
{
unsigned num = rand();
printf("%u \tpart:%d\trev:%u\n", num,(number & 1) + 1,reversehalf(num, (number & 1) + 1));
}
}
My five cents.:)
#include <iostream>
int reverse_part_of_integer( int value, bool first_part = false )
{
const int Base = 10;
size_t n = 0;
int tmp = value;
do
{
++n;
} while ( tmp /= Base );
if ( first_part && n - n / 2 > 1 || !first_part && n / 2 > 1 )
{
n = n / 2;
int divider = 1;
while ( n-- ) divider *= Base;
int first_half = value / divider;
int second_half = value % divider;
int tmp = first_part ? first_half : second_half;
value = 0;
do
{
value = Base * value + tmp % Base;
} while ( tmp /= Base );
value = first_part ? value * divider + second_half
: first_half * divider +value;
}
return value;
}
int main()
{
int value = -123456789;
std::cout << "initial value: "
<< value << '\n';
std::cout << "First part reversed: "
<< reverse_part_of_integer( value, true ) << '\n';
std::cout << "Second part reversed: "
<< reverse_part_of_integer( value ) << '\n';
}
The program output is
initial value: -123456789
First part reversed: -543216789
Second part reversed: -123459876
Just for fun, a solution that counts only half the number of digits before reversing:
constexpr int base{10};
constexpr int partial_reverse(int number, int part)
{
// Split the number finding its "halfway"
int multiplier = base;
int abs_number = number < 0 ? -number : number;
int parts[2] = {0, abs_number};
while (parts[1] >= multiplier)
{
multiplier *= base;
parts[1] /= base;
}
multiplier /= base;
parts[0] = abs_number % multiplier;
// Now reverse only one of the two parts
int tmp = parts[part];
parts[part] = 0;
while (tmp)
{
parts[part] = parts[part] * base + tmp % base;
tmp /= base;
}
// Then rebuild the number
int reversed = parts[0] + multiplier * parts[1];
return number < 0 ? -reversed : reversed;
}
int main()
{
static_assert(partial_reverse(123, 0) == 123);
static_assert(partial_reverse(-123, 1) == -213);
static_assert(partial_reverse(1000, 0) == 1000);
static_assert(partial_reverse(1009, 1) == 109);
static_assert(partial_reverse(123456, 0) == 123654);
static_assert(partial_reverse(1234567, 0) == 1234765);
static_assert(partial_reverse(-1234567, 1) == -4321567);
}
My Code is:
#include <iostream>
#include <utility>
#include <algorithm>
//#include <iomanip>
#include <cstdio>
//using namespace std;
inline int overlap(std::pair<int,int> classes[],int size)
{
std::sort(classes,classes+size);
int count=0,count1=0,count2=0;
int tempi,tempk=1;
for(unsigned int i=0;i<(size-1);++i)
{
tempi = classes[i].second;
for(register unsigned int j=i+1;j<size;++j)
{
if(!(classes[i].first<classes[j].second && classes[i].second>classes[j].first))
{ if(count1 ==1)
{
count2++;
}
if(classes[i].second == tempi)
{
tempk =j;
count1 = 1;
}
////cout<<"\n"<<"Non-Overlapping Class:\t";
////cout<<classes[i].first<<"\t"<<classes[i].second<<"\t"<<classes[j].first<<"\t"<<classes[j].second<<"\n";
classes[i].second = classes[j].second;
count++;
if(count1==1 && j ==(size-1))
{
j= tempk;
classes[i].second = tempi;
count1= 0;
if(count2 !=0)
{
count = (count + ((count2)-1));
}
count2 =0;
}
}
else
{
if(j ==(size-1))
{
if(count>0)
{
j= tempk;
classes[i].second = tempi;
count1= 0;
if(count2 !=0)
{
count = (count + ((count2)-1));
}
count2 =0;
}
}
}
}
}
count = count + size;
return count;
}
inline int fastRead_int(int &x) {
register int c = getchar_unlocked();
x = 0;
int neg = 0;
for(; ((c<48 || c>57) && c != '-'); c = getchar_unlocked());
if(c=='-') {
neg = 1;
c = getchar_unlocked();
}
for(; c>47 && c<58 ; c = getchar_unlocked()) {
x = (x<<1) + (x<<3) + c - 48;
}
if(neg)
x = -x;
return x;
}
int main()
{
int N;
////cout<<"Please Enter Number Of Classes:";
clock_t begin,end;
float time_interval;
begin = clock();
while(fastRead_int(N))
{
switch(N)
{
case -1 : end = clock();
time_interval = float(end - begin)/CLOCKS_PER_SEC;
printf("Execution Time = %f",time_interval);
return 0;
default :
unsigned int subsets;
unsigned int No = N;
std::pair<int,int> classes[N];
while(No--)
{
////cout<<"Please Enter Class"<<(i+1)<<"Start Time and End Time:";
int S, E;
fastRead_int(S);
fastRead_int(E);
classes[N-(No+1)] = std::make_pair(S,E);
}
subsets = overlap(classes,N);
////cout<<"\n"<<"Total Number Of Non-Overlapping Classes is:";
printf("%08d",subsets);
printf("\n");
break;
}
}
}
and Input and output of my program:
Input:
5
1 3
3 5
5 7
2 4
4 6
3
500000000 1000000000
1 5
1 5
1
999999999 1000000000
-1
Output:
Success time: 0 memory: 3148 signal:0
00000012
00000005
00000001
Execution Time = 0.000036
I tried to calculate by having clocks at start of main and end of main and found out the time.But it said only some 0.000036 secs.But when I tried to post the same code in Online Judge(SPOJ).My program got 'Time Limit Exceeded' Error. Time Limit for the above program in SPOJ is 2.365 secs.Could somebody help me figure out this?
I consider that your question is about the overlap function.
In it, you have
A sort call: O(n×ln(n))
two for loops:
the first is roughly 0..Size
the second (nested in the first) is roughly i..Size
The inside of the second loop is called Size(Size+1) / 2 (reverse sum of the N first integer) times with no breaks.
So your algorithm is O(n²) where n is the size.
How to create all possible numbers, starting from a given one, where all digits of the new ones are moved one slot to the right? For example if we have 1234. I want to generate 4123, 3412 and 2341.
What I have come out with so far is this:
int move_digits(int a)
{
int aux = 0;
aux = a % 10;
for(int i=pow(10, (number_digits(a) - 1)); i>0; i=i/10)
aux = aux * 10 + ((a % i) / (i/10));
return aux;
}
But it doesn't work.
The subprogram number_digits looks like this (it just counts how many digits the given number has):
int number_digits(int a)
{
int ct = 0;
while(a != 0)
{
a = a/10;
ct++;
}
return ct;
}
I think there is no need to write separate function number_digits.
I would write function move_digits simpler
#include <iostream>
#include <cmath>
int move_digits( int x )
{
int y = x;
double n = 0.0;
while ( y /= 10 ) ++n;
return ( x / 10 + x % 10 * std::pow( 10.0, n ) );
}
int main()
{
int x = 1234;
std::cout << x << std::endl;
std::cout << move_digits( x ) << std::endl;
}
Retrieving the last digit of n: n % 10.
To "cut off" the last digit, you could use number / 10.
Say you have a three-digit number n, then you can prepend a new digit d using 1000 * d + n
That said, you probably want to compute
aux = pow(10, number_digits - 1) * (aux % 10) + (aux / 10)
Calculatea/(number_digits(a) - 1) and a%(number_digits(a) - 1)
And your answer is (a%(number_digits(a) - 1))*10 + a/(number_digits(a) - 1)
int i =0 ;
int len = number_digits(a);
while(i < len){
cout << (a%(len - 1))*10 + a/(len - 1) <<endl;
a = (a%(len - 1))*10 + a/(len - 1);
}
void move_digits(int a)
{
int digits = 0;
int b = a;
while(b / 10 ){
digits++;
b = b / 10;
}
for (int i = 0; i < digits; ++i)
{
int c = a / 10;
int d = a % 10;
int res = c + pow(10, digits) * d;
printf("%d\n", res);
a = res;
}
printf("\n");
}
int main()
{
move_digits(12345);
}