#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
using namespace std;
int ceil(int x, int y) {
return x / y + (x % y > 0);
}
void create_group_and_comm(MPI_Group *world_group, MPI_Group *group, MPI_Comm *comm, int size, bool is_even) {
int *ranks;
int count = is_even ? ceil(size, 2) : size / 2;
ranks = (int *)malloc(count * sizeof(int));
int i = is_even ? 0 : 1, j=0;
while(i < size) {
ranks[j] = i;
j++;
i+=2;
}
MPI_Group_incl(*world_group, j, ranks, group);
MPI_Comm_create(MPI_COMM_WORLD, *group, comm);
free(ranks);
}
int main(int argc, char *argv[])
{
int size, rank, *result_odd, *result_even;
int rank_gr;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Status status;
MPI_Comm even_comm, odd_comm;
MPI_Group even_group, odd_group, world_group;
int *A, *Rows;
int namelen;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name, &namelen);
if (rank == 0)
{
A = (int *)malloc(size * size * sizeof(int));
for (int i = 0; i < size * size; i++) {
A[i] = rand() / 1000000;
}
printf("Initial data:\n");
for (int i = 0; i < size; i++)
{
putchar('|');
for (int j = 0; j < size; j++)
printf("%.4d ", A[i*size+j]);
printf("|\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
else
MPI_Barrier(MPI_COMM_WORLD);
MPI_Comm_group(MPI_COMM_WORLD, &world_group);
create_group_and_comm(&world_group, &even_group, &even_comm, size, true);
create_group_and_comm(&world_group, &odd_group, &odd_comm, size, false);
Rows = new int[size];
MPI_Scatter(A, size, MPI_INT, Rows, size, MPI_INT, 0, MPI_COMM_WORLD);
result_odd = new int[size];
result_even = new int[size];
if(rank % 2 == 0) {
MPI_Reduce(Rows,result_even,size,MPI_INT,MPI_MAX,0,even_comm);
} else {
MPI_Reduce(Rows,result_odd,size,MPI_INT,MPI_MIN,0,odd_comm);
}
if(rank == 0) {
printf("Max values for columns on even:\n");
for(int idx = 0; idx < size;idx++) {
printf("Column %d: %d\n", idx+1, result_even[idx]);
}
printf("Max values for columns on odd:\n");
for(int idx = 0; idx < size;idx++) {
printf("Column %d: %d\n", idx+1, result_odd[idx]);
}
}
//MPI_Comm_free(&even_comm);
//MPI_Comm_free(&odd_comm);
MPI_Group_free(&even_group);
MPI_Group_free(&odd_group);
MPI_Finalize();
return 0;
}
Hello, I'm writing an application using the MPI library (code above). I'm trying to create two groups, each with its own communicator. One group holds the processes with even rank and computes the maximum value per column with MPI_Reduce among its members; the other group computes the minimum for each column of the matrix. For the even ranks MPI_Reduce works as expected, but for the processes with odd rank it does not. Can someone tell me what I'm doing wrong?
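One thing worth checking (my reading of the code above, not a confirmed diagnosis): the root argument of MPI_Reduce refers to a rank inside the communicator that is passed, so the odd-group minima end up on rank 0 of odd_comm, which is world rank 1, while the printing happens on world rank 0 with result_odd still uninitialized. A minimal sketch of one way to hand the minima back to world rank 0, reusing the variables of the program above and assuming at least two processes, is:
if(rank % 2 == 0) {
    MPI_Reduce(Rows, result_even, size, MPI_INT, MPI_MAX, 0, even_comm);
} else {
    MPI_Reduce(Rows, result_odd, size, MPI_INT, MPI_MIN, 0, odd_comm);
    if(rank == 1)   // rank 0 of odd_comm is world rank 1: it holds the reduced minima
        MPI_Send(result_odd, size, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
if(rank == 0)       // world rank 0 is not in the odd group, so it must receive the minima explicitly
    MPI_Recv(result_odd, size, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);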
I want to create a link between two programs through the exec family of functions.
My idea is to create a function, take its address with a function pointer, and then send that pointer to the other program to test it.
1- Is this possible?
2- How?
I got this idea because each time I only have to change the function name in the main function while the rest stays the same; if I could send the function pointer as a character pointer, my program could stay as it is without changing. This is my first program:
#include <iostream>
#include <cstdlib>
#include <unistd.h>
#include <ctime>   // for time(), used to seed rand()
using namespace std;
void
Random(int* ,const int );
int*
selection_sort(int *arr ,const int length)
{
int i = 0,minIndex{0},tmp{0},k{0};
while(i < length-1) // T(n-1) * C1
{
minIndex = i; // Tn * C2
for(int j = i+1 ; j < length ; j++ ) // som(Ti) from i = 0 to i = length-1 )*C3.
{
if((arr)[j] < (arr)[minIndex])
minIndex = j;
}
if(minIndex != i) // Tn * C4
{
tmp = (arr)[i];
(arr)[i] = (arr)[minIndex];
(arr)[minIndex] = tmp;
}
i++;
}
return arr;
}
void
Random(int* array,const int length)
{
srand(time(nullptr));
// fill exactly `length` elements; the original while(i++ < length) starting at -1 wrote one past the end
for(int i = 0; i < length; ++i)
{
array[i] = rand()%100;
sleep(0.2);
}
}
int main(int argc,char* argv[])
{
int* (*ptr)(int*,const int ) = selection_sort;
execl("/home/hellios/Documents/Algorithms/sort_Algorithms/main",(char*)ptr,0); // complete the call
return EXIT_SUCCESS;
}
sort_Algorithms/main.c
#include <iostream>
#include <unistd.h>
#include <stdlib.h>
#include <ctime>
using namespace std;
void
Random(int* array,const int length);
int
main(int argc,char* argv[])
{
int* (*ptr)(int *,const int ) =(int* (*) (int*,const int)) argv[1];
int arr1[100],k{0},*arr;
Random(arr1,100);
arr = (*ptr)(arr1,100);
//selection_sort(arr,100);
cout<<"out of selection_sort"<<endl;
for(int j = 0 ; j < 100 ; j++ )
{
printf("[%d]\t", arr[j]);
if(!(++k %10))
cout<<endl;
}
printf("\n");
return EXIT_SUCCESS ;
}
void
Random(int* array,const int length)
{
srand(time(nullptr));
// fill exactly `length` elements; the original while(i++ < length) starting at -1 wrote one past the end
for(int i = 0; i < length; ++i)
{
array[i] = rand()%100;
sleep(0.2);
}
}
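A function pointer value is only an address in the address space of the process that created it, so even if its bytes are passed through execl, the second program cannot call through it. A common workaround (a different technique from the code above; the argument handling shown here is my own sketch) is to pass the name of the function as an ordinary string and let the second program map that name to one of its own functions:
Program 1 (caller):
#include <unistd.h>
int main() {
    // Pass "selection_sort" as a plain argv string; after execl, argv[0] is "main"
    // and argv[1] is the selector. The path is the one used in the code above.
    execl("/home/hellios/Documents/Algorithms/sort_Algorithms/main",
          "main", "selection_sort", (char*)NULL);
    return 1;   // reached only if execl fails
}
Program 2 (sort_Algorithms/main):
#include <cstring>
#include <cstdio>
static void selection_sort(int* arr, int length) {
    for (int i = 0; i < length - 1; ++i) {
        int minIndex = i;
        for (int j = i + 1; j < length; ++j)
            if (arr[j] < arr[minIndex]) minIndex = j;
        int tmp = arr[i]; arr[i] = arr[minIndex]; arr[minIndex] = tmp;
    }
}
int main(int argc, char* argv[]) {
    int data[8] = {7, 3, 5, 1, 8, 2, 6, 4};
    if (argc > 1 && std::strcmp(argv[1], "selection_sort") == 0)
        selection_sort(data, 8);                 // chosen by name, not by raw pointer
    for (int i = 0; i < 8; ++i)
        std::printf("%d ", data[i]);
    std::printf("\n");
    return 0;
}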
I am a beginner in MPI and am trying to write sorting code (bubble sort).
The code works, but it seems like I'm missing something.
The code is here:
#define N 10
#include <iostream>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <stddef.h>
#include "mpi.h"
using namespace std;
int main(int argc, char* argv[])
{
int i, j, k, rank, size;
int a[N] = { 10,9,8,7,6,5,4,3,2,1 };
int c[N];
int aa[N], cc[N];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Scatter(a, N/size, MPI_INT, aa, N/size , MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
int n = N/size;
for (int i = 0; i < n - 1; i++) {
for (int j = 0; j < n - i - 1; j++) {
if (aa[j] > aa[j + 1]) {
int temp = aa[j];
aa[j] = aa[j + 1];
aa[j + 1] = temp;
}
}
}
for (int i = 0; i < n; i++) {
cc[i] = aa[i];
};
MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(cc, N/size , MPI_INT, c, N/size, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
cout << cc[9];
if (rank == 0) {
cout << "C is look like : " << endl;
for (int i = 0; i < N; i++) {
cout << c[i] << " ";
}
}
}
Output of the program:
In the end we get errors (garbage values in the output).
My MPI run is configured with 4 processes.
-858993460 C is look like :
-858993460
-858993460
-858993460
9 10 7 8 5 6 3 4 -858993460 -858993460
There are several issues in your program:
- cc[9] is used uninitialized.
- You only operate on (N/size)*size elements; in your case N=10 and size=4, so only 8 elements are processed. The cure is to use MPI_Scatterv() and MPI_Gatherv() (a sketch follows below).
- Assuming your bubble sort is correct (I did not check that part), your program gathers sorted (sub)arrays, and you cannot naively expect the outcome to be a (full-size) sorted array.
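As a rough illustration of the MPI_Scatterv()/MPI_Gatherv() suggestion, here is a minimal, self-contained sketch (not the original poster's program; the names and the use of std::sort as a stand-in for the local bubble sort are my own) showing how N = 10 elements can be distributed over a process count that does not divide N:
#include <mpi.h>
#include <vector>
#include <algorithm>
#include <iostream>
int main(int argc, char* argv[]) {
    const int N = 10;
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    // counts[r] = how many elements rank r receives, displs[r] = where its block starts
    std::vector<int> counts(size), displs(size);
    for (int r = 0; r < size; ++r) {
        counts[r] = N / size + (r < N % size ? 1 : 0);   // spread the remainder over the first ranks
        displs[r] = (r == 0) ? 0 : displs[r - 1] + counts[r - 1];
    }
    std::vector<int> a;
    if (rank == 0) {                                     // only the root needs the full array
        a.resize(N);
        for (int i = 0; i < N; ++i) a[i] = N - i;        // 10, 9, ..., 1
    }
    std::vector<int> local(counts[rank]);
    MPI_Scatterv(a.data(), counts.data(), displs.data(), MPI_INT,
                 local.data(), counts[rank], MPI_INT, 0, MPI_COMM_WORLD);
    std::sort(local.begin(), local.end());               // stand-in for the local bubble sort
    std::vector<int> c(rank == 0 ? N : 0);
    MPI_Gatherv(local.data(), counts[rank], MPI_INT,
                c.data(), counts.data(), displs.data(), MPI_INT, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        // Each block is sorted, but the concatenation is NOT a fully sorted array (third point above).
        for (int v : c) std::cout << v << " ";
        std::cout << "\n";
    }
    MPI_Finalize();
    return 0;
}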
I have a problem with MPI_Scatter. I don't know how to use it, and my current program crashes with a segfault when I launch it.
I guess the problem is in the parameters of MPI_Scatter, particularly in calling it with the right operator (& or * or void), but I've tried almost every combination and nothing actually helped.
#include <iostream>
#include <stdio.h>
#include <mpi.h>
// k = 3, N = 12, 1,2,3, 4,5,6, 7,8,9, 10,11,12
int main(int argc, char **argv) {
int N, size, myrank;
int k;
std::cin >> N;
std::cin >> k;
int *mass = new int[N];
int *recv = new int[k];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
if (myrank == 0) {
std::cout << "get k and n \n";
for (int i = 0; i < N; ++i) {
mass[i] = i;
std::cout << i << " written\n";
}
}
MPI_Scatter(mass, k, MPI_INT, recv, k, MPI_INT, 0, MPI_COMM_WORLD);
int sum = 0;
std::cout << "myrank" << myrank << '\n';
for (int i = 0; i < k; ++i) {
std::cout << recv[i] << '\n';
}
MPI_Finalize();
return 0;
}
When I launch this code, it prints this:
N = 12
k = 3
get k and n
0 written
1 written
2 written
3 written
4 written
5 written
6 written
7 written
8 written
9 written
10 written
11 written
myrank0
0
1
2
myrank1
myrank3
myrank2
[1570583203.522390] [calc:32739:0] mpool.c:38 UCX WARN object 0x7fe1f08b2f60 was not returned to mpool mm_recv_desc
[1570583203.523214] [calc:32740:0] mpool.c:38 UCX WARN object 0x7f4643986f60 was not returned to mpool mm_recv_desc
[1570583203.524205] [calc:32741:0] mpool.c:38 UCX WARN object 0x7f22535d4f60 was not returned to mpool mm_recv_desc
MPI typically redirects stdin to rank 0 only, so N and k are not correctly set on the other ranks.
Here is a working version of your program:
#include <iostream>
#include <cassert>
#include <stdio.h>
#include <mpi.h>
// k = 3, N = 12, 1,2,3, 4,5,6, 7,8,9, 10,11,12
int main(int argc, char **argv) {
int k, N, size, myrank;
int *mass;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
if (myrank == 0) {
std::cout << "get k and n \n";
std::cin >> N;
std::cin >> k;
assert (N >= k*size);
mass = new int[N];
for (int i = 0; i < N; ++i) {
mass[i] = i;
std::cout << i << " written\n";
}
}
MPI_Bcast(&k, 1, MPI_INT, 0, MPI_COMM_WORLD);
int *recv = new int[k];
MPI_Scatter(mass, k, MPI_INT, recv, k, MPI_INT, 0, MPI_COMM_WORLD);
int sum = 0;
std::cout << "myrank" << myrank << '\n';
for (int i = 0; i < k; ++i) {
std::cout << recv[i] << '\n';
}
MPI_Finalize();
return 0;
}
I have two codes that are both working, yet I cannot figure out why one is so much faster than the other. To my knowledge, BLAS with MKL (Intel) should be much faster than GSL (GNU), yet my code shows quite the opposite. Here are the codes themselves: I simply create two matrices at the master node and then send different rows to different "slave" processors (with OpenMPI), which compute the final matrix elements and then return them to the master node.
GSL example (the fast code):
#include <iostream>
#include <stdio.h>
#include <iostream>
#include <cmath>
#include <mpi.h>
#include <gsl/gsl_blas.h>
using namespace std;
int main(int argc, char** argv){
int noprocs, nid;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &nid);
MPI_Comm_size(MPI_COMM_WORLD, &noprocs);
int master = 0;
const int nsame = 1000; //must be same if matrices multiplied together = acols = brows
const int arows = 1000;
const int bcols = 1000;
int rowsent;
double * buff;
buff = new double [nsame];
double * b;
b = new double [nsame*bcols];
double** c = new double*[arows];
for(int i = 0; i < arows; ++i)
c[i] = new double[bcols];
double * CC;
CC = new double [1*bcols]; //here ncols corresponds to numbers of rows for matrix b
for (int i = 0; i < bcols; i++){
CC[i] = 0.;
}; //this is simply a 1-d array of zeros which will be updated and passed by the processors
// Master part
if (nid == master ) {
double** a = new double*[arows];
for(int i = 0; i < arows; ++i){
a[i] = new double[nsame];}
for (int i = 0; i < arows; i++){
for (int j = 0; j < nsame; j++){
if (i == j)
a[i][j] = 1.;
else
a[i][j] = 0.;
}
}
for (int i = 0; i < (nsame*bcols); i++){
b[i] = (10.*i + 3.)/(3.*i - 2.) ;
}
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD); //assumes b is stored as a contiguous block of memory
// send one row to each slave tagged with row number, assume nprocs<nrows
rowsent=0;
for (int i=1; i < (noprocs); i++) { //must be equal to noprocs otherwise it will not send to 3
MPI_Send(a[rowsent], nsame, MPI_DOUBLE_PRECISION,i,rowsent+1,MPI_COMM_WORLD);
rowsent++;
}
for (int i=0; i<arows; i++) {
MPI_Recv(CC, bcols, MPI_DOUBLE_PRECISION, MPI_ANY_SOURCE, MPI_ANY_TAG,
MPI_COMM_WORLD, &status);
int sender = status.MPI_SOURCE;
int anstype = status.MPI_TAG; //row number+1
int IND_I = 0;
while (IND_I < bcols){
c[anstype - 1][IND_I] = CC[IND_I];
IND_I++;
}
if (rowsent < arows) {
MPI_Send(a[rowsent], nsame,MPI_DOUBLE_PRECISION,sender,rowsent+1,MPI_COMM_WORLD);
rowsent++;
}
else { // tell sender no more work to do via a 0 TAG
MPI_Send(MPI_BOTTOM,0,MPI_DOUBLE_PRECISION,sender,0,MPI_COMM_WORLD);
}
}
}
// Slave part
else {
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
while(status.MPI_TAG != 0) {
int crow = status.MPI_TAG;
gsl_matrix_view AAAA = gsl_matrix_view_array(buff, 1, nsame);
gsl_matrix_view BBBB = gsl_matrix_view_array(b, nsame, bcols);
gsl_matrix_view CCCC = gsl_matrix_view_array(CC, 1, bcols);
/* Compute C = A B */
gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, &AAAA.matrix, &BBBB.matrix,
0.0, &CCCC.matrix);
MPI_Send(CC,bcols,MPI_DOUBLE_PRECISION, master, crow, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
// cout << ans << " OUTPUT \n";
}
}
MPI_Finalize();
return 0;
};
MKL example (the slow code):
#include <iostream>
#include <stdio.h>
#include <iostream>
#include <cmath>
#include <mpi.h>
#include </opt/intel/compilers_and_libraries_2017.1.126/mac/mkl/include/mkl.h>
using namespace std;
int main(int argc, char** argv){ //THE IDENTITY MATRIX ONLY WORKS IF arows = nsame!
int noprocs, nid;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &nid);
MPI_Comm_size(MPI_COMM_WORLD, &noprocs);
int master = 0;
const int nsame = 1000;
const int arows = 1000;
const int bcols = 1000;
int rowsent;
double * buff;
buff = new double [nsame];
double * b;
b = new double [nsame*bcols];
double** c = new double*[arows];
for(int i = 0; i < arows; ++i)
c[i] = new double[bcols];
double * CC;
CC = new double [1*bcols];
for (int i = 0; i < bcols; i++){
CC[i] = 0.;
};
// Master part
if (nid == master ) {
double** a = new double*[arows];
for(int i = 0; i < arows; ++i){
a[i] = new double[nsame];}
for (int i = 0; i < arows; i++){
for (int j = 0; j < nsame; j++){
if (i == j)
a[i][j] = 1.;
else
a[i][j] = 0.;
}
}
for (int i = 0; i < (nsame*bcols); i++){
b[i] = (10.*i + 3.)/(3.*i - 2.) ; // = 1.*i as test value
}
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD); //assumes b is stored as a contiguous block of memory; nprocs < nrows
delete[] b;
rowsent=0;
for (int i=1; i < (noprocs); i++) { //must be equal to noprocs otherwise it will not send to 3
MPI_Send(a[rowsent], nsame, MPI_DOUBLE_PRECISION,i,rowsent+1,MPI_COMM_WORLD);
delete[] a[rowsent];
rowsent++;
}
for (int i=0; i<arows; i++) {
MPI_Recv(CC, bcols, MPI_DOUBLE_PRECISION, MPI_ANY_SOURCE, MPI_ANY_TAG,
MPI_COMM_WORLD, &status);
int sender = status.MPI_SOURCE;
int anstype = status.MPI_TAG; //row number+1
int IND_I = 0;
while (IND_I < bcols){
c[anstype - 1][IND_I] = CC[IND_I];
IND_I++;
}
if (rowsent < arows) {
MPI_Send(a[rowsent], nsame,MPI_DOUBLE_PRECISION,sender,rowsent+1,MPI_COMM_WORLD);
delete[] a[rowsent];
rowsent++;
}
else { // tell sender no more work to do via a 0 TAG
MPI_Send(MPI_BOTTOM,0,MPI_DOUBLE_PRECISION,sender,0,MPI_COMM_WORLD);
}
}
}
// Slave part
else {
MPI_Bcast(b,nsame*bcols, MPI_DOUBLE_PRECISION, master, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
while(status.MPI_TAG != 0) {
int crow = status.MPI_TAG;
/* Compute C = A B */
cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, 1, bcols, nsame, 1.0, buff, nsame, b, bcols,
0.0, CC, bcols);
MPI_Send(CC,bcols,MPI_DOUBLE_PRECISION, master, crow, MPI_COMM_WORLD);
MPI_Recv(buff,nsame,MPI_DOUBLE_PRECISION,master,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
}
}
MPI_Finalize();
return 0;
};
I was thinking it might be due to me not deleting any of the new elements created, although I use essentially the same approach to initialize the arrays in both codes. I even tried deleting values in the MKL code (as shown) yet this appears to not have much of an effect. When I increase the size of the arrays from nsame = arows = bcols = 1000 to nsame = arows = bcols = 10000, the time differences in the two codes can readily be observed (the GSL code takes approximately 45 seconds while the MKL code takes quite a few minutes). Thus I am wondering if this is simply inherent to the way GSL and MKL are designed and incorporated in my code or if there is perhaps something else more subtle going on.
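One way to narrow this down (an instrumentation sketch, not part of the original code) is to time the BLAS call itself on each worker with MPI_Wtime, so the time spent inside dgemm can be separated from the MPI send/receive time. For the MKL version, the worker loop could be modified like this, reusing the variables already defined above:
double t_blas = 0.0;    // accumulated time spent inside cblas_dgemm on this worker
while(status.MPI_TAG != 0) {
    int crow = status.MPI_TAG;
    double t0 = MPI_Wtime();
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 1, bcols, nsame,
                1.0, buff, nsame, b, bcols, 0.0, CC, bcols);
    t_blas += MPI_Wtime() - t0;
    MPI_Send(CC, bcols, MPI_DOUBLE_PRECISION, master, crow, MPI_COMM_WORLD);
    MPI_Recv(buff, nsame, MPI_DOUBLE_PRECISION, master, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
}
printf("rank %d: %f s inside dgemm\n", nid, t_blas);
If the dgemm time alone accounts for the gap, the difference lies in how the library call behaves (for example, a threaded MKL build competing with the MPI ranks for cores); if not, the slowdown comes from elsewhere in the program.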