I have following code:
double * myX;
double * myY;
double * myZ;
int amount;
int count; // number of process
void SomeClass::someMethod(double *x, double *y, double *z, int amount) {
if (myId == 0) {
myX = x;
myY = y;
myZ = z;
amount = amount;
for(int i = 1; i < count; ++i) {
MPI_Send(&amount, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(myX, amount, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
MPI_Send(myY, amount, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
MPI_Send(myX, amount, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
}
}
void SomeClass::anotherMethod(void) {
if(myId != 0) {
MPI_Recv(&amount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(myX, amount, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(myY, amount, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(myZ, amount, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
// rest of code
MPI_Reduce(args);
}
But I have problem because I get Null buffer pointer when I run this code or Segmentation fault when I change something, for example set & before var name and then run.
MPI_init and other required function are called in other class, where I also create this class objects.
Can someone help me?
MPI_Recv will copy the data it receives to the buffer specified by the first parameter (myX in the case below):
MPI_Recv(myX, amount, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
The problem is that you haven't created a buffer to store this.
You could do:
myX = new double[amount];
For example to create the buffer, not forgetting to free the memory again afterwards with:
delete[] myX;
Related
I am working on a simple program in C++ that uses MPI to communicate between two processes. If I want to send an array to another process, the MPI_Send and MPI_Recv functions expect a pointer to the said array:
int MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status);
In an online tutorial I saw the following usage of MPI_Send and MPI_Recv:
int values[5] = {1, 2, 3, 4, 5};
MPI_Send(values, 5, MPI_INT, 1, 0, MPI_COMM_WORLD);
and
int values[10];
MPI_Recv(&values, 10, MPI_INT, 3, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
Why does the tutorial use in one case only values and in the other case also the address operator &values?
I wrote a program to send and receive arrays between two processes and it seems that it works with and without the address operator. Why is this the case? I'm certainly mistaken in my thinking. Can you help me find my mistake?
Here is my code:
#include <iostream>
#include <mpi.h>
int main(int argc, char** argv) {
MPI_Init(&argc, &argv);
// Reading size and rank
int size, rank;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// For every process create array
double array[2];
if (rank == 0) {
array[0] = 0.1;
array[1] = 0.2;
} else {
if (rank == 1) {
array[0] = 1.1;
array[1] = 1.2;
}
}
// Send and receive
double other_array[2];
if (rank == 0) {
MPI_Send(&array, 2, MPI_DOUBLE, 1, 99, MPI_COMM_WORLD);
MPI_Recv(&other_array, 2, MPI_DOUBLE, 1, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// OR
// MPI_Send(array, 2, MPI_DOUBLE, 1, 99, MPI_COMM_WORLD);
// MPI_Recv(other_array, 2, MPI_DOUBLE, 1, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
std::cout << rank << " " << other_array[0] << " " << other_array[1] << std::endl;
} else {
MPI_Recv(&other_array, 2, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send(&array, 2, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
// OR
// MPI_Recv(other_array, 2, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// MPI_Send(array, 2, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
std::cout << rank << " " << other_array[0] << " " << other_array[1] << std::endl;
}
// Finalisation
int MPI_Finalize();
return 0;
}
I compiled and ran the programm using
mpic++ -O -Wall main.cpp -o main
mpirun -np 2 ./main
I am learning MPI and I am trying to convert my MPI program from Point to Point Communication to MPI Collectives ..
Below is a fragment of my code for Matrix Multiplication using MPI Point to Point communication ...
int i;
if(rank == 0) {
for(i = 1; i < size; i++){
MPI_Send(&rows, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
MPI_Send(&columns, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
} else {
MPI_Recv(&rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(&columns, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}
int local_block_size = rows / size;
int process, column_pivot;
if(rank == 0) {
for(i = 1; i < size; i++){
MPI_Send((matrix_1D_mapped + (i * (local_block_size * rows))), (local_block_size * rows), MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
MPI_Send((rhs + (i * local_block_size)), local_block_size, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
}
for(i = 0; i < local_block_size * rows; i++){
matrix_local_block[i] = matrix_1D_mapped[i];
}
for(i = 0; i < local_block_size; i++){
rhs_local_block[i] = rhs[i];
}
} else {
MPI_Recv(matrix_local_block, local_block_size * rows, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
MPI_Recv(rhs_local_block, local_block_size, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
}
I am thinking about replacing MPI_Send with MPI_Bcast ... will that be the correct approach ?
For the first communication that data sent to all receivers is in fact identical, thus MPI_Bcast is the correct approach. The second communication distributes different chunks of a larger array to the recipients, this is done as a collective with MPI_Scatter. Note that scatter includes the root rank in the communication, so you can omit the manual local copy.
The code below tries to finds the max number of an array using mpi. However I keep getting the following error:
Rank 2 in job 47 caused collective abort of all ranks.
Exit status of rank 2 : killed by signal 9
Can anyone please tell me what's wrong?
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc , char * argv[])
{
int myRank , numOfProcesses;
int source , destination;
int tag = 0;
int i = 0, j = 0, k = 0;
int masterArray[] = {5,6,8,10,12,3,9,-1,3,7};
int max , globalMax = -100000;
int flag = 0;
MPI_Init(&argc, &argv);
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD , &myRank);
MPI_Comm_size(MPI_COMM_WORLD , &numOfProcesses);
printf("Process : %d \n" , myRank);
int masterSize = sizeof(masterArray)/sizeof(int);
//printf("%d \n" , masterSize);
int slaveSize = masterSize/(numOfProcesses-1);
//printf("%d \n" , slaveSize);
int slaveArray[slaveSize];
if (myRank == 0){
for (i=1; i<numOfProcesses; i++){
for (j=0; j<slaveSize; j++){
slaveArray[j] = masterArray[k];
// printf("%d \n" , masterArray[k]);
k++;
}
MPI_Send(slaveArray, slaveSize, MPI_INT, i, tag, MPI_COMM_WORLD);
}
for (i=1; i<numOfProcesses; i++){
MPI_Recv(max , 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
if (globalMax < max)
max = globalMax;
}
printf("Global Maximum %d \n" , globalMax);
}
else{
MPI_Recv(slaveArray , slaveSize, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
max = slaveArray[0];
for (i=0; i<slaveSize; i++){
if (slaveArray[i] > max)
max = slaveArray[i];
}
printf("Max in %d %d \n" , myRank, max);
MPI_Send(max , 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Sending and receiving messages in MPI always works through addresses. In the following:
MPI_Recv(max , 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
...
MPI_Send(max , 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
You use the value. You must add & to take the address.
You should also learn to use the appropriate collective operations: MPI_Scatter and MPI_Reduce.
By the way, this line is also in the wrong order:
max = globalMax;
Please also learn to listen to your compiler! Any reasonable compiler at resonable settings will warn you of passing an integer as an address.
I am trying to simply sum up all variables called "train_hr" and "test_hr" from all 10 processors and store and print the sum on processor 0. I checked to make sure the individual sums are NOT 0 (they are not, they are all in the 1000s). The sum it keeps reporting is 0. I have no idea why. I have looked at many examples of this, and I have done it exactly as instructed. Any help would be appreciated.
double train_hr = 0, test_hr = 0;
double train_hr_global = 0, test_hr_global = 0;
//Master processor
if (my_rank == 0) {
// sends a task to each processor
int curr_task = 0;
for(i = 0; i < num_procs; i++) {
if (curr_task < nsamples_all) {
MPI_Send(&curr_task, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
curr_task++;
}
}
int r;
MPI_Status status;
//keeps sending tasks to processors until there are no more tasks
while (curr_task < nsamples_all) {
MPI_Recv(&r, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &status);
MPI_Send(&curr_task, 1, MPI_INT, status.MPI_SOURCE, 1, MPI_COMM_WORLD);
curr_task++;
}
//tell all processors to stop receiving
int a = -1;
for (i = 0; i < num_procs; i++) {
MPI_Send(&a, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
}
}
//Helper processors
else {
int stop = 1;
while(stop != 0){
int i;
//Receives task OR stop alert from master
MPI_Status status;
MPI_Recv(&i, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
if (i == -1) {
stop = 0;
}
//computations
else{
float r;
//unimportant computations here
train_hr += r;
test_hr += r;
//Tells master processor it is done
MPI_Send(&i, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
}
}
}
//At this point I checked the current values of train_hr and test_hr on each helper processor. They are all non-zero.
MPI_Reduce(&train_hr, &train_hr_global, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&test_hr, &test_hr_global, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
//at this point, the vales of train_hr_global and test_hr_global on the master processor (processor 0) are 0 when they should be the sum of all the processors values.
}
I'm writing program for testing whether numbers are prime. At the beginning I calculate how much numbers assign to each process, then send this amount to the processes. Next, calculations are performed and data send back to process 0 that save the results. Below code works but when I increase number of process my program doesn't speedup. It seems to me that my program doesn't work in parallel. What's wrong? This is my first program in MPI so any advices are welcome.
I use mpich2 an I test my program on Intel Core i7-950.
main.cpp:
if (rank == 0) {
int workers = (size-1);
readFromFile(path);
int elements_per_proc = (N + (workers-1)) / workers;
int rest = N % elements_per_proc;
for (int i=1; i <= workers; i++) {
if((i == workers) && (rest != 0))
MPI_Send(&rest, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
else
MPI_Send(&elements_per_proc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
}
int it = 1;
for (int i=0; i < N; i++) {
if((i != 0) && ((i % elements_per_proc) == 0))
it++;
MPI_Isend(&input[i], 1, MPI_INT, it, 0, MPI_COMM_WORLD, &send_request);
}
}
if (rank != 0) {
int count;
MPI_Recv(&count, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
for (int j=0; j < count; j++) {
MPI_Recv(&number, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
result = test(number, k);
send_array[0] = number;
send_array[1] = result;
MPI_Send(send_array, 2, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
if (rank == 0) {
for (int i=0; i < N; i++) {
MPI_Recv(rec_array, 2, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
// save results
}
}
Your implementation probably doesn't scale well to many processes, since you communicate in every step. You currently communicate the numbers and results for each single input, which incurs a large latency overhead. Instead you should think about communicating the input in-bulk (ie, using a single message).
Furthermore, using MPI collective operations (MPI_Scatter/MPI_Gather) instead of loops of MPI_Send/MPI_Recv might increase your performance further.
Additionally, you can utilize the master process to work on a chunk of the input as well.
A much more scalable implementation might then look as follows:
// tell everybody how many elements there are in total
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
// everybody determines how many elements it will work on
// (include the master process)
int num_local_elements = N / size + (N % size < rank ? 1 : 0);
// allocate local size
int* local_input = (int*) malloc(sizeof(int)*num_local_elements);
// distribute the input from master to everybody using MPI_Scatterv
int* counts; int* displs;
if (rank == 0) {
counts = (int*)malloc(sizeof(int) * size);
displs = (int*)malloc(sizeof(int) * size);
for (int i = 0; i < size; i++) {
counts[i] = N / size + (N % size < i ? 1 : 0);
if (i > 0)
displs[i] = displs[i-1] + counts[i-1];
}
// scatter from master
MPI_Scatterv(input, counts, displs, MPI_INT, local_input, num_local_elements, MPI_INT, 0, MPI_COMM_WORLD);
} else {
// receive scattered numbers
MPI_Scatterv(NULL, NULL, NULL, MPI_DATATYPE_NULL, local_input, num_local_elements, MPI_INT, 0, MPI_COMM_WORLD);
}
// perform prime testing
int* local_results = (int*) malloc(sizeof(int)*num_local_elements);
for (int i = 0; i < num_local_elements; ++i) {
local_results[i] = test(local_input[i], k);
}
// gather results back to master process
int* results;
if (rank == 0) {
results = (int*)malloc(sizeof(int)*N);
MPI_Gatherv(local_results, num_local_elements, MPI_INT, results, counts, displs, MPI_INT, 0, MPI_COMM_WORLD);
// TODO: save results on master process
} else {
MPI_Gatherv(local_results, num_local_elements, MPI_INT, NULL, NULL, NULL, MPI_INT, 0, MPI_COMM_WORLD);
}