Say 2 processes are participating. Process 0 (rank 0) has
A = { a d
b e
c f
}
and process 1 (rank 1) has
A = { g
h
i
}
I want both processors to send these columns to rank 0 so that rank 0 will have the following in say another 2D-array.
B = { a d g
b e h
c f i
}
I create a new column data type for the MPI_Gatherv and am trying the following code, which is getting me nowhere.
My specific questions are:
- How should I approach this?
- What should the send_type and recv_type be?
- How should displacements be specified (should they be in terms of the new data type or MPI_CHAR)?
Thanks.
This is my code:
#include <stdio.h>
#include <mpi.h>
/*
 * Asker's original attempt: every rank fills a local char array A and then
 * calls MPI_Gatherv so that rank 0 receives the columns into B, using a
 * derived datatype built with MPI_Type_vector for both send and receive.
 *
 * NOTE(review): calloc is called below, but the includes above are only
 * <stdio.h> and <mpi.h>; <stdlib.h> appears to be missing.
 */
int main(int argc, char *argv[])
{
int numprocs, my_rank;
long int i, j;
/* NOTE(review): status is declared but not used anywhere in this function. */
MPI_Status status;
char **A;
char **B;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/*
 * Rank 0: A has 3 rows of 2 chars, B has 3 rows of 3 chars. Each row is
 * obtained from its own calloc call, so consecutive rows are separate
 * allocations rather than one contiguous buffer.
 */
if(my_rank == 0)
{
A = (char **)calloc((3), sizeof(char *));
B = (char **)calloc((3), sizeof(char *));
for(i=0; i<3; ++i)
{
A[i] = (char *)calloc(2, sizeof(char));
B[i] = (char *)calloc(3, sizeof(char));
}
A[0][0] = 'a';
A[1][0] = 'b';
A[2][0] = 'c';
A[0][1] = 'd';
A[1][1] = 'e';
A[2][1] = 'f';
}
/* Every other rank: A has 3 rows of 1 char; B is not assigned here. */
else
{
A = (char **)calloc((3), sizeof(char *));
for(i=0; i<3; ++i)
{
A[i] = (char *)calloc(1, sizeof(char));
}
A[0][0] = 'g';
A[1][0] = 'h';
A[2][0] = 'i';
}
/* count = 3, blocklength = 1, stride = 1: three chars placed one after another. */
MPI_Datatype b_col_type;
MPI_Type_vector(3, 1, 1, MPI_CHAR, &b_col_type);
MPI_Type_commit(&b_col_type);
/*
 * recvcounts[my_rank] is also used as this rank's send count.
 * NOTE(review): both arrays have exactly two entries, so this presumably
 * expects to be run with 2 processes -- confirm.
 */
int displs[2] = {0, 2};
int recvcounts[2] = {2, 1};
/*
 * NOTE(review): &B[0][0] is evaluated on every rank, but B is only
 * assigned in the rank 0 branch above.
 */
MPI_Gatherv(&A[0][0], recvcounts[my_rank], b_col_type, &B[0][0], recvcounts, displs, b_col_type, 0, MPI_COMM_WORLD);
/* Rank 0 prints B as 3 rows of 3 characters. */
if(my_rank == 0)
{
for(i=0; i<3; ++i)
{
for(j=0; j<3; ++j)
printf("%c ", B[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
So first off - and this comes up with MPI and C arrays all the time - you can't really do the standard C two dimensional array thing. Let's look at this:
/* Table of 3 row pointers ... */
A = (char **)calloc((3), sizeof(char *));
for(i=0; i<3; ++i)
{
/* ... and each row of 2 chars comes from its own, separate calloc call. */
A[i] = (char *)calloc(2, sizeof(char));
}
This will definitely allocate a 3x2 array of characters, but you have no idea how the resulting data is laid out in memory. In particular, there's no guarantee at all that A[1][0]
immediately follows A[0][1]
. That makes it very difficult to create MPI datatypes which span the data structure! You need to allocate 3x2 contiguous bytes, and then make the array point into it:
/*
 * Allocate an n-by-m array of chars backed by ONE contiguous,
 * zero-initialised buffer, plus a table of n row pointers into it.
 * Element [i][j] lives at offset i*m + j from array[0], which is what lets
 * MPI datatypes describe rows and columns of the array.
 *
 * Returns NULL if n or m is not positive, or if either allocation fails.
 * Release the result with charfree2d().
 */
char **charalloc2d(int n, int m) {
    if (n <= 0 || m <= 0) {
        return NULL;
    }

    /* Do the size arithmetic in size_t rather than int. */
    char *data = calloc((size_t)n * (size_t)m, sizeof *data);
    if (data == NULL) {
        return NULL;
    }

    char **array = calloc((size_t)n, sizeof *array);
    if (array == NULL) {
        free(data);   /* don't leak the data buffer on partial failure */
        return NULL;
    }

    for (int i = 0; i < n; i++) {
        array[i] = data + (size_t)i * (size_t)m;
    }
    return array;
}
/*
 * Release an array built as one contiguous data buffer plus a row-pointer
 * table: array[0] is the start of the data buffer, array is the table.
 * Passing NULL is a no-op, mirroring free(NULL).
 */
void charfree2d(char **array) {
    if (array == NULL) {
        return;
    }
    free(array[0]);   /* the single contiguous data buffer */
    free(array);      /* the row-pointer table */
}
/* ... */
/* 3 rows by 2 columns; with charalloc2d, row i starts at data[i*ncols]. */
nrows = 3;
ncols = 2;
A = charalloc2d(nrows,ncols);
Now we know something about the layout of the array, and can depend on that to build datatypes.
You're on the right track with the data types --
/* As posted in the question: count = 3, blocklength = 1, stride = 1. */
MPI_Datatype b_col_type;
MPI_Type_vector(3, 1, 1, MPI_CHAR, &b_col_type);
MPI_Type_commit(&b_col_type);
the signature of MPI_Type_vector is (count, blocklen, stride, old_type, *newtype).
We want nrows characters, that come in blocks of 1; but they're spaced ncols apart; so that's the stride.
Note that this is really the column type of the A
array, rather than B
; the type will depend on the number of columns in the array. So each process is using a different sendtype, which is fine.
/* One column of A: nrows blocks of 1 char, each ncols chars after the previous. */
MPI_Datatype a_col_type;
MPI_Type_vector(nrows, 1, ncols, MPI_CHAR, &a_col_type);
MPI_Type_commit(&a_col_type);
The final step is the MPI_Gatherv
, and here you have to be a little cute. The trick is, we want to send (and receive) multiple of these things at a time - that is, several consecutive ones. But we need the next column not to be nrows*ncols chars away, but just one char away. Luckily, we can do that by setting the upper bound of the data structure to be just one character away from the lower bound, so that the next element does start in the right place. This is allowed by the standard, and in fact one of their examples in section 4.1.4 there hinges on it.
To do that, we create a resized type that ends just one byte after it starts:
/* Same column layout, but lower bound 0 and extent of one char. */
MPI_Type_create_resized(a_col_type, 0, 1*sizeof(char), &new_a_col_type);
MPI_Type_commit(&new_a_col_type);
and similarly for B
; and now we can send and receive multiples of these as one would expect. So the following works for me:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Allocate an n-by-m array of chars backed by ONE contiguous,
 * zero-initialised buffer, plus a table of n row pointers into it.
 * Element [i][j] lives at offset i*m + j from array[0], which is what lets
 * MPI datatypes describe rows and columns of the array.
 *
 * Returns NULL if n or m is not positive, or if either allocation fails.
 * Release the result with charfree2d().
 */
char **charalloc2d(int n, int m) {
    if (n <= 0 || m <= 0) {
        return NULL;
    }

    /* Do the size arithmetic in size_t rather than int. */
    char *data = calloc((size_t)n * (size_t)m, sizeof *data);
    if (data == NULL) {
        return NULL;
    }

    char **array = calloc((size_t)n, sizeof *array);
    if (array == NULL) {
        free(data);   /* don't leak the data buffer on partial failure */
        return NULL;
    }

    for (int i = 0; i < n; i++) {
        array[i] = data + (size_t)i * (size_t)m;
    }
    return array;
}
/*
 * Release an array built as one contiguous data buffer plus a row-pointer
 * table: array[0] is the start of the data buffer, array is the table.
 * Passing NULL is a no-op, mirroring free(NULL).
 */
void charfree2d(char **array) {
    if (array == NULL) {
        return;
    }
    free(array[0]);   /* the single contiguous data buffer */
    free(array);      /* the row-pointer table */
}
int main(int argc, char *argv[])
{
int numprocs, my_rank;
int nrows, ncols, totncols;
long int i, j;
char **A;
char **B;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if(my_rank == 0)
{
nrows=3;
ncols=2;
totncols = 3;
A = charalloc2d(nrows, ncols);
B = charalloc2d(nrows, totncols);
A[0][0] = 'a';
A[1][0] = 'b';
A[2][0] = 'c';
A[0][1] = 'd';
A[1][1] = 'e';
A[2][1] = 'f';
}
else
{
nrows = 3;
ncols = 1;
A = charalloc2d(nrows, ncols);
B = charalloc2d(1,1); /* just so gatherv survives */
A[0][0] = 'g';
A[1][0] = 'h';
A[2][0] = 'i';
}
MPI_Datatype a_col_type, new_a_col_type;
MPI_Type_vector(nrows, 1, ncols, MPI_CHAR, &a_col_type);
MPI_Type_commit(&a_col_type);
/* make the type have extent 1 character -- now the next
* column starts in the next character of the array
*/
MPI_Type_create_resized(a_col_type, 0, 1*sizeof(char), &new_a_col_type);
MPI_Type_commit(&new_a_col_type);
MPI_Datatype b_col_type, new_b_col_type;
if (my_rank == 0) {
MPI_Type_vector(nrows, 1, totncols, MPI_CHAR, &b_col_type);
MPI_Type_commit(&b_col_type);
/* similarly "resize" b columns */
MPI_Type_create_resized(b_col_type, 0, 1*sizeof(char), &new_b_col_type);
MPI_Type_commit(&new_b_col_type);
}
int displs[2] = {0, 2};
int recvcounts[2] = {2, 1};
MPI_Gatherv(A[0], recvcounts[my_rank], new_a_col_type,
B[0], recvcounts, displs, new_b_col_type,
0, MPI_COMM_WORLD);
if(my_rank == 0)
{
for(i=0; i<3; ++i)
{
for(j=0; j<3; ++j)
printf("%c ", B[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}