OpenMPI strange output error

Posted 2019-07-24 05:01

I am using OpenMPI 1.3 on a small cluster.

This is the function that I am calling:

void invertColor_Parallel(struct image *im, int size, int rank)
{
     int i,j,aux,r;

     int total_pixels = (*im).ih.width * (*im).ih.height;
     int qty = total_pixels/(size-1);
     int rest = total_pixels % (size-1);
     MPI_Status status;

     //printf("\n%d\n", rank);

     if(rank == 0)
     {
         for(i=1; i<size; i++){
         j = i*qty - qty;
         aux = j;

         if(rest != 0 && i==size-1) {qty=qty+rest;} // to distribute the entire load
         //printf("\nj: %d  qty: %d  rest: %d\n", j, qty, rest);

         MPI_Send(&aux, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG+1, MPI_COMM_WORLD);
         MPI_Send(&qty, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG+2, MPI_COMM_WORLD);

         MPI_Send(&(*im).array[j], qty*3, MPI_BYTE, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD);
        }

     }
     else
     {
        MPI_Recv(&aux, 1, MPI_INT, MPI_ANY_SOURCE, MASTER_TO_SLAVE_TAG+1, MPI_COMM_WORLD,&status);
        MPI_Recv(&qty, 1, MPI_INT, MPI_ANY_SOURCE, MASTER_TO_SLAVE_TAG+2, MPI_COMM_WORLD,&status);

        pixel *arreglo = (pixel *)calloc(qty, sizeof(pixel));
        MPI_Recv(&arreglo[0], qty*3, MPI_BYTE, MPI_ANY_SOURCE, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD,&status);
        //printf("Receiving node=%d, message=%d\n", rank, aux);

        for(i=0;i<qty;i++)
        {
            arreglo[i].R = 255-arreglo[i].R;
            arreglo[i].G = 255-arreglo[i].G;
            arreglo[i].B = 255-arreglo[i].B;
        }

        MPI_Send(&aux, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG+1, MPI_COMM_WORLD);
        MPI_Send(&qty, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG+2, MPI_COMM_WORLD);
        MPI_Send(&arreglo[0], qty*3, MPI_BYTE, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD);

        free(arreglo);
     }


    if (rank==0){
        //printf("\nrank: %d\n", rank);
        for (i=1; i<size; i++) // until all slaves have handed back the processed data
        {
            MPI_Recv(&aux, 1, MPI_INT, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG+1, MPI_COMM_WORLD,&status);
            MPI_Recv(&qty, 1, MPI_INT, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG+2, MPI_COMM_WORLD,&status);
            MPI_Recv(&(*im).array[aux], qty*3, MPI_BYTE, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD,&status);
        }
    }
}

int main(int argc, char *argv[])
{
    //////////time counter
    clock_t begin;


    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Status status;

    int op = (int)atof(argv[1]);
    char filename_toload[50];
    int bright_number=0;
    struct image image2;

    if (rank==0)
    {
        loadImage(&image2, argv[2]);
    }

    //Broadcast the user's choice to all other ranks
    MPI_Bcast(&op, 1, MPI_INT, 0, MPI_COMM_WORLD);


    switch(op)
    {
        case 1:
                if (rank==0) {begin = clock();}
                MPI_Barrier(MPI_COMM_WORLD);
                invertColor_Parallel(&image2, size, rank);
                MPI_Barrier(MPI_COMM_WORLD);
                if (rank==0) {runningTime(begin, clock()); printf("The image colors were inverted\n\n");}
                break;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank==0)
    {
        saveImage(&image2, argv[3]);
        free(image2.array);
    }

    MPI_Finalize();

    return 0;
}

and sometimes I get the following error.

cluster@maestro:/mpi$ mpirun -np 60 -hostfile /home/hostfile paralelo 1 image.bmp out.bmp

cluster@nodo1's password:
[maestro:5194] *** An error occurred in MPI_Recv
[maestro:5194] *** on communicator MPI_COMM_WORLD
[maestro:5194] *** MPI_ERR_TRUNCATE: message truncated
[maestro:5194] *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)
--------------------------------------------------------------------------
mpirun has exited due to process rank 0 with PID 5194 on node maestro
exiting without calling "finalize". This may have caused other
processes in the application to be terminated by signals sent by
mpirun (as reported here).
--------------------------------------------------------------------------
[nodo1] [[49223,1],55][../../../../../../ompi/mca/btl/tcp/btl_tcp_frag.c:216:mca_btl_tcp_frag_recv] mca_btl_tcp_frag_recv: readv failed: Connection reset by peer (104)

Whether it throws the error depends on the process count; with -np 99, for example, it works fine.

Any idea on what is going on?

Tags: c openmpi
1 Answer

霸刀☆藐视天下 · answered 2019-07-24 05:25

This code is probably the culprit:

if (rank==0){
    //printf("\nrank: %d\n", rank);
    for (i=1; i<size; i++) // until all slaves have handed back the processed data
    {
        MPI_Recv(&aux, 1, MPI_INT, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG+1, MPI_COMM_WORLD,&status);
        MPI_Recv(&qty, 1, MPI_INT, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG+2, MPI_COMM_WORLD,&status);
        MPI_Recv(&(*im).array[aux], qty*3, MPI_BYTE, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD,&status);
    }
}

Since you are (ab-)using MPI_ANY_SOURCE, you are essentially creating the perfect conditions for message reception races. It is entirely possible that the first MPI_Recv matches a message from rank i, the second one matches a message from rank j, and the third one matches a message from rank k, where i, j, and k have completely different values. Therefore it is possible that you receive the wrong number of pixels into the wrong image slot. Also, if it happens that rank k sends more pixels than the value of qty from rank j specifies, you'll get a truncation error (and you are actually getting it).

A word of advice: never use MPI_ANY_SOURCE frivolously unless absolutely sure that the algorithm is correct and no races could occur.

Either rewrite the code as:

if (rank==0){
    //printf("\nrank: %d\n", rank);
    for (i=1; i<size; i++) // until all slaves have handed back the processed data
    {
        MPI_Recv(&aux, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG+1, MPI_COMM_WORLD, &status);
        MPI_Recv(&qty, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG+2, MPI_COMM_WORLD, &status);
        MPI_Recv(&(*im).array[aux], qty*3, MPI_BYTE, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
    }
}

or even better as:

if (rank==0){
    //printf("\nrank: %d\n", rank);
    for (i=1; i<size; i++) // until all slaves have handed back the processed data
    {
        MPI_Recv(&aux, 1, MPI_INT, MPI_ANY_SOURCE, SLAVE_TO_MASTER_TAG+1,
                 MPI_COMM_WORLD, &status);
        MPI_Recv(&qty, 1, MPI_INT, status.MPI_SOURCE, SLAVE_TO_MASTER_TAG+2,
                 MPI_COMM_WORLD, &status);
        MPI_Recv(&(*im).array[aux], qty*3, MPI_BYTE, status.MPI_SOURCE, SLAVE_TO_MASTER_TAG,
                 MPI_COMM_WORLD, &status);
    }
}

That way the three receives always get their messages from the same process and the race condition is eliminated. The second version works by first receiving a message from any rank, then reading the status.MPI_SOURCE field to obtain the actual source rank and using it for the two subsequent receives.
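As a side note (this is an addition, not part of the original fix), once the receives are matched by source you can also double-check the size of what actually arrived: MPI_Get_count on the returned status reports how many elements of a given datatype the last receive delivered. A minimal sketch, reusing the question's qty*3 byte count as the expected size:

#include <stdio.h>
#include <mpi.h>

/* Illustrative sketch only: compare the number of MPI_BYTE elements the
 * last receive delivered against the expected count (qty*3 in the code
 * above). An over-long message never reaches this check, because it
 * already aborts with MPI_ERR_TRUNCATE under the default
 * MPI_ERRORS_ARE_FATAL handler, so this only flags short payloads. */
static void check_payload(MPI_Status *st, int expected_bytes)
{
    int received = 0;
    MPI_Get_count(st, MPI_BYTE, &received);
    if (received != expected_bytes)
        fprintf(stderr, "size mismatch: expected %d bytes, got %d from rank %d\n",
                expected_bytes, received, st->MPI_SOURCE);
}

Calling check_payload(&status, qty*3) right after the third MPI_Recv in the loop above is a cheap sanity check; check_payload is a hypothetical helper name, not an MPI routine.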
