LAM/MPI General User's Mailing List Archives

From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2008-06-17 10:29:38


To follow up for the web archives, this was answered on the OMPI users
list:

     http://www.open-mpi.org/community/lists/users/2008/06/5880.php
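
The short version, for readers who only find this archive: the extent of the
send datatype decides where MPI_Scatter starts reading for each rank.
MPI_Scatter behaves as if the root sends rank i the data starting at
sendbuf + i * sendcount * extent(sendtype).  The MPI_Type_struct version
quoted below gives coltype an extent of Q doubles, so with sendcount 2,
rank 1's data starts 4 doubles into the matrix (a[2][0], hence the 5 and 7);
the MPI_Type_vector version has an extent of 3 doubles, so rank 1 starts at
a[1][1] (hence the 4 and 6).  Neither is an implementation bug.  The usual
fix is to give the column type an extent of exactly one double; a sketch of
that follows the quoted message below.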

On Jun 12, 2008, at 11:16 AM, Siegmar Gross wrote:

> Hi,
>
> I have a problem with derived data types and MPI_Scatter/MPI_Gather
> (Solaris 10 sparc, LAM-MPI 7.1.4).
>
> I want to distribute the columns of a matrix. At first I wrote a C
> program that implemented the derived data type "coltype" and
> distributed the columns via MPI_Send/MPI_Recv without problems.
> Next I modified the program and used MPI_Scatter/MPI_Gather to
> distribute and collect the columns. I implemented "coltype" once
> more with MPI_Type_struct. The program didn't work, so I used a 2x2
> matrix to figure out what was wrong. Each process prints its column
> elements after MPI_Scatter. The process with rank 1 did not get the
> values "2" and "4" (see below), but more or less 0. Then I used a
> 4x2 matrix and still a 2-element column (so I should see the upper
> 2x2 "matrix" in my columns) to get an idea which values process 1
> receives. As you can see below, it got "5" and "7", i.e. the values
> of the block which starts just after the first block, and not the
> values of the block which starts after the first element of the
> first block (a[2][0] instead of a[0][1]).
>
> Since I wasn't sure whether I could use MPI_Type_struct this way, I
> rewrote the program with MPI_Type_vector. This time the result was
> better but still not satisfying. Process 1 got values from the
> second column, but one value too late (starting with a[1][1]
> instead of a[1][0]).
>
> I assume that I have misunderstood a concept or have a programming
> error in my code, because I run into the same problem with MPICH,
> MPICH2, and OpenMPI, and it is not very likely that all
> implementations have a bug. Since I don't know how to proceed, I
> would be very grateful if someone could tell me whether I must
> blame myself for the error or whether it is possibly a bug in the
> implementations of the MPI libraries (however unlikely that is).
>
>
> MPI_Type_struct
> ===============
>
> tyr e5 158 mpicc e5_1a.c
> tyr e5 159 mpirun -np 2 a.out
>
> original matrix:
>
> 1 2
> 3 4
>
> rank: 0 c0: 1 c1: 3
> rank: 1 c0: 5.51719e-313 c1: 4.24399e-314
>
>
> tyr e5 160 mpicc e5_1a.c
> tyr e5 161 mpirun -np 2 a.out
>
> original matrix:
>
> 1 2
> 3 4
> 5 6
> 7 8
>
> rank: 0 c0: 1 c1: 3
> rank: 1 c0: 5 c1: 7
>
>
>
> MPI_Type_vector
> ===============
>
> tyr e5 119 mpicc e5_1b.c
> tyr e5 120 mpirun -np 2 N a.out
>
> original matrix:
>
> 1 2
> 3 4
> 5 6
> 7 8
>
> rank: 0 c0: 1 c1: 3
> rank: 1 c0: 4 c1: 6
>
>
> Thank you very much for any help or suggestions in advance.
>
>
> Kind regards
>
> Siegmar
>
> #include <stdio.h>
> #include <stdlib.h>
> #include "mpi.h"
>
> #define P 4      /* # of rows */
> #define Q 2      /* # of columns */
> #define FAKTOR 2 /* multiplicator for col. elem. */
>
> static void print_matrix (int p, int q, double **mat);
>
> int main (int argc, char *argv[])
> {
>   int    ntasks,            /* number of parallel tasks */
>          mytid,             /* my task id */
>          namelen,           /* length of processor name */
>          i, j,              /* loop variables */
>          tmp;               /* temporary value */
>   double matrix[P][Q],
>          column[2];
>          /* column[P]; */
>   int    blockcounts[2] = {1, 1};
>
>   MPI_Datatype types[2]   = {MPI_DOUBLE, MPI_UB};
>   MPI_Aint     offsets[2] = {0, Q * sizeof (double)};
>   MPI_Datatype coltype;
>
>   MPI_Init (&argc, &argv);
>   MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
>   MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
>   /* check that we have the correct number of processes in our
>      universe */
>   if (mytid == 0)
>   {
>     if (ntasks != Q)
>     {
>       printf ("\n\nWe need exactly %d processes.\n"
>               "Usage: mpirun -w -np %d N %s\n\n\n",
>               Q, Q, argv[0]);
>     }
>   }
>   if (ntasks != Q)
>   {
>     MPI_Finalize ();
>     exit (EXIT_FAILURE);
>   }
>   /* build the new type for a strided vector */
>   MPI_Type_struct (2, blockcounts, offsets, types, &coltype);
>   MPI_Type_commit (&coltype);
>   if (mytid == 0)
>   {
>     tmp = 1;
>     for (i = 0; i < P; ++i)              /* initialize matrix */
>     {
>       for (j = 0; j < Q; ++j)
>       {
>         matrix[i][j] = tmp++;
>       }
>     }
>     printf ("\n\noriginal matrix:\n\n");
>     print_matrix (P, Q, (double **) matrix);
>   }
>   /* distribute columns */
>   MPI_Scatter (matrix, 2, coltype, column, 2, MPI_DOUBLE, 0,
>   /* MPI_Scatter (matrix, P, coltype, column, P, MPI_DOUBLE, 0, */
>                MPI_COMM_WORLD);
>   printf ("rank: %d c0: %g c1: %g\n", mytid, column[0], column[1]);
>   for (i = 0; i < 2; ++i)
>   /* for (i = 0; i < P; ++i) */
>   {
>     if ((mytid % 2) == 0)
>     {
>       column[i] = column[i] * column[i];
>     }
>     else
>     {
>       column[i] = column[i] * FAKTOR;
>     }
>   }
>   /* wait for result vectors */
>   MPI_Gather (column, 2, MPI_DOUBLE, matrix, 2, coltype, 0,
>   /* MPI_Gather (column, P, MPI_DOUBLE, matrix, P, coltype, 0, */
>               MPI_COMM_WORLD);
>   if (mytid == 0)
>   {
>     printf ("\n\nresult matrix:\n"
>             "(odd columns: elements squared; even columns: elements "
>             "multiplied with %d)\n\n", FAKTOR);
>     print_matrix (P, Q, (double **) matrix);
>   }
>   MPI_Type_free (&coltype);
>   MPI_Finalize ();
>   return EXIT_SUCCESS;
> }
>
>
> /* print the values of a matrix
>  *
>  * input parameters:  p    number of rows
>  *                    q    number of columns
>  *                    mat  2D-matrix of "double" values
>  * output parameters: none
>  * return value:      none
>  * side effects:      none
>  *
>  */
> void print_matrix (int p, int q, double **mat)
> {
>   int i, j;                              /* loop variables */
>
>   for (i = 0; i < p; ++i)
>   {
>     for (j = 0; j < q; ++j)
>     {
>       printf ("%6g", *((double *) mat + i * q + j));
>     }
>     printf ("\n");
>   }
>   printf ("\n");
> }
>
> #include <stdio.h>
> #include <stdlib.h>
> #include "mpi.h"
>
> #define P 4      /* # of rows */
> #define Q 2      /* # of columns */
> #define FAKTOR 2 /* multiplicator for col. elem. */
>
> static void print_matrix (int p, int q, double **mat);
>
> int main (int argc, char *argv[])
> {
>   int    ntasks,            /* number of parallel tasks */
>          mytid,             /* my task id */
>          namelen,           /* length of processor name */
>          i, j,              /* loop variables */
>          tmp;               /* temporary value */
>   double matrix[P][Q],
>          column[2];
>   MPI_Datatype coltype;
>
>   MPI_Init (&argc, &argv);
>   MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
>   MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
>   /* check that we have the correct number of processes in our
>      universe */
>   if (mytid == 0)
>   {
>     if (ntasks != Q)
>     {
>       printf ("\n\nWe need exactly %d processes.\n"
>               "Usage: mpirun -w -np %d N %s\n\n\n",
>               Q, Q, argv[0]);
>     }
>   }
>   if (ntasks != Q)
>   {
>     MPI_Finalize ();
>     exit (EXIT_FAILURE);
>   }
>   /* build the new type for a strided vector */
>   MPI_Type_vector (2, 1, Q, MPI_DOUBLE, &coltype);
>   MPI_Type_commit (&coltype);
>   if (mytid == 0)
>   {
>     tmp = 1;
>     for (i = 0; i < P; ++i)              /* initialize matrix */
>     {
>       for (j = 0; j < Q; ++j)
>       {
>         matrix[i][j] = tmp++;
>       }
>     }
>     printf ("\n\noriginal matrix:\n\n");
>     print_matrix (P, Q, (double **) matrix);
>   }
>   /* distribute columns */
>   MPI_Scatter (matrix, 1, coltype, column, 2, MPI_DOUBLE, 0,
>                MPI_COMM_WORLD);
>   printf ("rank: %d c0: %g c1: %g\n", mytid, column[0], column[1]);
>   for (i = 0; i < 2; ++i)
>   {
>     if ((mytid % 2) == 0)
>     {
>       column[i] = column[i] * column[i];
>     }
>     else
>     {
>       column[i] = column[i] * FAKTOR;
>     }
>   }
>   /* wait for result vectors */
>   MPI_Gather (column, 2, MPI_DOUBLE, matrix, 1, coltype, 0,
>               MPI_COMM_WORLD);
>   if (mytid == 0)
>   {
>     printf ("\n\nresult matrix:\n"
>             "(odd columns: elements squared; even columns: elements "
>             "multiplied with %d)\n\n", FAKTOR);
>     print_matrix (P, Q, (double **) matrix);
>   }
>   MPI_Type_free (&coltype);
>   MPI_Finalize ();
>   return EXIT_SUCCESS;
> }
>
>
> /* print the values of a matrix
>  *
>  * input parameters:  p    number of rows
>  *                    q    number of columns
>  *                    mat  2D-matrix of "double" values
>  * output parameters: none
>  * return value:      none
>  * side effects:      none
>  *
>  */
> void print_matrix (int p, int q, double **mat)
> {
>   int i, j;                              /* loop variables */
>
>   for (i = 0; i < p; ++i)
>   {
>     for (j = 0; j < q; ++j)
>     {
>       printf ("%6g", *((double *) mat + i * q + j));
>     }
>     printf ("\n");
>   }
>   printf ("\n");
> }
> _______________________________________________
> This list is archived at http://www.lam-mpi.org/MailArchives/lam/
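
For completeness, here is a minimal sketch of the fix mentioned above, meant
to drop into the programs quoted here: build the column type with
MPI_Type_vector over all P rows and then shrink its extent to a single
double, so that consecutive columns start one element apart.  It assumes the
MPI-2 call MPI_Type_create_resized is available and that "column" is
declared as column[P]; with an MPI-1-only library you can get the same
effect by placing the MPI_UB marker at sizeof(double) instead of
Q * sizeof(double).  This is the general technique, not necessarily the
exact code from the thread linked above.

   /* sketch only -- assumes P, Q, matrix, and column[P] as in the
      quoted programs */
   MPI_Datatype vec, coltype;

   /* one element per row: P doubles with a stride of Q between them */
   MPI_Type_vector (P, 1, Q, MPI_DOUBLE, &vec);
   /* force the extent down to one double so MPI_Scatter/MPI_Gather
      advance by a single element per rank */
   MPI_Type_create_resized (vec, 0, sizeof (double), &coltype);
   MPI_Type_commit (&coltype);

   /* each rank gets exactly one column, stored as P contiguous doubles */
   MPI_Scatter (matrix, 1, coltype, column, P, MPI_DOUBLE, 0,
                MPI_COMM_WORLD);
   /* ... work on column[0..P-1] ... */
   MPI_Gather (column, P, MPI_DOUBLE, matrix, 1, coltype, 0,
               MPI_COMM_WORLD);

   MPI_Type_free (&coltype);
   MPI_Type_free (&vec);

With a send count of 1 and an extent of one double, rank i's column starts
at matrix[0][i], which is exactly the a[0][1] the original program expected
for rank 1.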

-- 
Jeff Squyres
Cisco Systems