======================================
From: Neil Storer <Neil.Storer_at_xxxxxxxxx>
Date: Tue, 13 Jul 2004 11:28:30 +0100
References: <BAY12-F21IEktho3sD100024f5c_at_[hidden]>
There is no way the compiler can know that this is the same as "y = x", but
the LAM runtime libraries do know when the ranks of the source and destination
are the same and optimise that case (i.e. they don't use TCP/IP or shared
memory to do the copy). The routines can't simply copy "x" to "y" willy-nilly,
though: what if another node had sent a message earlier? Then "y" would
already be in use and you would overwrite it!
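For what it's worth, even a send to yourself has to go through the normal
message-matching rules, which is why the library cannot turn it into a plain
assignment. If you just want the round trip without managing a request
yourself, something along these lines should do it (untested sketch, using
plain MPI_Sendrecv):

#include <stdio.h>
#include <mpi.h>

// Each rank sends a small buffer to itself and receives it back.
// MPI_Sendrecv schedules the send and the receive together, so no
// explicit nonblocking request is needed to avoid deadlock.
int main(int argc, char **argv) {
  int rank;
  float x[3], y[3];
  MPI_Status status;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  x[0] = 1.0f; x[1] = 2.0f; x[2] = 3.0f;

  // send x to myself with tag 1 and receive the same message into y
  MPI_Sendrecv(x, 3, MPI_FLOAT, rank, 1,
               y, 3, MPI_FLOAT, rank, 1,
               MPI_COMM_WORLD, &status);

  printf("rank %d: y = %f %f %f\n", rank, y[0], y[1], y[2]);

  MPI_Finalize();
  return 0;
}

The library still runs its matching logic here, so this is a convenience,
not a guarantee of a zero-copy transfer.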
=======================================
To what extent is MPI optimized for local Send/Recv?
I have the following MPI program, which I compiled and ran with LAM-MPI 6.5.9
and gcc 3.2 on a Sun workstation. I used MPI_Isend to avoid deadlock. The time
taken with MPI_Isend/MPI_Recv is significantly more than the time taken when I
simply copy between buffers. Could you see if I did anything wrong? Or can I
conclude that there is still a lot of room for improving the optimization when
the send and receive are on the same node?
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <mpi.h>
// This simple example shows that
// with MPI one node can send to itself.
// But the cost of sending to oneself
// is high.
#define MPI 1
#define DIRECT_COPY 0
// forward declaration for init(), defined at the bottom of the file
int init(int n, float a[]);
int main(int argc, char **argv) {
  int i, j;
  int N;
  int rank;
  int num_processors;
  int recv_cnt = 0;
  double start_time, end_time;
  float x[3], y[3], sum;
  float *a;
  MPI_Status status;
  MPI_Request request;

  /* initialize MPI variables and assign ranks to processors */
#if(MPI)
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_processors);
  printf("***** num_processors: %d, rank: %d *****\n\n",
         num_processors, rank);
#endif //(MPI)

  if(argc != 2) {
    printf("Usage: a.out N\n");
#if(MPI)
    MPI_Finalize();
#endif
    return -1;
  }
  else {
    N = atoi(argv[1]);
  }
  a = (float *)malloc((N+1)*sizeof(float));
  init(N, a);

  start_time = MPI_Wtime();

  // original sequential algorithm
  // "left-looking"
  for(j=2; j<=N; j++) {
    for(i=1; i<=j-1; i++) {
#if(MPI)
      x[0] = a[j];
      x[1] = j;
      x[2] = i;
#if(!DIRECT_COPY)
      // round-trip the values through MPI on the same rank
      MPI_Isend(x, 3, MPI_FLOAT, rank, 1, MPI_COMM_WORLD, &request);
      MPI_Recv(y, 3, MPI_FLOAT, rank, MPI_ANY_TAG,
               MPI_COMM_WORLD, &status);
      // complete the nonblocking send so the request is not leaked
      MPI_Wait(&request, &status);
#else
      y[0] = x[0];
      y[1] = x[1];
      y[2] = x[2];
#endif //(!DIRECT_COPY)
      j = (int)y[1];
      i = (int)y[2];
      a[j] = y[0];
      recv_cnt++;
#endif //(MPI)
      a[j] = j*(a[j]+a[i])/(j+i);
    }
#if(MPI)
    x[0] = a[j];
    x[1] = j;
    x[2] = i;
#if(!DIRECT_COPY)
    MPI_Isend(x, 3, MPI_FLOAT, rank, 1, MPI_COMM_WORLD, &request);
    MPI_Recv(y, 3, MPI_FLOAT, rank, MPI_ANY_TAG,
             MPI_COMM_WORLD, &status);
    // complete the nonblocking send so the request is not leaked
    MPI_Wait(&request, &status);
#else
    y[0] = x[0];
    y[1] = x[1];
    y[2] = x[2];
#endif //(!DIRECT_COPY)
    j = (int)y[1];
    i = (int)y[2];
    a[j] = y[0];
    recv_cnt++;
#endif //(MPI)
  }
  end_time = MPI_Wtime();
  printf("time = %f\n", end_time-start_time);

  sum = 0.0;
  for(j=1; j<=N; j++) {
    sum += a[j];
  }
  printf("sum: %f\n", sum);

  free(a);
#if(MPI)
  printf("recv_cnt: %d\n", recv_cnt);
  MPI_Finalize();
#endif
  return 0;
}
int init(int n, float a[]) {
  int j;
  for(j=1; j<=n; j++) {
    a[j] = j-1;
  }
  return 0;
}
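For comparison, a stripped-down loop like the one below should isolate the
cost of a single self round trip from the rest of the algorithm (rough,
untested sketch; NITER and the message size are arbitrary):

#include <stdio.h>
#include <mpi.h>

// Time NITER send-to-self round trips of a 3-float message and report
// the average cost per round trip.
#define NITER 100000

int main(int argc, char **argv) {
  int rank, k;
  float x[3] = {1.0f, 2.0f, 3.0f}, y[3];
  double t0, t1;
  MPI_Status status;
  MPI_Request request;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  t0 = MPI_Wtime();
  for(k = 0; k < NITER; k++) {
    MPI_Isend(x, 3, MPI_FLOAT, rank, 1, MPI_COMM_WORLD, &request);
    MPI_Recv(y, 3, MPI_FLOAT, rank, 1, MPI_COMM_WORLD, &status);
    MPI_Wait(&request, &status);   // complete the nonblocking send
  }
  t1 = MPI_Wtime();

  printf("rank %d: %d self round trips, %g seconds each\n",
         rank, NITER, (t1 - t0) / NITER);

  MPI_Finalize();
  return 0;
}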