Hi,
I'm trying to write an SPMD program which becomes a client if the
service SERVER is already published, or becomes the server if it is not.
This program runs as I expected, but I haven't found any answers
(man pages, mailing list, documentation) to these questions :
--- Is there a distributed lock within LAM daemons at publish time ?
--- Does a publish/unpublish/publish sequence for the same service
always give the same port ?
--- How can I make the MPI_Comm_connect primitive fail after a timeout
if it doesn't connect ?
--- Is there a way to ask for a list of already published services ?
--- How many processes can run simultaneously in a LAM universe ?
--- What is the maximum number of Connect requests that can be waiting
for an accept ?
Can someone help ?
Thanks a lot.
Eric Fayolle
EDF R&D.
Here is the program :
#include "mpi.h"
#include "stdio.h"
#include <errno.h>
/* Maximum number of attempts made by the lookup/publish loop and by the
   connect loop in main() before the process gives up. */
#define TIMEOUT 20
/* Number of doubles in the message buffer used for MPI_Recv/MPI_Send. */
#define MAX_DATA 100
/* Number of tag-2 (work) messages the server handles before it shuts down. */
#define ITERATIONS 10000
/*
 * SPMD client/server example based on MPI-2 name publishing.
 *
 * Every process opens a port, then repeatedly (at most TIMEOUT times):
 *   - looks up the service "SERVER"; on success it becomes a client;
 *   - otherwise tries to publish "SERVER" on its own port; on success it
 *     becomes the server.
 *
 * Server: accepts one client at a time on MPI_COMM_SELF and dispatches on
 * the tag of each received message:
 *   tag 2 -> do some work (shut down once ITERATIONS requests were handled)
 *   tag 1 -> the client disconnects; go back to accepting
 *   tag 0 -> shut down (unpublish, close the port, finalize)
 *   other -> MPI_Abort
 *
 * Client: closes its unused port, connects to the server, sends ten
 * tag-2 requests followed by one tag-1 request, then disconnects.
 *
 * Returns 0 on normal termination and -1 when one of the retry loops is
 * exhausted.
 */
int main( int argc, char **argv )
{
    MPI_Comm client, server;
    MPI_Status status;
    char port_name[MPI_MAX_PORT_NAME];      /* port opened by this process */
    char port_name_clt[MPI_MAX_PORT_NAME];  /* SERVER port returned by the lookup */
    double buf[MAX_DATA];
    int i = 0, it = 0, again, myrank;
    int srv = 0;                            /* 1 once this process has published SERVER */

    MPI_Init( &argc, &argv );
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Open_port(MPI_INFO_NULL, port_name);

    /* Try to be a client. If there is no service SERVER published, try to
       be a server. */
    /* MPI_Lookup_name and MPI_Publish_name take no communicator, so the
       handler set on MPI_COMM_WORLD is the one that lets them return an
       error code instead of aborting. */
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    while ( i != TIMEOUT ) {
        /* A lookup for an unpublished service generates an error */
        if ( MPI_Lookup_name("SERVER", MPI_INFO_NULL, port_name_clt) ==
             MPI_SUCCESS ) {
            printf("Rank %i : I get the service SERVER !\n", myrank);
            break;
        }
        /* Open question from the author: does LAM authorize two
           simultaneous calls to MPI_Publish_name, or is there a
           distributed lock ? */
        if ( MPI_Publish_name("SERVER", MPI_INFO_NULL, port_name) ==
             MPI_SUCCESS ) {
            printf("Rank %i : I've managed to publish the service SERVER !\n",
                   myrank);
            srv = 1;
            break;
        }
        /* NOTE: no delay is performed here (the sleep(1) call was disabled
           by the author), so TIMEOUT counts attempts, not seconds. */
        i++;
        printf("Rank %i : Waiting 1s for getting/for publishing service SERVER !\n",
               myrank);
    }
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);

    if ( i == TIMEOUT ) {
        printf("Waiting too long exiting !\n");
        MPI_Close_port(port_name);
        MPI_Finalize();
        /* Returning from main is equivalent to exit(-1) and avoids the
           undeclared exit() (no <stdlib.h> in this file). */
        return -1;
    }

    if ( srv ) { /* I am the Master */
        printf("server available at %s\n", port_name);
        while (1) {
            printf("Rank %i : Before MPI_Comm_accept\n", myrank);
            /*
              Remember MPI_Comm_accept is a collective operation :
              - If we use mpirun -c n -----> we have to use MPI_COMM_SELF
              - If we use client_server & client_server ----> we can use
                both MPI_COMM_SELF and MPI_COMM_WORLD
            */
            MPI_Comm_accept( port_name, MPI_INFO_NULL, 0, MPI_COMM_SELF,
                             &client );
            printf("Rank %i : After MPI_Comm_accept\n", myrank);

            again = 1;
            while (again) {
                MPI_Recv( buf, MAX_DATA, MPI_DOUBLE, MPI_ANY_SOURCE,
                          MPI_ANY_TAG, client, &status );
                switch (status.MPI_TAG) {
                case 2:
                    printf("Rank %i : Doing something.....\n", myrank);
                    it++;
                    if ( it < ITERATIONS )
                        break;
                    printf("Rank %i : Max iterations reached, exiting...\n",
                           myrank);
                    /* fallthrough: shut down exactly like case 0 */
                case 0:
                    printf("Rank %i : Exiting...\n", myrank);
                    MPI_Comm_free( &client );
                    MPI_Unpublish_name("SERVER", MPI_INFO_NULL, port_name);
                    MPI_Close_port( port_name );
                    MPI_Finalize();
                    return 0;
                case 1:
                    /* The client asked for a disconnection */
                    MPI_Comm_disconnect( &client );
                    again = 0;
                    break;
                default: /* Unexpected message type */
                    MPI_Abort( MPI_COMM_WORLD, 1 );
                }
            }
        }
    } else { /* I am a Slave */
        /* Since I'm not a Master I close the unused opened SERVER port */
        printf("Rank %i : Closing unused server port %s\n", myrank, port_name);
        MPI_Close_port( port_name );

        /* Connect to service SERVER, get the inter-communicator server */
        /* Caution, MPI_Comm_connect is a collective call :
           - when launched with mpirun -c n -----> only MPI_COMM_SELF works
           - when launched as client_server & client_server ---->
             MPI_COMM_WORLD works */
        /* Per the author, TIMEOUT is ineffective here since
           MPI_Comm_connect doesn't return if we asked for a service that
           has been unpublished ! */
        /* MPI_Comm_connect is called on MPI_COMM_SELF, so the error handler
           must be changed on that communicator for a failure to be
           returned rather than treated as fatal. */
        MPI_Errhandler_set(MPI_COMM_SELF, MPI_ERRORS_RETURN);
        i = 0;
        while ( i != TIMEOUT ) {
            printf("Rank %i : Trying to get the connection with SERVER %s !\n",
                   myrank, port_name_clt);
            if ( MPI_Comm_connect(port_name_clt, MPI_INFO_NULL, 0,
                                  MPI_COMM_SELF, &server )
                 == MPI_SUCCESS ) {
                printf("Rank %i : I get the connection with SERVER %s !\n",
                       myrank, port_name_clt);
                break;
            }
            /* NOTE: no delay is performed here either (sleep(1) disabled). */
            i++;
            printf("Rank %i : Waiting 1s for connecting to service SERVER %s !\n",
                   myrank, port_name_clt);
        }
        MPI_Errhandler_set(MPI_COMM_SELF, MPI_ERRORS_ARE_FATAL);

        if ( i == TIMEOUT ) {
            printf("Waiting too long exiting !\n");
            MPI_Finalize();
            return -1;
        }

        for (i = 0; i < 10; i++) {
            printf("Performing Task %i\n", i);
            /* Ask the server to work (TAG==2) */
            MPI_Send( buf, 0, MPI_DOUBLE, 0, 2, server ); /* etc */
        }
        /* Disconnect from the SERVER (TAG==1) */
        MPI_Send( buf, 0, MPI_DOUBLE, 0, 1, server );
        MPI_Comm_disconnect( &server );
        MPI_Finalize();
    }
    return 0;
}
|