LAM/MPI logo

LAM/MPI General User's Mailing List Archives

  |   Home   |   Download   |   Documentation   |   FAQ   |   all just in this list

From: Eric FAYOLLE (fayolle_at_[hidden])
Date: 2004-03-22 09:48:46


Hi,

I'm trying to write an SPMD program that becomes a client if the
service SERVER is already published, or becomes a server if it is not.
The program runs as I expected, but I haven't found any answers
(man pages, mailing list, documentation) to these questions:

--- Is there a distributed lock within the LAM daemons at publish time?
--- Does a publish/unpublish/publish sequence for the same service
always give the same port?
--- How can I make the MPI_Comm_connect primitive fail after a timeout if
it doesn't connect?
--- Is there a way to ask for a list of already published services?
--- How many processes can be running simultaneously in a LAM universe?
--- What is the maximum number of connect requests that can be waiting
for an accept?

Can someone help?

Thanks a lot.
Eric Fayolle
EDF R&D.

Here is the program :

#include "mpi.h"
#include "stdio.h"
#include <errno.h>

/* Maximum number of attempts made by each retry loop in main(). */
#define TIMEOUT 20
/* Number of doubles in the message buffer; also used as the receive count. */
#define MAX_DATA 100
/* Number of tag-2 requests after which the server shuts itself down. */
#define ITERATIONS 10000

int main( int argc, char **argv )
{
  MPI_Comm client,server;
  MPI_Status status;

  char port_name [MPI_MAX_PORT_NAME];
  char port_name_clt [MPI_MAX_PORT_NAME];
  double buf [MAX_DATA];
  int i=0,it=0,size, again, myrank;
  int clt=0, srv=0;

  MPI_Init( &argc, &argv );
 
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  MPI_Open_port(MPI_INFO_NULL, port_name);

  /* Try to be a client. If there is no service SERVER published, try to
be a server */
  /* Lookup for service SERVER for TIMEOUT seconds*/
  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
  while ( i != TIMEOUT ) {

    /* A lookup for an unpublished service generate an error */
     if ( MPI_Lookup_name("SERVER", MPI_INFO_NULL, port_name_clt) ==
MPI_SUCCESS ) {
       printf("Rank %i : I get the service SERVER !\n",myrank);
       clt = 1;
       break;
     }
    
     /* Does LAM authorize two simultaneous call to MPI_Publish_name
        or is there a distributed lock ?
     */
     if ( MPI_Publish_name("SERVER", MPI_INFO_NULL, port_name) ==
MPI_SUCCESS ) {
       printf("Rank %i : I've managed to publish the service SERVER
!\n",myrank);
       srv = 1;
       break;
     }
     /* sleep(1); */
     i++;
     printf("Rank %i : Waiting 1s for getting/for publishing service
SERVER !\n",myrank);

  }
  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);
 
  if ( i == TIMEOUT ) {
    printf("Waiting too long exiting !\n");
    MPI_Close_port(port_name);
    MPI_Finalize();
    exit(-1);
  }

 
  if ( srv ) { /*I am the Master */

    printf("server available at %s\n",port_name);

    while (1) {
     
      printf("Rank %i : Before MPI_Comm_accept\n",myrank);
      /*
        Remember MPI_Comm_accept is a collective operation :
      - If we use mpirun -c n -----> we have to use
MPI_COMM_SELF
      - If we use client_server & client_server ----> we can use both
MPI_COMM_SELF,MPI_COMM_WORLD
      */
      MPI_Comm_accept( port_name, MPI_INFO_NULL, 0, MPI_COMM_SELF,
&client );
      printf("Rank %i : After MPI_Comm_accept\n",myrank);
      again = 1;
   
      while (again) {
        MPI_Recv( buf, MAX_DATA, MPI_DOUBLE, MPI_ANY_SOURCE,
MPI_ANY_TAG, client, &status );
        switch (status.MPI_TAG) {
   
        case 2:
          printf("Rank %i : Doing something.....\n",myrank);
          it++;
          if ( it < ITERATIONS )
            break;
          else {
            printf("Rank %i : Max iterations reached, exiting...\n",myrank);
            /*go to case 0 (no break) */
          };
        case 0:
          printf("Rank %i : Exiting...\n",myrank);
          MPI_Comm_free( &client );
          MPI_Unpublish_name("SERVER", MPI_INFO_NULL, port_name);
          MPI_Close_port( port_name );
          MPI_Finalize();
          return 0;
        case 1:
          /* The client asked for a deconnection */
          MPI_Comm_disconnect( &client );
          again = 0;
          break;

        default: /* Unexpected message type */
          MPI_Abort( MPI_COMM_WORLD, 1 );
        }
      }
    }
  } else { /*I am a Slave */
   
    /* Since I'm not a Master I close the unused openned SERVER port */
    printf("Rank %i : Closing unused server port %s\n",myrank,port_name);
    MPI_Close_port( port_name );

    /* Connect to service SERVER, get the inter-communicator server*/
    /* Attention MPI_Comm_connect est un appel collectif :
     - Si lancement mpirun -c n -----> uniquement MPI_COMM_SELF
fonctionne
     - Si lancement client_server&client_server ----> MPI_COMM_WORLD
fonctionne */
   
    /* TIMEOUT is inefficient since MPI_Comm_Connect doesn't return if
we asked for
       a service that has been unpublished !*/
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    i = 0;
    while ( i != TIMEOUT ) {
      printf("Rank %i : Trying to get the connection with SERVER %s
!\n",myrank, port_name_clt);
      if ( MPI_Comm_connect(port_name_clt, MPI_INFO_NULL, 0,
MPI_COMM_SELF, &server )
           == MPI_SUCCESS ) {
        printf("Rank %i : I get the connection with SERVER %s
!\n",myrank, port_name_clt);
        break;
      }
      /* sleep(1); */
      i++;
      printf("Rank %i : Waiting 1s for connecting to service SERVER %s
!\n",myrank,port_name_clt);
    }
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);

    if ( i == TIMEOUT ) {
      printf("Waiting too long exiting !\n");
      MPI_Finalize();
      exit(-1);
    }

    for (i=0;i<10;i++) {
      printf("Performing Task %i\n",i);
      /*Ask the server to work (TAG==2)*/
      MPI_Send( buf, 0, MPI_DOUBLE, 0, 2, server ); /* etc */
    }

    /*Disconnect from the SERVER (TAG==1) */
    MPI_Send( buf, 0, MPI_DOUBLE, 0, 1, server );
    
    MPI_Comm_disconnect( &server );
 
    MPI_Finalize();
 
  }
 
}