hello:
i am trying to implement the following sequence of events, as suggested by
jeff squyres:
****************************************************************************
***************************************
- your matlab script launches
- it calls MPI_Init
- check for a published name
- if the published name does not exist
- spawn a master (i.e., a new, independent process)
- the master publishes a name
- if the published name does exist
- MPI_Comm_connect to the master
- the master spawns a bunch of slaves to do the work
- the matlab script sends a bunch of work to the master
- the master farms it out to all the slaves
- the slaves do all the work and eventually send the result(s) to the
master
- the master sends the result(s) to the matlab script
- the matlab script disconnects from the master
- the matlab script finishes
---> note that the master and all of the slaves are still running
The next time that the matlab script starts up, it sees that the
master is running and just connects to it (rather than spawning a new
one). Hence, all of your slaves are durable and keep their data (no
need to re-scatter the same data every time).
Hence, your architecture is that your matlab script acts as a command
input to the persistent master and its slaves. You attach to/detach from
the master in order to send it a command and get the results.
You'll need some kind of "please die now" command, too, so that when
all processing is done, the matlab script can tell the master to kill
all of its slaves and die (and unpublish the name).
****************************************************************************
***************************************
so far, i've only managed to get the following working:
1. matlab script checks for a published name
2. it spawns a master (using MPI_Comm_spawn)
3. In the spawned code, the master publishes a name (lobelia)
questions:
1. i am using mpi_comm_spawn to spawn this independent master process, but
mpi_comm_spawn doesn't seem to like it when i specify a maxproc < 2,
which is why i have
MPI_Comm_spawn("/home/Galadriel/matlab/anne/uplink/update_master3",
MPI_ARGV_NULL,
2, MPI_INFO_NULL, 0, MPI_COMM_SELF,
&client,MPI_ERRCODES_IGNORE);
in my code, where i spawn off 2 processes. 'lobelia' is the one i treat as
master out of the two spawned. is there another way of spawning only one
process?
2. because my update_master3 code exits upon completion, when i run
MEX_master.c a second time, with the intention of just connecting to
'lobelia' instead of having to spawn it again, the MATLAB script does see the
published name 'lobelia', but because update_master3.c exited upon
completion of the previous invocation, it is no longer running
on 'lobelia'. how can i go about keeping 'lobelia' alive so that
subsequent calls to MEX_master.c can still connect to 'lobelia'? do i
just not exit the function update_master3.c? does that mean i should put in
a loop somewhere? not clear on how to implement this!!! please help!
3. how do i have MEX_master.c tell 'lobelia' to PLEASE DIE NOW?
my code for MEX_master.c and update_master3.c are pasted in this message
below.
Thanks,
Anne
----------------------------------------------------------------------------
----------------------------------------------------------------------------
----------------------------
*** my code for the matlab MEX script (MEX_master.c) is as follows:
#include <stdio.h>
#include <malloc.h>
#include <math.h>
#include "mpi.h"
/* #include "mex.h"
*/
/* File-scope state shared by the MATLAB-side client (MEX_master.c). */

/* Specifies that node 0 (Galadriel) is the master node. */
int master = 0;

/* Rank within mergedcomm. */
int rank;

/* Size of mergedcomm. */
int size;

/* Per-host log file; main opens it under the processor name. */
FILE *fyle_out;

/* Name reported by MPI_Get_processor_name, and its length. */
char processor_name[256];
int len;

/* Counts passes through main; MPI_Init is only called while it is 0. */
static int bummer = 0;

/* Port string filled in by MPI_Lookup_name. */
char port_name[MPI_MAX_PORT_NAME];

MPI_Status status;
int main(int argc, char **argv)
{
/*** INPUTS ***/
double input;
/************** Variables for MPI set-up *******************/
MPI_Comm client; /*INTERcommunicator created
by MPI_comm_spawn to master process*/
MPI_Comm mergedcomm; /*INTRAcommunicator created by
merging remote/local of client */
int flag, *universe_sizep ;
/****************************************************************/
if (bummer == 0)
MPI_Init(&argc,&argv);
bummer++;
MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
MPI_Get_processor_name(processor_name,&len);
fyle_out = fopen (processor_name, "w");
fprintf(fyle_out,"I'm connecting inside %s\n",processor_name);
if (MPI_Lookup_name("lobelia",MPI_INFO_NULL,
port_name)!=MPI_SUCCESS)
{
printf("i'm spawning...\n");
/* 1. spawn independent master process */
MPI_Comm_spawn("/home/Galadriel/matlab/anne/uplink/update_master3",
MPI_ARGV_NULL,
2, MPI_INFO_NULL, 0, MPI_COMM_SELF,
&client,MPI_ERRCODES_IGNORE);
MPI_Comm_set_name(client,"client");
}
else
{
printf("I'm connecting immediately..\n");
MPI_Comm_connect(port_name,MPI_INFO_NULL,0,MPI_COMM_SELF,&client);
}
input = 88;
printf("processor name = %s\n\n",processor_name);
/**** Print out stats ****/
MPI_Comm_size(client, &size); /*size of local group of client:
spawners*/
fprintf(fyle_out,"Comm_spawn intercommunicator: client\nsize of
local group (# spawners) = %d\n",size);
MPI_Comm_remote_size(client, &size); /*size of remote group of
client: spawned*/
MPI_Comm_rank(client, &rank);
fprintf(fyle_out,"size of remote group (# spawned)= %d\nrank of %s =
%d \n\n",size,processor_name,rank);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Intercomm_merge(client,0,&mergedcomm);
MPI_Comm_size(mergedcomm,&size);
MPI_Comm_rank(mergedcomm,&rank);
fprintf(fyle_out,"\nIntracommunicator from client: mergedcomm\nrank
of %s = %d\nsize = %d\n",processor_name,rank,size);
MPI_Send(&input,1,MPI_DOUBLE,1,1,mergedcomm);
} /* MAIN: update_master2 */
----------------------------------------------------------------------------
----------------------------------------------------------------------------
--------------------------------------
******* my code for the master that gets spawned from the MATLAB script is as
follows (stored in update_master3.c):
#include <stdio.h>
#include <malloc.h>
#include <math.h>
#include "mpi.h"
/* File-scope state of the spawned master program (update_master3.c). */

int master = 0;  /* specifies that node 0 (Galadriel) is the master node */
int rank, size;  /* rank within mergedcomm, size of mergedcomm */
FILE *fyle_out;  /* per-host log file, opened by main */
char processor_name[256];
int len;

/* Explicit type: the original "static bummer = 0;" relied on implicit int,
   which is not valid C99 or later. */
static int bummer = 0;

/************** Variables for MPI set-up *******************/
MPI_Comm server;     /* INTERcommunicator obtained in main from
                        MPI_Comm_get_parent or MPI_Comm_accept */
MPI_Comm spawncomm;  /* INTERcommunicator created by MPI_Comm_spawn */
MPI_Comm mergedcomm; /* INTRAcommunicator created by merging the
                        remote/local groups of an intercommunicator */
char port_name[MPI_MAX_PORT_NAME];
MPI_Status status;
int main (int argc, char **argv)
{
/*** INPUTS ***/
double *input;
int flag, *universe_sizep ;
int i;
/****************************************************************/
MPI_Init(&argc,&argv);
MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
MPI_Get_processor_name(processor_name,&len);
fyle_out = fopen (processor_name, "w");
fprintf(fyle_out,"I'm connecting inside %s\n",processor_name);
if
(MPI_Lookup_name(processor_name,MPI_INFO_NULL,port_name)!=MPI_SUCCESS)
{
/* 1. master process spawned */
fprintf(fyle_out,"I'm being spawned...\n");
MPI_Comm_get_parent(&server);
MPI_Comm_set_name(server,"server");
/* 2. connection to master process accepted */
fprintf(fyle_out,"I'm publishing my name...\n");
MPI_Open_port(MPI_INFO_NULL,port_name);
MPI_Publish_name(processor_name,MPI_INFO_NULL,port_name);
fprintf(fyle_out,"port_name of %s =
%s...\n\n",processor_name,port_name);
}
else
{
fprintf(fyle_out,"Connecting...\n");
MPI_Comm_accept(port_name,MPI_INFO_NULL,0,MPI_COMM_SELF,
&server);
MPI_Comm_set_name(server,"server");
}
/**** Print out stats ****/
MPI_Comm_size(server, &size); /*size of local group of spawncomm:
spawned*/
fprintf(fyle_out, "Comm_get_parent/Comm_accept intercommunicator:
server \nsize of local group (# spaw$
MPI_Comm_remote_size(server, &size); /*size of remote group of
spawncomm: spawners*/
MPI_Comm_rank(server, &rank);
fprintf(fyle_out,"size of remote group (# spawners) = %d\nrank of %s
= %d \n",size,processor_name,rank$
MPI_Barrier(MPI_COMM_WORLD);
MPI_Intercomm_merge(server,1,&mergedcomm);
MPI_Comm_size(mergedcomm,&size);
MPI_Comm_rank(mergedcomm,&rank);
fprintf(fyle_out,"\nIntracommunicator from server: mergedcomm\nrank
of %s = %d\nsize = %d\n",processor$
MPI_Recv(input,1,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,
mergedcomm,&status);
fprintf(fyle_out,"input = %4.3e\n", *input);
fflush(NULL);
bummer++;
fclose(fyle_out);
if (bummer == 3)
{
MPI_Unpublish_name(processor_name,MPI_INFO_NULL,port_name);
MPI_Finalize();
}
} /* MAIN: update_master2 */
___________________________________________________
Anne Pak, L1-50
Building 153 2G8
1111 Lockheed Martin Way
Sunnyvale, CA 94089
(408) 742-4369 (W)
(408) 742-4697 (F)
|