I re-read the standard. I don't know what I read before, but I was
clearly mistaken about what MPI_THREAD_SERIALIZED means. This is the
expected behavior: at that level I clearly cannot have one thread sitting
in a blocking call while another thread makes MPI calls. Fairly useless,
really, in terms of multi-threading.
Dave.
David Cronk wrote:
> I think this is a bug. I am attaching two programs to demonstrate the
> problem. To run the tests, compile serv.c with -lpthread and then run
> it in the background. Note that the call to sleep is there only to
> force some timing. After serv is running, compile and run work.c.
>
> What seems to be happening is, a thread in serv posts an
> MPI_Comm_accept. Once work connects, the thread in serv creates another
> thread to look for other connecting processes, before trying to receive
> data from the already connected process. The newly created thread gets
> on the processor and posts an MPI_Comm_accept. The previous thread then
> gets back on the processor and tries to receive the data sent by work.
> However, this data never seems to arrive and we are in deadlock. As far
> as I can tell from reading the standard, this is incorrect behavior for
> MPI_THREAD_SERIALIZED.
>
> Unless, of course, I am missing something in the code that is causing
> the deadlock.
>
> Dave.
>
>
> ------------------------------------------------------------------------
>
> #include <pthread.h>
> #include <signal.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <mpi.h>
> #include "head.h"
>
> /* Port name opened by main() and accepted on by every service thread. */
> char myport[MPI_MAX_PORT_NAME];
>
> /* Thread attributes; main() sets the detached state before any thread starts. */
> pthread_attr_t attr;
>
> /* Synchronisation objects; neither is referenced in the code shown here. */
> pthread_mutex_t lock;
> pthread_cond_t cond;
>
> /* Zeroed by main(); not otherwise used in the code shown here. */
> int nodelist[NUMNODES];
>
> /* Counter handed back to clients: main() starts it at 1 and
>    service_thread() increments it for each NEW_TAG message. */
> int active;
>
> /*
>  * Serve one connected client, then exit.
>  *
>  * Blocks in MPI_Comm_accept on the published port until a client connects,
>  * starts another detached service thread so that further clients can
>  * connect, and then answers the client's messages:
>  *   NEW_TAG  - increment the shared counter `active` and send the new
>  *              value back with MSG_TAG;
>  *   DONE_TAG - stop serving and disconnect.
>  * Messages carrying any other tag are received and ignored.
>  *
>  * NOTE: while this thread sits in MPI_Recv, the thread it spawned is
>  * blocked in MPI_Comm_accept, so two threads are inside MPI at the same
>  * time. The MPI standard only permits that at MPI_THREAD_MULTIPLE.
>  *
>  * arg is unused. Always returns NULL.
>  */
> void *service_thread (void *arg)
> {
>     /* Guards `active`, which every service thread may update. */
>     static pthread_mutex_t active_lock = PTHREAD_MUTEX_INITIALIZER;
>     MPI_Comm communicator;
>     MPI_Status status;
>     pthread_t thr_id;
>     int node;
>     int rc;
>     int pid = (int) getpid();
>
>     (void) arg;
>
>     printf ("[%d] service_thread is ALIVE!\n", pid); fflush (stdout);
>     MPI_Comm_accept (myport, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &communicator);
>     printf ("[%d] service_thread Accepted!\n", pid); fflush (stdout);
>
>     /* Hand the port over to a new thread so the next client can connect. */
>     rc = pthread_create (&thr_id, &attr, service_thread, NULL);
>     if (rc != 0) {
>         fprintf (stderr, "[%d] service_thread could not start a new acceptor (error %d)\n", pid, rc);
>     }
>
>     /* Only here to force the timing that exposes the problem. */
>     sleep (2);
>
>     for (;;) {
>         printf ("[%d] service_thread about to receive!\n", pid); fflush (stdout);
>         MPI_Recv (&node, 1, MPI_INT, 0, MPI_ANY_TAG, communicator, &status);
>         printf ("[%d] service_thread received %d \n", pid, status.MPI_TAG); fflush (stdout);
>
>         if (status.MPI_TAG == DONE_TAG) {
>             break;
>         }
>
>         if (status.MPI_TAG == NEW_TAG) {
>             int count;
>
>             /* Take a private snapshot so the value sent is the one this
>                thread produced, even if another thread bumps it meanwhile. */
>             pthread_mutex_lock (&active_lock);
>             count = ++active;
>             pthread_mutex_unlock (&active_lock);
>
>             printf ("[%d] service_thread sending \n", pid); fflush (stdout);
>             MPI_Send (&count, 1, MPI_INT, 0, MSG_TAG, communicator);
>             printf ("[%d] service_thread sent \n", pid); fflush (stdout);
>         }
>     }
>
>     MPI_Comm_disconnect (&communicator);
>     return NULL;
> }
>
>
> /*
>  * Server entry point.
>  *
>  * Initialises MPI with thread support, opens a port and publishes it under
>  * the service name "tasker", starts the first detached service thread and
>  * then idles forever while the service threads do the work.
>  *
>  * The service threads make concurrent MPI calls (one blocks in
>  * MPI_Comm_accept while another receives), so anything below
>  * MPI_THREAD_MULTIPLE is rejected; with MPI_THREAD_SERIALIZED the program
>  * deadlocks.
>  *
>  * Returns 1 if the required thread level is unavailable or the first
>  * service thread cannot be started; otherwise it does not return.
>  */
> int main(int argc, char *argv[])
> {
>     int i, provided, rc;
>     pthread_t thr_id;
>
>     MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
>     if (provided < MPI_THREAD_MULTIPLE) {
>         printf ("This MPI implementation does not support a high enough level of thread support. Aborting tasker!\n");
>         MPI_Finalize();
>         return 1;
>     }
>
>     MPI_Open_port (MPI_INFO_NULL, myport);
>     MPI_Publish_name ("tasker", MPI_INFO_NULL, myport);
>
>     pthread_attr_init (&attr);
>     pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
>
>     // Initialize lists
>     for (i = 0; i < NUMNODES; i++) {
>         nodelist[i] = 0;
>     }
>
>     active = 1;
>     rc = pthread_create (&thr_id, &attr, service_thread, NULL);
>     if (rc != 0) {
>         fprintf (stderr, "tasker: could not start the first service thread (error %d)\n", rc);
>         MPI_Abort (MPI_COMM_WORLD, 1);
>         return 1;
>     }
>
>     // Sleep instead of spinning so the service threads get the CPU;
>     // pause() only returns after a signal handler ran, hence the loop.
>     for (;;) {
>         pause();
>     }
>
>     // Not reached: the server runs until it is killed.
>     MPI_Finalize();
>     exit(0);
> }
>
>
>
> ------------------------------------------------------------------------
>
> #include <stdlib.h>
> #include <stdio.h>
> #include <mpi.h>
> #include <sys/types.h>
> #include <signal.h>
> #include "head.h"
>
> /* Connection to the "tasker" server, set up by MPI_Comm_connect in main(). */
> MPI_Comm taskcomm;
>
> /* Info object that receives the "lam_spawn_sched_round_robin" key in main(). */
> MPI_Info sp_info;
>
> /* Rank of this process in MPI_COMM_WORLD. */
> int self;
>
>
>
> /*
>  * Client for the "tasker" server.
>  *
>  * Looks up the port published as "tasker", connects to it, performs two
>  * NEW_TAG request/reply exchanges, stores the last reply in sp_info under
>  * the key "lam_spawn_sched_round_robin" as "n<value>", then sends DONE_TAG
>  * and disconnects. The request messages carry no data (count 0); only
>  * their tag matters to the server.
>  */
> int main(int argc, char *argv[])
> {
>     /* Room for 'n', any int in decimal (sign included) and the NUL. */
>     char nodestr[16];
>     char portname[MPI_MAX_PORT_NAME];
>     MPI_Status status;
>     int node = 0;
>
>     MPI_Init(&argc, &argv);
>     MPI_Comm_rank(MPI_COMM_WORLD, &self);
>
>     printf ("about to lookup\n"); fflush (stdout);
>     MPI_Lookup_name ("tasker", MPI_INFO_NULL, portname);
>     printf ("portname = %s\n", portname); fflush (stdout);
>     MPI_Comm_connect (portname, MPI_INFO_NULL, 0, MPI_COMM_SELF, &taskcomm);
>     printf ("connected \n"); fflush (stdout);
>
>     MPI_Info_create (&sp_info);
>
>     /* First request/reply exchange. */
>     printf ("sending to server \n"); fflush (stdout);
>     MPI_Send (&node, 0, MPI_INT, 0, NEW_TAG, taskcomm);
>     printf ("receiving from server \n"); fflush (stdout);
>     MPI_Recv (&node, 1, MPI_INT, 0, MSG_TAG, taskcomm, &status);
>
>     /* Second request/reply exchange. */
>     printf ("sending to server node = %d\n", node); fflush (stdout);
>     MPI_Send (&node, 0, MPI_INT, 0, NEW_TAG, taskcomm);
>     printf ("receiving from server \n"); fflush (stdout);
>     MPI_Recv (&node, 1, MPI_INT, 0, MSG_TAG, taskcomm, &status);
>     printf ("received from server node = %d\n", node); fflush (stdout);
>
>     /* Bounded formatting: the old 6-byte buffer overflowed for node >= 10000. */
>     snprintf (nodestr, sizeof nodestr, "n%d", node);
>     MPI_Info_set (sp_info, "lam_spawn_sched_round_robin", nodestr);
>
>     printf ("sending done_tag to server \n"); fflush (stdout);
>     MPI_Send (&self, 0, MPI_INT, 0, DONE_TAG, taskcomm);
>     printf ("disconnecting from server \n"); fflush (stdout);
>     MPI_Comm_disconnect (&taskcomm);
>
>     /* Release the info object before shutting MPI down. */
>     MPI_Info_free (&sp_info);
>     MPI_Finalize();
>     exit(0);
> }
>
>
>
> ------------------------------------------------------------------------
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> This list is archived at http://www.lam-mpi.org/MailArchives/lam/
--
Dr. David Cronk, Ph.D. phone: (865) 974-3735
Research Leader fax: (865) 974-8296
Innovative Computing Lab http://www.cs.utk.edu/~cronk
University of Tennessee, Knoxville
|