LAM/MPI logo

LAM/MPI General User's Mailing List Archives

  |   Home   |   Download   |   Documentation   |   FAQ   |   all just in this list

From: abhik.sarkar_at_[hidden]
Date: 2003-07-21 00:12:34


     Hi ,
     
        I posted a message earlier and was glad to get such a prompt
     reply. Special thanks to Nick. I am using LAM 6.5.6 on a Beowulf
     cluster. My basic problem is as follows:
        I have 2 processes running on a single node, and each process
     has a thread. The purpose of the thread is to let me perform
     the computational task alongside the communication, i.e. one of the
     threads is for communication and the other performs the
     computation. The communication thread waits on two conditions. One
     is that MPI_Iprobe reports that data is waiting
     to be accepted on the channel; the other is that the parent thread
     (the computation thread) prompts it to send some data to the other
     process.
        I have taken care that each process makes all of its MPI calls from a
     single thread, either the child or the parent, but in both cases I
     get the same error, which is generated by MPI_Iprobe. Please reply
     soon.
     
     The following is the code
     
     #include<stdio.h>
     #include<pthread.h>
     #include<mpi.h>
     #include<curses.h>
     #include<string.h>
     #include<unistd.h>
     #include<stdlib.h>
     #include<sys/types.h>
     #include<sys/msg.h>
     #include<sys/ipc.h>
     /* Command character: main() sets it to 'b' and later stores the user's
      * keystroke in it; thrdprobe() polls it and sends when it equals 'a'.
      * NOTE(review): shared between the two threads with no synchronisation. */
     char c;
     /* MPI rank of this process: stored by thrdprobe() through MPI_Comm_rank()
      * and read by main() to decide whether to prompt the user. */
     int rank;
     struct procnum{
                int pn;
                int *argc1;
                char ***argv1;
                };
     struct msgbuf{
                long mtype;
                int m;
                };
     
     void * thrdprobe(void *t)
     {
        int len;
        char *s;
        int flg=2;
        MPI_Status stat;
        int ret;
        key_t key1;
        int msqid;
        struct msgbuf msg1;
     
        s=(char *)malloc(10);
        MPI_Init(((struct procnum *)t)->argc1,((struct procnum*)t)->argv1);
        MPI_Comm_rank(MPI_COMM_WORLD,&rank);
        key1=1234;
        msqid=msgget(key1,0666|IPC_CREAT);
        msg1.mtype=19;
        msgsnd(msqid,&msg1,sizeof(msg1),0);
        printf("my rank is %d\n",rank);
     
        while(c!='a' && flg!=1)
        {
                MPI_Iprobe(MPI_ANY_SOURCE,17,MPI_COMM_WORLD,&flg,&stat);
        }
        if(c=='a')
        {
                s="abhik";
                len=strlen(s);
                MPI_Send(s,len+1,MPI_CHAR,1,17,MPI_COMM_WORLD);
                printf("sent\n");
        }
     
        if(flg==1)
        {
                printf("the recv thread has reached\n");
     
     MPI_Recv(s,10,MPI_CHAR,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&stat
     );
                printf("my name is %s\n",s);
        }
     
        MPI_Barrier(MPI_COMM_WORLD);
        printf("barrier passed\n");
        MPI_Finalize();
        return(0);
     }
     
     int main(int argc,char *argv[])
     {
        pthread_t thr_id;
        struct procnum s;
        void *thrd_stat;
        key_t key1;
        int msqid;
        struct msgbuf msg1;
     
     
        c='b';
        s.pn=1;
        s.argc1=&argc;
        s.argv1=&argv;
        key1=1234;
        if(!pthread_create(&thr_id,NULL,thrdprobe,(void *)&s))
                printf("created a thread\n");
        else
                perror("pthread_create\n");
        msqid=msgget(key1,0666|IPC_CREAT);
        msgrcv(msqid,&msg1,sizeof(msg1),19,0);
        if(rank==0)
        {
                printf("ask for data by typing a\n");
                scanf("%c",c);
        }
        pthread_join(thr_id,&thrd_stat);
        printf("thread Died\n");
        return(0);
     }
     
     ********************************************************************
     
     and the following is the output, including the errors, that I get on my console
     
     created a thread
     my rank is 0
     created a thread
     my rank is 1
     ask for data by typing a
     a
     Rank (0, MPI_COMM_WORLD): Call stack within LAM:
     Rank (0, MPI_COMM_WORLD): - MPI_Iprobe()
     Rank (0, MPI_COMM_WORLD): - main()
     MPI process rank 0 (n0, p2084) caught a SIGSEGV in MPI_Iprobe.
     ----------------------------------------------------------------------
     One of the processes started by mpirun has exited with a nonzero exit
     code. This typically indicates that the process finished in error.
     If your process did not finish in error, be sure to include a "return
     0" or "exit(0)" in your C code before exiting the application.
     PID 2080 failed on node n0 with exit status 1.
     ----------------------------------------------------------------------