LAM/MPI logo

LAM/MPI General User's Mailing List Archives

  |   Home   |   Download   |   Documentation   |   FAQ   |   all just in this list

From: James Fang (cf8e_at_[hidden])
Date: 2004-03-13 01:10:01


Hi all

I am trying to make a manager/worker program that monitors processes
running on different nodes of a cluster. I am writing the program in C++
and I am having problems using the spawn features in MPI. I have tried to use

MPI_Comm_spawn(worker_program, MPI_ARGV_NULL, universe_size-1,
MPI_INFO_NULL, 0, MPI_COMM_SELF, &everyone, MPI_ERRCODES_IGNORE);

but I receive an error at the last parameter indicating that I am
substituting a void* for an int*.

I then tried to use

MPI::Intracomm::Spawn(worker_program, MPI::ARGV_NULL, universe_size-1,
MPI::INFO_NULL, 3, MPI::COMM_SELF, &everyone);

but this returned three errors: MPI::ARGV_NULL, MPI::INFO_NULL, and
Intracomm::Spawn are undefined.

I then declared an int* and passed it in place of MPI_ERRCODES_IGNORE in

MPI_Comm_spawn(worker_program, MPI_ARGV_NULL, universe_size-1,
MPI_INFO_NULL, 0, MPI_COMM_SELF, &everyone, MPI_ERRCODES_IGNORE);

This led to the worker program being launched successfully, but then I
received errors in the worker program, which has no errors when compiled
by itself. I am now running out of ideas, so if you have any idea of what
is going on, or if you know of any example of how MPI_Comm_spawn works, it
would be greatly appreciated. The programs I wrote are as follows:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string>
#include <iostream>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <sys/times.h>
#include <time.h>
#include <sys/resource.h>
#include <mpi++.h>
#include <intracomm.h>
#include <intercomm.h>

using namespace std;

int main(int argc, char *argv[])
{
         int universe_size, *universe_sizep, flag;
         MPI_Comm everyone;
         char worker_program[100] = "pmm_worker.cpp";

         char name [MPI_MAX_PROCESSOR_NAME];
         int namelen;

         MPI::Init(argc,argv);

         int rank = MPI::COMM_WORLD.Get_rank();
     int world_size = MPI::COMM_WORLD.Get_size();
         MPI_Get_processor_name(name, &namelen);
         cout<<"size is "<<world_size<<" "<<name<<endl;

         //MPI::Init(argc, argv);
         //int world_size = MPI::COMM_WORLD.Get_size();

         if(world_size != 1)
                 cout<<"Top heavy with management"<<endl;

         MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE,
&universe_sizep, &flag);

         if(!flag)
         {
                 cout<<"This MPI does not support Universe_size."<<endl;
                 cout<<"How many processes total?"<<endl;
                 scanf("%d", &universe_size);

         }

         else
                 universe_size = *universe_sizep;

         if (universe_size == 1)
                 cout << "no room to start workers"<<endl;

     //spawn workers
         int *whatever;

   // MPI_Comm_spawn(worker_program, MPI_ARGV_NULL, universe_size-1,
MPI_INFO_NULL, 0, MPI_COMM_SELF, &everyone, whatever);

         Intracomm::Spawn(worker_program, MPI::ARGV_NULL, universe_size-1,
MPI::INFO_NULL, 3, MPI::COMM_SELF, &everyone);

         MPI::Finalize();

         return 0;

}

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <string>
#include <iostream>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <sys/times.h>
#include <time.h>
#include <sys/resource.h>
#include <mpi++.h>

using namespace std;
/**
 * Returns the run of non-space characters that follows the
 * `space_index`-th space (1-based) of `content`.
 *
 * An index of 0 selects the text before the first space. An empty string is
 * returned when `space_index` is negative or `content` holds fewer spaces
 * than requested. Only ' ' is treated as a separator.
 */
static std::string ExtractFieldAfterSpace(const std::string& content, int space_index)
{
    if (space_index < 0)
        return std::string();

    // Skip past the first `space_index` spaces.
    std::string::size_type start = 0;
    for (int seen = 0; seen < space_index; ++seen)
    {
        const std::string::size_type pos = content.find(' ', start);
        if (pos == std::string::npos)
            return std::string();
        start = pos + 1;
    }

    // The field ends at the next space, or at the end of the text.
    const std::string::size_type stop = content.find(' ', start);
    if (stop == std::string::npos)
        return content.substr(start);
    return content.substr(start, stop - start);
}

/**
 * Prints two space-separated fields of `infoContent` to standard output,
 * prefixed by a description selected with `cpu_or_mem`.
 *
 * Each field is extracted exactly once. The previous implementation repeated
 * the extraction for every character of the field, so "1234" was printed as
 * "1234234344".
 *
 * @param infoContent   Text made of fields separated by single spaces.
 * @param space_count1  Number of spaces preceding the first field to print.
 * @param space_count2  Number of spaces preceding the second field to print.
 * @param cpu_or_mem    0 selects the CPU-time description; any other value
 *                      selects the memory description.
 */
void InfoOutput(std::string infoContent, int space_count1, int space_count2, int
cpu_or_mem)
{
    // Each message is kept on one source line: a string literal cannot span
    // a line break.
    std::string infoShown;
    if (cpu_or_mem == 0)
        infoShown = "The total amount of time (in jiffies) this process is schdule in the user and the kernel mode is ";
    else
        infoShown = "The total amount of memory of this process and its data (in pages) are ";

    const std::string info1 = ExtractFieldAfterSpace(infoContent, space_count1);
    const std::string info2 = ExtractFieldAfterSpace(infoContent, space_count2);

    std::cout << infoShown << info1 << ", " << info2 << std::endl;
}
//***********************************************************************************************************
int main(int argc, char* argv[])
{

// Get Command from parent

int size;
MPI_Comm parent;
MPI::Init(argc, argv);
MPI_Comm_get_parent(&parent);
if(parent == MPI_COMM_NULL)
cout<<"No Parent!";

//MPI_Comm_remote_size(parent, &size);
//if(size != 1)
//cout<<"Something is wrong with the parent";

int pid = getpid();
  cout << pid << " is the pid "<<endl;
  string cpid;
  char i;

  tms cpuTime;
  times(&cpuTime);
  double jiffy = sysconf(_SC_CLK_TCK);
  double page_size = sysconf(_SC_PAGE_SIZE);

  rusage cpuTimeFromG;
  int who = RUSAGE_SELF;
  getrusage(who , &cpuTimeFromG);

  cout<<"On this system, a jiffy is 1/"<<jiffy<<" second"<<endl;
  cout<<"the page size is "<<page_size<<" bytes"<<endl;

  cout<<"the user time is " << cpuTime.tms_utime <<endl;
  cout<<"the system time is " << cpuTime.tms_stime<<endl;
  cout<<"the approximation of processor time used by the program is "<<
clock()<<endl;
  cout<<"the user time from getrusage is " << cpuTimeFromG.ru_utime.tv_sec
<<endl;
  cout<<"the system time from getrusage is " <<
cpuTimeFromG.ru_stime.tv_sec<<endl;
  cout<<"the number of messages sent is " << cpuTimeFromG.ru_msgsnd <<endl;
  cout<<"the number of messages recieve is " << cpuTimeFromG.ru_msgrcv<<endl;

     while(pid > 0)
         {
                   i = (char)(pid % 10) + 48;
           pid = pid / 10;
                   cpid = i + cpid;
                 }
         string cpuInfo = "//proc//" + cpid +"//stat";
         string memInfo = "//proc//" + cpid +"//statm";

         ifstream OpenCpuInfoFile(cpuInfo.c_str());
         ifstream OpenMemInfoFile(memInfo.c_str());

    // ifstream object is used for getting input
    //char filename[50]; // string to hold file name
    char n1, n2;
    string cpuContent;
    string memContent;
    // open the file
   // fin.open(filename);

    // check for successful file open
    if( !OpenCpuInfoFile || !OpenMemInfoFile)
    {
      cout << "can't open file\n";
          exit(1);
    }
    // loop, reading two integers at a time
    while( !OpenCpuInfoFile.eof())
    {
            OpenCpuInfoFile.get(n1);
            cpuContent+=n1;
    }

    while( !OpenMemInfoFile.eof())
    {
            OpenMemInfoFile.get(n2);
            memContent+=n2;
         }

   // cout<<"mem string s length is " << memContent.length() <<endl;
    //cout<<"cpu string s length is " << cpuContent.length() <<endl;

    InfoOutput(memContent, 3, 4, 1); //send memContent and specify the
position of the space to obtain desired info
    InfoOutput(cpuContent, 13, 14, 0); //send cpuContent and specify the
position of the space to obtain desired info

   // fin.close();
    OpenMemInfoFile.close();
    OpenCpuInfoFile.close();

    MPI::Finalize();
    return 0;

}