Hi,
Could anyone please help me solve this problem?
I have the matrix multiplication program below, which runs on multiple machines (using Linux).
The program works fine with small matrices (up to size 900), but doesn't work with larger
matrices (size > 900).
I am allocating memory dynamically.
The error is as follows
It seems that [at least] one of the processes that was started with mpirun did not invoke MPI_INIT before quitting (it is possible that more than one process  did not invoke MPI_INIT--- mpirun was only notified of the first one which was on node n0).
Mpirun can only be used with MPI programs.
Mpirun failed with exit status 252
 
Can anyone tell me the possible reason that causes this error?
/***********  MULTIPLY_MATRICES WITH MPI AND
 CANNON ALGORITHM    *******/
 
 
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
 
#define N     1200      /* matrix dimension (N x N); original note: < 900 ok, above 900 problem exists */
/* Element (i,j) of a row-major n x n matrix; expects `mat` and `n` to be in scope where it is used. */
#define _mat(i,j) (mat[(i)*n+(j)])
 
 
 /*
  * Read an n x n matrix of whitespace-separated decimal integers from the
  * text file `fname` into `mat`, stored row-major (element (i,j) at
  * mat[i*n + j]).
  *
  * Returns 0 on success. Returns -1 if the file cannot be opened, if it
  * ends before n*n values were read, or if a token is not a valid integer.
  * On failure the contents of `mat` are unspecified.
  */
int readmat(const char *fname, int *mat, int n)
{
    FILE *fp;
    int i, j;

    if ((fp = fopen(fname, "r")) == NULL)
        return -1;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* fscanf returns 0 (not EOF) on a non-numeric token, so compare
             * against the number of expected conversions instead of EOF. */
            if (fscanf(fp, "%d", &mat[(size_t)i * (size_t)n + (size_t)j]) != 1) {
                fclose(fp);
                return -1;
            }
        }
    }

    fclose(fp);
    return 0;
}
 
 
 
/*
 * Write the row-major n x n integer matrix `mat` to the text file `fname`.
 * Every value is followed by a tab and every row is terminated by a newline.
 *
 * Returns 0 on success, or -1 if the file cannot be opened or any write
 * (including the final flush performed by fclose) fails.
 */
int writemat(const char *fname, const int *mat, int n)
{
    FILE *fp;
    int i, j;
    int status = 0;

    if ((fp = fopen(fname, "w")) == NULL)
        return -1;

    for (i = 0; i < n && status == 0; i++) {
        for (j = 0; j < n; j++) {
            if (fprintf(fp, "%d\t", mat[(size_t)i * (size_t)n + (size_t)j]) < 0) {
                status = -1;
                break;
            }
        }
        if (status == 0 && fputc('\n', fp) == EOF)
            status = -1;
    }

    /* Buffered data is only flushed here, so a failed close is a failed write. */
    if (fclose(fp) != 0)
        status = -1;

    return status;
}
 
 
            
 
int main(int argc,char *argv[])
{
   
   int
 myrank_old,myrank_new,process_rank,numprocs,situation=0,namelen,source,count;
   double t1,t2;
   
   int M,S,start_block_x,start_block_y;
   int i,j,k,l,coord_nbr_proc;
   float sum;
   int *package_A,*package_B,*C_blocks,*temp_A,*temp_B,*temp_C;
   char processor_name[MPI_MAX_PROCESSOR_NAME]; 
   MPI_Status stat;
   MPI_Comm comm_new;
   int
 num_dims=2,dims[2],periods[2],coords[2];
   int rank_source,rank_dest;
   
   MPI_Init(&argc,&argv);
   MPI_Comm_rank(MPI_COMM_WORLD,&myrank_old);
   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
 
   M=(int)sqrt((double)numprocs);
   S=N/M;
   
   dims[0]=dims[1]=M;
   periods[0]=periods[1]=1;
  
 MPI_Cart_create(MPI_COMM_WORLD,num_dims,dims,periods,0,&comm_new);
   MPI_Comm_rank(comm_new,&myrank_new);
   
   MPI_Get_processor_name(processor_name,&namelen);
   fprintf(stdout,"Process %d of %d on %s\n",myrank_new, numprocs, processor_name);
   
   package_A=(int *)malloc(S*S*sizeof(int));
   package_B=(int *)malloc(S*S*sizeof(int));
   C_blocks=(int *)malloc(S*S*sizeof(int));
 
   temp_A=(int
 *)malloc(S*S*sizeof(int));
   temp_B=(int *)malloc(S*S*sizeof(int));
   temp_C=(int *)malloc(S*S*sizeof(int));
       
   
   for(i=0; i<S*S; i++)
        temp_C[i]=0;
 
   if(myrank_new==0)
   {
       
       int A[N][N],B[N][N],C[N][N];
       
 
       if(readmat("A_file",(int *)A,N)<0)
            situation=1;
          
       if(readmat("B_file",(int *)B,N)<0)
            situation=1;
            
       MPI_Bcast(&situation,1,MPI_INT,0,comm_new);
       if(situation==1)
       {   
            printf("File A_file or B_file has problem.\n");
              MPI_Finalize();
            return(0);  
       }
      
 t1=MPI_Wtime();
       
       for(process_rank=1;process_rank<numprocs;process_rank++)
       {
              start_block_x=(process_rank/M);
              start_block_y=(process_rank%M);
            
            count=0;
            for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
            {
                for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
                {
                     package_A[count]=A[i][j];
                     package_B[count]=B[i][j];
                     count++;
                }
             }
             
             MPI_Send(package_A,S*S,MPI_INT,process_rank,0,comm_new);
             MPI_Send(package_B,S*S,MPI_INT,process_rank,0,comm_new);
        }
        
        start_block_x=(myrank_new/M);
        start_block_y=(myrank_new%M);
 
        count=0;
        
        for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
        {
              for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
              {
                    temp_A[count]=A[i][j];
                    temp_B[count]=B[i][j];
                
                    count++;
              }
        }
                    
        for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
        {
           
  
            for(i=0;i<S;i++)
            {  
                 for(j=0;j<S;j++)
             {         
                        for(k=0;k<S;k++)
                       temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
                 }       
                   
              }
              MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest);
              MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
              
              MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest);
             
 MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
          } 
          for(i=0;i<S;i++)
          {
             for(j=0;j<S;j++)
                C[i][j]=temp_C[i*S+j];
          }
        
        
        for(i=1;i<numprocs;i++)
          {
                    
                  
               MPI_Recv(C_blocks,S*S,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,comm_new,&stat);
                   count=0;
               l=0;
               source=stat.MPI_SOURCE;
                   for(j=0;j<S;j++)
              
 {
                       for(k=0;k<S;k++)
                       {
                          C[(source/M)*S+j][(source%M)*S+k]=C_blocks[l*S+count];
                          count++;
                   } 
                   count=0; 
                   l++; 
               }      
                    
           }
         
         t2= MPI_Wtime(); 
           printf("Execution_time=%lf\n",(t2-t1));   
         writemat("C_Cannon",(int *)C,N); 
     }          
 
 
     if(myrank_new!=0)
     {
          
          MPI_Cart_coords(comm_new,myrank_new,2,coords);
          MPI_Bcast(&situation,1,MPI_INT,0,comm_new);    
          if(situation!=0)
          {
             MPI_Finalize();
             return(0);
          }    
          MPI_Recv(temp_A,S*S,MPI_INT,0,0,comm_new,&stat);
          MPI_Recv(temp_B,S*S,MPI_INT,0,0,comm_new,&stat);
 
         
 MPI_Cart_shift(comm_new,1,-coords[0],&rank_source,&rank_dest);
          MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
          MPI_Cart_shift(comm_new,0,-coords[1],&rank_source,&rank_dest);
          MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
          for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
          {
             
              for(i=0;i<S;i++)
              {  
                   for(j=0;j<S;j++)
                   {
                      
                        for(k=0;k<S;k++)
                             temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
                        
                     }
              }
              MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest);
              MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
              MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest);
              MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
 
          } 
        MPI_Send(temp_C,S*S,MPI_INT,0,0,comm_new); 
  }    
  MPI_Finalize();
  return(0);             
}
 


Here’s a new way to find what you're looking for - Yahoo! Answers