Hi,
Please any one solve my problem.
I have a matrix multiplication program below which run on multiple machines(using linux).
The program works fine with small size matrices(till 900), but doesn't work with large
size matrices(>900).
I am allocating memory dynamically.
The error is that one of the process cannont initialize MPI_Init(); etc
.
Can anyone tell me the possible reason that causes this error?
/*********** MULTIPLY_MATRICES WITH MPI AND CANNON ALGORITHM *******/
#include <stdio.h>
#include <math.h>
#include "mpi.h"
#define N 1200 /* < 900 ok above 900 problem exists */
#define _mat(i,j) (mat[(i)*n+(j)
readmat(char *fname,int *mat,int n)
{
FILE *fp;
int i,j;
if((fp=fopen(fname,"r"))==NULL)
return(-1);
for(i=0;i<n;i++)
for(j=0;j<n;j++)
if(fscanf(fp,"%d",&_mat(i,j))==EOF)
{
fclose(fp);
return(-1);
}
fclose(fp);
return(0);
}
writemat(char *fname,int *mat,int n)
{
FILE *fp;
int i,j;
if((fp=fopen(fname,"w"))==NULL)
return(-1);
for(i=0;i<n; fprintf(fp,"\n"),i++)
for(j=0;j<n;j++)
fprintf(fp,"%d\t",_mat(i,j));
fclose(fp);
return(0);
}
int main(int argc,char *argv[])
{
int myrank_old,myrank_new,process_rank,numprocs,situation=0,namelen,source,count;
double t1,t2;
int M,S,start_block_x,start_block_y;
int i,j,k,l,coord_nbr_proc;
float sum;
int *package_A,*package_B,*C_blocks,*temp_A,*temp_B,*temp_C;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Status stat;
MPI_Comm comm_new;
int num_dims=2,dims[2],periods[2],coords[2];
int rank_source,rank_dest;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&myrank_old);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
M=(int)sqrt((double)numprocs);
S=N/M;
dims[0]=dims[1]=M;
periods[0]=periods[1]=1;
MPI_Cart_create(MPI_COMM_WORLD,num_dims,dims,periods,0,&comm_new);
MPI_Comm_rank(comm_new,&myrank_new);
MPI_Get_processor_name(processor_name,&namelen);
fprintf(stdout,"Process %d of %d on %s\n",myrank_new, numprocs, processor_name);
package_A=(int *)malloc(S*S*sizeof(int));
package_B=(int *)malloc(S*S*sizeof(int));
C_blocks=(int *)malloc(S*S*sizeof(int));
temp_A=(int *)malloc(S*S*sizeof(int));
temp_B=(int *)malloc(S*S*sizeof(int));
temp_C=(int *)malloc(S*S*sizeof(int));
for(i=0; i<S*S; i++)
temp_C[i]=0;
if(myrank_new==0)
{
int A[N][N],B[N][N],C[N][N];
if(readmat("A_file",(int *)A,N)<0)
situation=1;
if(readmat("B_file",(int *)B,N)<0)
situation=1;
MPI_Bcast(&situation,1,MPI_INT,0,comm_new);
if(situation==1)
{
printf("File A_file or B_file has problem.\n");
MPI_Finalize();
return(0);
}
t1=MPI_Wtime();
for(process_rank=1;process_rank<numprocs;process_rank++)
{
start_block_x=(process_rank/M);
start_block_y=(process_rank%M);
count=0;
for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
{
for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
{
package_A[count]=A[i][j];
package_B[count]=B[i][j];
count++;
}
}
MPI_Send(package_A,S*S,MPI_INT,process_rank,0,comm_new);
MPI_Send(package_B,S*S,MPI_INT,process_rank,0,comm_new);
}
start_block_x=(myrank_new/M);
start_block_y=(myrank_new%M);
count=0;
for(i=start_block_x*S;i<(start_block_x+1)*S;i++)
{
for(j=start_block_y*S;j<(start_block_y+1)*S;j++)
{
temp_A[count]=A[i][j];
temp_B[count]=B[i][j];
count++;
}
}
for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
{
for(i=0;i<S;i++)
{
for(j=0;j<S;j++)
{
for(k=0;k<S;k++)
temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
}
}
MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
}
for(i=0;i<S;i++)
{
for(j=0;j<S;j++)
C[i][j]=temp_C[i*S+j];
}
for(i=1;i<numprocs;i++)
{
MPI_Recv(C_blocks,S*S,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,comm_new,&stat);
count=0;
l=0;
source=stat.MPI_SOURCE;
for(j=0;j<S;j++)
{
for(k=0;k<S;k++)
{
C[(source/M)*S+j][(source%M)*S+k]=C_blocks[l*S+count];
count++;
}
count=0;
l++;
}
}
t2= MPI_Wtime();
printf("Execution_time=%lf\n",(t2-t1));
writemat("C_Cannon",(int *)C,N);
}
if(myrank_new!=0)
{
MPI_Cart_coords(comm_new,myrank_new,2,coords);
MPI_Bcast(&situation,1,MPI_INT,0,comm_new);
if(situation!=0)
{
MPI_Finalize();
return(0);
}
MPI_Recv(temp_A,S*S,MPI_INT,0,0,comm_new,&stat);
MPI_Recv(temp_B,S*S,MPI_INT,0,0,comm_new,&stat);
MPI_Cart_shift(comm_new,1,-coords[0],&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
MPI_Cart_shift(comm_new,0,-coords[1],&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
for(coord_nbr_proc=0;coord_nbr_proc<M;coord_nbr_proc++)
{
for(i=0;i<S;i++)
{
for(j=0;j<S;j++)
{
for(k=0;k<S;k++)
temp_C[i*S+j]+=temp_A[i*S+k]*temp_B[k*S+j];
}
}
MPI_Cart_shift(comm_new,1,-1,&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_A,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
MPI_Cart_shift(comm_new,0,-1,&rank_source,&rank_dest);
MPI_Sendrecv_replace(temp_B,S*S,MPI_INT,rank_dest,0,rank_source,0,comm_new,&stat);
}
MPI_Send(temp_C,S*S,MPI_INT,0,0,comm_new);
}
MPI_Finalize();
return(0);
}
---------------------------------
Heres a new way to find what you're looking for - Yahoo! Answers
|