next up previous
Next: Strassen algorithm: a skeleton Up: annexe1 Previous: General Framework

A new code for the matrix product

We compute the product C of two squared matrices A, B:




according to the physical data representation for the file containing matrix A:




and for the file containning matrix B:




Example:

Matrix A:
 00001 00002 00003 00004 00005 00006 00007 00008
 00009 00010 00011 00012 00013 00014 00015 00016
 00017 00018 00019 00020 00021 00022 00023 00024
 00025 00026 00027 00028 00029 00030 00031 00032
 00033 00034 00035 00036 00037 00038 00039 00040
 00041 00042 00043 00044 00045 00046 00047 00048
 00049 00050 00051 00052 00053 00054 00055 00056
 00057 00058 00059 00060 00061 00062 00063 00064
Matrix B:
 00001 00009 00017 00025 00033 00041 00049 00057
 00002 00010 00018 00026 00034 00042 00050 00058
 00003 00011 00019 00027 00035 00043 00051 00059
 00004 00012 00020 00028 00036 00044 00052 00060
 00005 00013 00021 00029 00037 00045 00053 00061
 00006 00014 00022 00030 00038 00046 00054 00062
 00007 00015 00023 00031 00039 00047 00055 00063
 00008 00016 00024 00032 00040 00048 00056 00064
Matrix C (product):
 00204 00492 00780 01068 01356 01644 01932 02220
 00492 01292 02092 02892 03692 04492 05292 06092
 00780 02092 03404 04716 06028 07340 08652 09964
 01068 02892 04716 06540 08364 10188 12012 13836
 01356 03692 06028 08364 10700 13036 15372 17708
 01644 04492 07340 10188 13036 15884 18732 21580
 01932 05292 08652 12012 15372 18732 22092 25452
 02220 06092 09964 13836 17708 21580 25452 29324

The proposed code is as follows:

/*
/usr/local/mpi/bin/mpicc -O out-of-core-matrix.c
*/

#include "sys/types.h"
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#include <argz.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <unistd.h>
#include "mpi.h"

#define MPI_OFFSET_ZERO 0

#define P          4              /* number of tasks                 */
#define Nb         8              /* slice size. Nb*Nb = matrix size */
#define Ng         ( Nb * P )     /* global size of matrix           */

static char fileA[] = "fileA";
static char fileB[] = "fileB";
static char fileC[] = "fileC";

void
fill_file_A(char *FNAME, int N)
{
  int             i, data;
  FILE            *f;

  /*  sprintf(s, FNAME, 0);*/
  if ((f = (FILE *) fopen(FNAME, "w+")) == NULL) {
    perror("io2: error in opening auxiliary files");
    exit(1);
  }
  for (i = 0; i < N * N; i++) {
    data = i + 1;
    if (fwrite(&data, sizeof(int), 1, f) != 1) {
      perror("io3: can't write in an auxiliary file");
      fclose(f);
      exit(1);
    }
  }
  fclose(f);
}

void
fill_file_B(char *FNAME, int N)
{
  int             i, j, data;
  FILE            *f;

  /*  sprintf(s, FNAME, 0);*/
  if ((f = (FILE *) fopen(FNAME, "w+")) == NULL) {
    perror("io2: error in opening auxiliary files");
    exit(1);
  }
  for (i = 0; i < N; i++) {
    for(j=0; j < N; j++) {
    data = (i+1) + N*j;
    if (fwrite(&data, sizeof(int), 1, f) != 1) {
      perror("io3: can't write in an auxiliary file");
      fclose(f);
      exit(1);
    }
    }
  }
  fclose(f);
}

/*
 * Print screen fname file (which much contain integers)
 */
void 
aff(char *fname, int N)
{
  FILE           *fp;
  int             i,count=0;

  fp = fopen(fname, "r");
  fread(&i, 4, 1, fp);
  while (0 == feof(fp)) {
    printf("%6.5d ", i);
    if(((count+1) % N) == 0) {
      count = 0; printf("\n");
    } else count++;
    fread(&i, 4, 1, fp);
  }
  printf("\n"); fclose(fp);
}


int
main( int argc, char *argv[] )
{
  int i, j, k;
  int iv;
  int myrank, commsize;
  int extent;
  int colext;
  int slicelen;
  int sliceext;
  int length[ 3 ];
  MPI_Aint disp[ 3 ];
  MPI_Datatype type[ 3 ];
  MPI_Datatype ftypeA;
  MPI_Datatype ftypeB;
  MPI_Datatype ftypeC;
  MPI_Datatype slice_typeB;
  MPI_Datatype block_typeC;
  MPI_Datatype slice_typeC;
  int mode;
  MPI_File fh_A, fh_B, fh_C;
  MPI_Offset offsetA;
  MPI_Offset offsetB;
  MPI_Offset offsetC;
  MPI_Status status;
  int val_A[ Nb ][ Ng ], val_B[ Ng ][ Nb ], val_C[ Nb ][ Nb ];

  /* initialize MPI */
  MPI_Init( &argc, &argv );
  MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
  MPI_Comm_size( MPI_COMM_WORLD, &commsize );

  /* We fille and we print the file contain */    
  if(myrank == 0) {
    fill_file_A(fileA,Nb);
    fill_file_B(fileB,Nb);
    printf("File A:\n");
    aff(fileA,Nb);
    printf("File B:\n");
    aff(fileB,Nb);
  }
  /*  exit(0);*/

  /*****************************************/

  MPI_Type_extent( MPI_INT, &extent );
  colext = Nb * extent;
  slicelen = Ng * Nb;
  sliceext = slicelen * extent;

  /* create filetype for matrix A */
  length[ 0 ] = 1;
  length[ 1 ] = slicelen;
  length[ 2 ] = 1;
  disp[ 0 ] = 0;
  disp[ 1 ] = sliceext * myrank;
  disp[ 2 ] = sliceext * P;
  type[ 0 ] = MPI_LB;
  type[ 1 ] = MPI_INT;
  type[ 2 ] = MPI_UB;
  MPI_Type_struct( 3, length, disp, type, &ftypeA );
  MPI_Type_commit( &ftypeA );

  /* create filetype for matrix B */
  length[ 0 ] = 1;
  length[ 1 ] = slicelen;
  length[ 2 ] = 1;
  disp[ 0 ] = 0;
  disp[ 1 ] = sliceext * myrank;
  disp[ 2 ] = sliceext * P;
  type[ 0 ] = MPI_LB;
  type[ 1 ] = MPI_INT;
  type[ 2 ] = MPI_UB;
  MPI_Type_struct( 3, length, disp, type, &ftypeB );
  MPI_Type_commit( &ftypeB );

  /* create filetype for matrix C */
  MPI_Type_vector( Nb, Nb, Ng, MPI_INT, &block_typeC );
  MPI_Type_hvector( P, 1, colext, block_typeC, &slice_typeC );
  length[ 0 ] = 1;
  length[ 1 ] = 1;
  length[ 2 ] = 1;
  disp[ 0 ] = 0;
  disp[ 1 ] = sliceext * myrank;
  disp[ 2 ] = sliceext * P;
  type[ 0 ] = MPI_LB;
  type[ 1 ] = slice_typeC;
  type[ 2 ] = MPI_UB;
  MPI_Type_struct( 3, length, disp, type, &ftypeC );
  MPI_Type_commit( &ftypeC );

  /* open files */
  mode = MPI_MODE_RDONLY;
  MPI_File_open( MPI_COMM_WORLD, fileA, mode, MPI_INFO_NULL, &fh_A );
  mode = MPI_MODE_RDONLY;
  MPI_File_open( MPI_COMM_WORLD, fileB, mode, MPI_INFO_NULL, &fh_B );
  mode = MPI_MODE_CREATE | MPI_MODE_WRONLY;
  MPI_File_open( MPI_COMM_WORLD, fileC, mode, MPI_INFO_NULL, &fh_C );

  /* set views */
  MPI_File_set_view (fh_A, MPI_OFFSET_ZERO, MPI_INT, ftypeA, 
                     "native", MPI_INFO_NULL );
  MPI_File_set_view( fh_B, MPI_OFFSET_ZERO, MPI_INT, ftypeB, 
                     "native", MPI_INFO_NULL );
  MPI_File_set_view( fh_C, MPI_OFFSET_ZERO, MPI_INT, ftypeC, 
                     "native", MPI_INFO_NULL );

  offsetA = MPI_OFFSET_ZERO;
  offsetB = MPI_OFFSET_ZERO;
  offsetC = MPI_OFFSET_ZERO;

  /* read horizontal slice of A */
  MPI_File_read_at_all( fh_A, offsetA, val_A, slicelen, MPI_INT, &status );

  /* loop on vertical slices */
  for ( iv = 0; iv < P; iv++ ) {

    /* read next vertical slice of B */
    MPI_File_read_at_all( fh_B, offsetB, val_B, slicelen, MPI_INT, &status );
    offsetB += slicelen;

    /* compute block product */
    for ( i = 0; i < Nb; i++ ) {
      for ( j = 0; j < Nb; j++ ) {
        val_C[ i ][ j ] = 0;
        for ( k = 0; k < Ng; k++ ) {
          val_C[ i ][ j ] += val_A[ i ][ k ] * val_B[ k ][ j ];
        }
      }
    }

    /* write block of C */ 
    MPI_File_write_at_all( fh_C, offsetC, val_C, Nb * Nb, MPI_INT, &status );
    offsetC += Nb * Nb;

  }

  /* close files */
  MPI_File_close( &fh_A );
  MPI_File_close( &fh_B );
  MPI_File_close( &fh_C );

  /* free filetypes */
  MPI_Type_free( &ftypeA );
  MPI_Type_free( &ftypeB );
  MPI_Type_free( &ftypeC );

  if(myrank == 0) {
    printf("File C:\n");
    aff(fileC,Nb);
  }

  /* finalize MPI */
  MPI_Finalize();
}

An example of an execution is given below. Note that only process 0 prints its result. Note also what is contained in file C at the end of the execution (is it possible to diminish the size of file C?).

[cerin@fatboy cerin]$ /usr/local/mpi/bin/mpirun -np 4 a.out
File A:
 00001  00002  00003  00004  00005  00006  00007  00008
 00009  00010  00011  00012  00013  00014  00015  00016
 00017  00018  00019  00020  00021  00022  00023  00024
 00025  00026  00027  00028  00029  00030  00031  00032
 00033  00034  00035  00036  00037  00038  00039  00040
 00041  00042  00043  00044  00045  00046  00047  00048
 00049  00050  00051  00052  00053  00054  00055  00056
 00057  00058  00059  00060  00061  00062  00063  00064

File B:
 00001  00009  00017  00025  00033  00041  00049  00057
 00002  00010  00018  00026  00034  00042  00050  00058
 00003  00011  00019  00027  00035  00043  00051  00059
 00004  00012  00020  00028  00036  00044  00052  00060
 00005  00013  00021  00029  00037  00045  00053  00061
 00006  00014  00022  00030  00038  00046  00054  00062
 00007  00015  00023  00031  00039  00047  00055  00063
 00008  00016  00024  00032  00040  00048  00056  00064

File C:
 00204  00492  00780  01068  01356  01644  01932  02220
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 01356  03692  06028  08364  10700  13036  15372  17708
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 00000  00000  00000  00000  00000  00000  00000  00000
 .....  .....  .....  .....  .....  .....  .....  .....
 .....  .....  .....  .....  .....  .....  .....  .....



Christophe Cérin 2002-09-16