/* Matrix-Matrix Multiply, AB = C using PESSL (ScaLAPACK)
   filename:  ex5.c
 
 to use IBM PESSL library
 compile:   mpcc -o ex5 -lblacs -lpessl -DIBM ex5.c
 run:       poe+ ./ex5 -nodes 1 -procs 4
 
 to use ScaLAPACK library
 compile:   module load scalapack
            mpcc -o ex5 $PBLAS $BLACS $SCALAPACK -lessl -lxlf90 ex5.c
 
 to use ScaLAPACK and VAMPIR (which does not work with PESSL)
 compile:   module load scalapack
            module load vampir vampirtrace
            mpcc -o ex5 $PBLAS $BLACS $SCALAPACK -lessl -lxlf90 $VTINC $VTLIB ex5.c
 
                prow    number of rows in proc grid set to 2
                pcol    number of columns in proc grid set to 2
                n       number of rows/columns in matrix A set to 1000
                nb      matrix distribution block size set to 16
 
   output:   stdout  
*/
 
#include "stdio.h"
#include "stdlib.h"
#ifdef IBM
 #include "pessl.h" 
#endif
int g2lp (int,int,int,int);
int g2li (int,int,int,int);
 
main () {
 
      int n=1000, nb=16;              /* problem size and block size */
      int myzero=0, myone=1;
      int myArows, myAcols;   /* size of local subset of global array */
      int i,j, igrid,jgrid, iproc,jproc, myi,myj, p;
      double mydzero=0., mydone=1.;
      double *myA,*myB,*myC;  /* local data arrays */
 
      int me, procs, icontxt, prow=2, pcol=2, myrow, mycol; /* blacs data */
      int info;   /* pessl return value */
      int ides_a[9], ides_b[9], ides_c[9]; /* pessl array desc */
 
/* Initialize blacs processor grid */
 
      blacs_pinfo   (&me,&procs);
      blacs_get     (0, 0, &icontxt);
      blacs_gridinit(&icontxt, "r", &prow, &pcol);
      blacs_gridinfo(&icontxt, &prow, &pcol, &myrow, &mycol);
      printf("Proc %d: myrow, mycol in p-array is %d %d\n",me, myrow, mycol);
 
/* Construct local arrays
   Global structure:  matrix A of n rows and n columns
                      matrix B of n rows and n column
                      matrix C of n rows and n column 
 
      if(me==0)printf("Size of global array is %d x %d\n", n, n);
      if(me==0)printf("Size of block is        %d x %d\n", nb, nb);
 
*/
 
#ifdef IBM      
      myArows = numroc(n, nb, myrow, 0, prow);
      myAcols = numroc(n, nb, mycol, 0, pcol);
#else
      myArows = numroc(&n, &nb, &myrow, &myzero, &prow);
      myAcols = numroc(&n, &nb, &mycol, &myzero, &pcol);
#endif
 
/*    printf("Proc %d: Size of local array is  %d x %d\n",me,myArows,myAcols);
*/
 
/* Initialize local arrays  */
 
      myA = (double *) malloc(sizeof(double)*myArows*myAcols);
      myB = (double *) malloc(sizeof(double)*myArows*myAcols);
      myC = (double *) malloc(sizeof(double)*myArows*myAcols);
 
      for (i=0;i<n;i++)
      {
         iproc = g2lp(i,n,prow,nb);
         myi   = g2li(i,n,prow,nb);
         if (myrow==iproc)
         {
            for (j=0;j<n;j++)
            {
            jproc = g2lp(j,n,pcol,nb);
            myj   = g2li(j,n,pcol,nb);
               if (mycol==jproc)
               {
                  myA[(myj*myArows)+myi] = (double)(i+j+2);
                  myB[(myj*myArows)+myi] = (double)(j-i);
                  myC[(myj*myArows)+myi] = 0.;
               }
            }
         }
      }
 
/* Prepare array descriptors for PESSL (ScaLAPACK style) */
 
      ides_a[0] = 1;        /* descriptor type */
      ides_a[1] = icontxt;  /* blacs context */
      ides_a[2] = n;        /* global number of rows */
      ides_a[3] = n;        /* global number of columns */
      ides_a[4] = nb;       /* row block size */
      ides_a[5] = nb;       /* column block size */
      ides_a[6] = 0;        /* initial process row */
      ides_a[7] = 0;        /* initial process column */
      ides_a[8] = myArows;  /* leading dimension of local array */
 
      for (i=0;i<9;i++)
      {
         ides_b[i] = ides_a[i];
         ides_c[i] = ides_a[i];
      }
 
/* Call PESSL library routine */
 
#ifdef IBM      
     pdgemm("t","n",n,n,n,1.0,myA,1,1,ides_a,myB,1,1,ides_b,0.,myC,1,1,ides_c);
#else
     pdgemm("t","n",&n,&n,&n,&mydone,myA,&myone,&myone,&ides_a,myB,&myone,&myone,&ides_b,&mydzero,myC,&myone,&myone,&ides_c);
#endif
 
/* Print results */
 
      iproc = g2lp(n-1,n,prow,nb);
      myi   = g2li(n-1,n,prow,nb);
      jproc = g2lp(n-1,n,pcol,nb);
      myj   = g2li(n-1,n,pcol,nb);
      if ((myrow==iproc) && (mycol==jproc))
         printf("c(%d,%d)=%f\n", n, n, myC[(myj*myArows)+myi]);
 
/* Deallocate the local arrays */
 
      free(myA);
      free(myB);
      free(myC);
 
/* End blacs for processors that are used */
 
      blacs_gridexit(icontxt);
      blacs_exit(0);
 
      return 0; 
}
 
/* convert global index to local index in block-cyclic distribution */
 
int g2lp(int i,int n,int np,int nb)
{
 
/*  i      global array index, input
    n      global array dimension, input
    np     processor array dimension, input
    nb     block size, input
    p      processor array index, return
*/
   return (i/nb)%np;
}
 
int g2li(int i,int n,int np,int nb)
{
 
/*  i      global array index, input
    n      global array dimension, input
    np     processor array dimension, input
    nb     block size, input
    il     local array index, return
*/
   return (i/(np*nb))*nb + (i%nb);
}
 
/* convert local index to global index in block-cyclic distribution */
 
l2g(il,p,n,np,nb,i)
{
 
/*  il     local array index, input
    p      processor array index, input
    n      global array dimension, input
    np     processor array dimension, input
    nb     block size, input
    i      global array index, output
*/
 
   int ilm1;
 
   ilm1 = il-1;
   i    = (((ilm1/nb) * np) + p)*nb + (ilm1%nb) + 1;
   
   return 0;
}

Close This Window