/* Matrix-Matrix Multiply, AB = C using PESSL (ScaLAPACK)
filename: ex5.c
to use IBM PESSL library
compile: mpcc -o ex5 -lblacs -lpessl -DIBM ex5.c
run: poe+ ./ex5 -nodes 1 -procs 4
to use ScaLAPACK library
compile: module load scalapack
mpcc -o ex5 $PBLAS $BLACS $SCALAPACK -lessl -lxlf90 ex5.c
to use ScaLAPACK and VAMPIR (which does not work with PESSL)
compile: module load scalapack
module load vampir vampirtrace
mpcc -o ex5 $PBLAS $BLACS $SCALAPACK -lessl -lxlf90 $VTINC $VTLIB ex5.c
prow number of rows in proc grid set to 2
pcol number of columns in proc grid set to 2
n number of rows/columns in matrix A set to 1000
nb matrix distribution block size set to 16
output: stdout
*/
#include "stdio.h"
#include "stdlib.h"
#ifdef IBM
#include "pessl.h"
#endif
int g2lp (int,int,int,int);
int g2li (int,int,int,int);
main () {
int n=1000, nb=16; /* problem size and block size */
int myzero=0, myone=1;
int myArows, myAcols; /* size of local subset of global array */
int i,j, igrid,jgrid, iproc,jproc, myi,myj, p;
double mydzero=0., mydone=1.;
double *myA,*myB,*myC; /* local data arrays */
int me, procs, icontxt, prow=2, pcol=2, myrow, mycol; /* blacs data */
int info; /* pessl return value */
int ides_a[9], ides_b[9], ides_c[9]; /* pessl array desc */
/* Initialize blacs processor grid */
blacs_pinfo (&me,&procs);
blacs_get (0, 0, &icontxt);
blacs_gridinit(&icontxt, "r", &prow, &pcol);
blacs_gridinfo(&icontxt, &prow, &pcol, &myrow, &mycol);
printf("Proc %d: myrow, mycol in p-array is %d %d\n",me, myrow, mycol);
/* Construct local arrays
Global structure: matrix A of n rows and n columns
matrix B of n rows and n column
matrix C of n rows and n column
if(me==0)printf("Size of global array is %d x %d\n", n, n);
if(me==0)printf("Size of block is %d x %d\n", nb, nb);
*/
#ifdef IBM
myArows = numroc(n, nb, myrow, 0, prow);
myAcols = numroc(n, nb, mycol, 0, pcol);
#else
myArows = numroc(&n, &nb, &myrow, &myzero, &prow);
myAcols = numroc(&n, &nb, &mycol, &myzero, &pcol);
#endif
/* printf("Proc %d: Size of local array is %d x %d\n",me,myArows,myAcols);
*/
/* Initialize local arrays */
myA = (double *) malloc(sizeof(double)*myArows*myAcols);
myB = (double *) malloc(sizeof(double)*myArows*myAcols);
myC = (double *) malloc(sizeof(double)*myArows*myAcols);
for (i=0;i<n;i++)
{
iproc = g2lp(i,n,prow,nb);
myi = g2li(i,n,prow,nb);
if (myrow==iproc)
{
for (j=0;j<n;j++)
{
jproc = g2lp(j,n,pcol,nb);
myj = g2li(j,n,pcol,nb);
if (mycol==jproc)
{
myA[(myj*myArows)+myi] = (double)(i+j+2);
myB[(myj*myArows)+myi] = (double)(j-i);
myC[(myj*myArows)+myi] = 0.;
}
}
}
}
/* Prepare array descriptors for PESSL (ScaLAPACK style) */
ides_a[0] = 1; /* descriptor type */
ides_a[1] = icontxt; /* blacs context */
ides_a[2] = n; /* global number of rows */
ides_a[3] = n; /* global number of columns */
ides_a[4] = nb; /* row block size */
ides_a[5] = nb; /* column block size */
ides_a[6] = 0; /* initial process row */
ides_a[7] = 0; /* initial process column */
ides_a[8] = myArows; /* leading dimension of local array */
for (i=0;i<9;i++)
{
ides_b[i] = ides_a[i];
ides_c[i] = ides_a[i];
}
/* Call PESSL library routine */
#ifdef IBM
pdgemm("t","n",n,n,n,1.0,myA,1,1,ides_a,myB,1,1,ides_b,0.,myC,1,1,ides_c);
#else
pdgemm("t","n",&n,&n,&n,&mydone,myA,&myone,&myone,&ides_a,myB,&myone,&myone,&ides_b,&mydzero,myC,&myone,&myone,&ides_c);
#endif
/* Print results */
iproc = g2lp(n-1,n,prow,nb);
myi = g2li(n-1,n,prow,nb);
jproc = g2lp(n-1,n,pcol,nb);
myj = g2li(n-1,n,pcol,nb);
if ((myrow==iproc) && (mycol==jproc))
printf("c(%d,%d)=%f\n", n, n, myC[(myj*myArows)+myi]);
/* Deallocate the local arrays */
free(myA);
free(myB);
free(myC);
/* End blacs for processors that are used */
blacs_gridexit(icontxt);
blacs_exit(0);
return 0;
}
/* convert global index to local index in block-cyclic distribution */
int g2lp(int i,int n,int np,int nb)
{
/* i global array index, input
n global array dimension, input
np processor array dimension, input
nb block size, input
p processor array index, return
*/
return (i/nb)%np;
}
int g2li(int i,int n,int np,int nb)
{
/* i global array index, input
n global array dimension, input
np processor array dimension, input
nb block size, input
il local array index, return
*/
return (i/(np*nb))*nb + (i%nb);
}
/* convert local index to global index in block-cyclic distribution */
l2g(il,p,n,np,nb,i)
{
/* il local array index, input
p processor array index, input
n global array dimension, input
np processor array dimension, input
nb block size, input
i global array index, output
*/
int ilm1;
ilm1 = il-1;
i = (((ilm1/nb) * np) + p)*nb + (ilm1%nb) + 1;
return 0;
}
|