C compile: ftn -fastsse -o dp1 dp1.f -L$ACML_DIR/pgi64/lib -lacml
C run: qsub rundp
IMPLICIT NONE
INCLUDE 'mpif.h'
integer, parameter :: statesize=16, naggen=1, seedsize=1
INTEGER :: myPE, totPEs, ierr
integer :: info, i, n, nskip1, nskip2, imag
integer :: state1(statesize), state2(statesize), seed(seedsize)
real :: dpsend
real, allocatable :: dpset(:)
double precision :: dp, ddot
double precision, parameter :: a=0.d0, b=1.0d0
double precision, allocatable :: x(:), y(:)
CALL MPI_INIT( ierr )
CALL MPI_COMM_RANK( MPI_COMM_WORLD, myPE, ierr )
CALL MPI_COMM_SIZE( MPI_COMM_WORLD, totPEs, ierr )
allocate(dpset(totPEs))
C all processors working from the same random sequence, same seed
seed(1) = 1234
call drandinitialize(naggen,1,seed,1,state1,statesize,info)
state2 = state1
c pull vectors out of sequence based on processor number
n = 1000000
nskip1 = n * myPE
nskip2 = nskip1 + (n * totPEs)
call drandskipahead(nskip1,state1,info)
call drandskipahead(nskip2,state2,info)
allocate(x(n), y(n))
call dranduniform(n,a,b,state1,x,info)
call dranduniform(n,a,b,state2,y,info)
c compute dot product of two vectors from random distribution
dp = ddot(n,x,1,y,1)
deallocate(x,y)
c gather normalized dot products
dpsend = 4*(dp/n)
CALL MPI_GATHER(dpsend, 1, MPI_REAL, dpset, 1, MPI_REAL,
. 0, MPI_COMM_WORLD, ierr)
if (myPE==0) then
print *,"dpset = ",dpset
print *,"PE's, average = ", totPEs, (sum(dpset)/totPEs)
endif
deallocate(dpset)
CALL MPI_FINALIZE(ierr)
END