NERSCPowering Scientific Discovery Since 1974

FFTW2 MPI 3D Example C Source

#include <stdio.h>
#include <dfftw_mpi.h>

#define space_loop for(x=0;x<lnx;++x)for(y=0;y<ny;++y)for(z=0;z<nz;++z)
#define space_loop2 for(x=0;x<1;++x)for(y=0;y<2;++y)for(z=0;z<2;++z)

/*
 *     nx*ny*nz FFT computed in parallel (MPI FFTW)
 *    
 *    slab decomposed (along x) among numtasks MPI tasks
 *        
 *    lnx, lny, lny         local nx,ny,nz
 *    lnyt             local ny after transpose
 *    lxs             local x start
 *    lyst             local y start after transpose
 *    lsize             total local size
 *        
 *    Frank Hale Jul 2008 (fvhale@nersc.gov)
 *    David Skinner    Aug 2001 (dskinner@nersc.gov)
 */

int main(int argc, char **argv)
{
     int         nn, nx,ny,nz;                 /* DIMS */
     int         rank;                     /* MPI */
     int         lnx, lxs, lnyt, lyst, lsize;    /* LOCAL DIMS */
     int         x, y, z;            /* LOOP VARS */
     fftwnd_mpi_plan     plan, iplan;            /* PLANS */
     fftw_complex     *data, *work;            /* VECTORS */

     MPI_Init(&argc,&argv);

     if(argc != 2) {
      if(!rank){printf("usage: %s dim_size\n",argv[0]);}
      if(!rank){printf("\tdim_size = number of points in each dimension\n");}
      MPI_Finalize();
      exit(1);
     }
     else { nn = atoi(argv[1]); }

     MPI_Comm_rank(MPI_COMM_WORLD, &rank);

     nx = ny = nz = nn;
     printf("PE: %d, nx=%d, ny=%d, nz=%d\n", rank, nx, ny, nz);

     plan = fftw3d_mpi_create_plan(MPI_COMM_WORLD, nx, ny, nz,
     FFTW_FORWARD, FFTW_ESTIMATE);

     iplan = fftw3d_mpi_create_plan(MPI_COMM_WORLD, nx, ny, nz,
     FFTW_BACKWARD, FFTW_ESTIMATE);

/*   slab decomposition */

     fftwnd_mpi_local_sizes(plan, &lnx, &lxs, &lnyt, &lyst, &lsize);

     printf("data_decomp:: %d xslab %d->%d (%d) %d (%.3e MB of %.3e MB total)\n"
    ,rank, lxs, lxs+lnx,lnx, lyst,
    lsize*sizeof(fftw_complex)*1.0e-6,
    nn*nn*nn*sizeof(fftw_complex)*1.0e-6);

/*  malloc data and work vectors */

     data = (fftw_complex*) malloc(sizeof(fftw_complex) * lsize);
     if(!data) {printf("malloc failed for data (task = %d)\n",rank);}

     work = (fftw_complex*) malloc(sizeof(fftw_complex) * lsize);
     if(!data) {printf("malloc failed for work (task = %d)\n",rank);}

/* initialize data */

     space_loop{
        data[(x*ny + y) * nz + z].re = 1.0;
        data[(x*ny + y) * nz + z].im = 0.0;
     }

     space_loop2{
        printf("PE %d input %d %d %d %.3e %.3e\n", rank, x+lxs, y, z,
     data[(x*ny + y) * nz + z].re,
     data[(x*ny + y) * nz + z].im);
     }

     MPI_Barrier(MPI_COMM_WORLD);


     fftwnd_mpi(plan, 1, data, work, FFTW_TRANSPOSED_ORDER);

/* after forward fft */

     space_loop2{
        printf("PE %d forward %d %d %d %.3e %.3e\n", rank, x+lxs, y, z,
     data[(x*ny + y) * nz + z].re,
     data[(x*ny + y) * nz + z].im);
     }

     MPI_Barrier(MPI_COMM_WORLD);
     fftwnd_mpi(iplan, 1, data, work, FFTW_TRANSPOSED_ORDER);

/* after reverse fft */

     space_loop2{
        printf("PE %d reverse %d %d %d %.3e %.3e\n", rank, x+lxs, y, z,
     data[(x*ny + y) * nz + z].re,
     data[(x*ny + y) * nz + z].im);
     }

     MPI_Barrier(MPI_COMM_WORLD);
     printf("\nWrap up PE %d\n",rank);

     free(data);
     free(work);
     fftwnd_mpi_destroy_plan(plan);
     fftwnd_mpi_destroy_plan(iplan);

     MPI_Finalize();
     exit(0);
}