diff --git a/src/Depend.sh b/src/Depend.sh
index 2a83a1155..18595f6d7 100644
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -1,118 +1,124 @@
 # Depend.sh = Install/unInstall files due to package dependencies
 # this script is invoked after any package is installed/uninstalled
 
 # all parent/child package dependencies should be listed below
 # parent package = has files that files in another package derive from
 # child package = has files that derive from files in another package
 
 # update child packages that depend on the parent,
 #   but only if the child package is already installed
 # this is necessary to ensure the child package installs
 #   only child files whose parent package files are now installed
 # decisions on (un)installing individual child files are made by
 #   the Install.sh script in the child package
 
 # depend function: arg = child-package
 # checks if child-package is installed, if not just return
 # otherwise invoke update of child package via its Install.sh
 
 depend () {
   # $1 = child package directory, relative to the current directory
   # if the directory cannot be entered, return right away: continuing
   #   would make the "cd .." below leave us one level above the
   #   starting directory and break every later depend call
   cd "$1" || return 0

   # child counts as installed if any of its *.cpp/*.h files
   #   also exists one directory up
   installed=0
   for file in *.cpp *.h; do
     if (test -e "../$file") then
       installed=1
     fi
   done
 
   cd ..
   if (test $installed = 0) then
     return
   fi
 
   # run the child's own Install.sh with argument 2 if it has one,
   #   otherwise the Install.sh one level up, from inside the child dir
   echo "  updating package $1"
   if (test -e "$1/Install.sh") then
     cd "$1"; /bin/sh Install.sh 2; cd ..
   else
     cd "$1"; /bin/sh ../Install.sh 2; cd ..
   fi
 }
 
 # add one if statement per parent package
 # add one depend() call per child package that depends on that parent
 
 # $1 (first script argument) is compared against each parent package
 #   name below; on a match, depend is called once per listed child
 # the tests are independent, so at most one block runs per invocation
 #   since $1 can equal only one name

 if (test $1 = "ASPHERE") then
   depend GPU
   depend USER-OMP
   depend USER-INTEL
 fi
 
 if (test $1 = "CLASS2") then
   depend GPU
   depend USER-CUDA
   depend USER-OMP
 fi
 
 if (test $1 = "COLLOID") then
   depend GPU
   depend USER-OMP
 fi
 
 if (test $1 = "DIPOLE") then
   depend USER-MISC
   depend USER-OMP
 fi
 
 if (test $1 = "GRANULAR") then
   depend USER-CUDA
   depend USER-OMP
 fi
 
 if (test $1 = "KSPACE") then
   depend CORESHELL
   depend GPU
   depend KOKKOS
   depend OPT
   depend USER-CUDA
   depend USER-OMP
   depend USER-INTEL
   depend USER-PHONON
+  depend USER-FEP
 fi
 
 if (test $1 = "MANYBODY") then
   depend GPU
   depend KOKKOS
   depend OPT
   depend USER-CUDA
   depend USER-MISC
   depend USER-OMP
 fi
 
 if (test $1 = "MOLECULE") then
   depend GPU
   depend KOKKOS
   depend USER-CUDA
   depend USER-MISC
   depend USER-OMP
+  depend USER-FEP
   depend USER-INTEL
 fi
 
 if (test $1 = "PERI") then
   depend USER-OMP
 fi
 
 if (test $1 = "RIGID") then
   depend USER-OMP
 fi
 
 if (test $1 = "USER-CG-CMM") then
   depend GPU
   depend KOKKOS
   depend USER-CUDA
   depend USER-OMP
 fi
 
 # USER-FEP is both a child (of KSPACE and MOLECULE above)
 #   and a parent (of USER-OMP)

+if (test $1 = "USER-FEP") then
+  depend USER-OMP
+fi
+
 if (test $1 = "USER-MISC") then
   depend GPU
   depend USER-OMP
 fi
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 250d9b875..f13583506 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -1,3504 +1,3514 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
      per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
      analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
      triclinic added by Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "pppm.h"
 #include "atom.h"
 #include "comm.h"
 #include "gridcomm.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "fft3d_wrap.h"
 #include "remap_wrap.h"
 #include "memory.h"
 #include "error.h"
 
 #include "math_const.h"
 #include "math_special.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 #define MAXORDER 7
 #define OFFSET 16384
 #define LARGE 10000.0
 #define SMALL 0.00001
 #define EPS_HOC 1.0e-7
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 /* ---------------------------------------------------------------------- */
 
 PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   // at least one argument is required; arg[0] is the accuracy

   if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
  
   pppmflag = 1;
   group_group_enable = 1;
 
   // arg[0] is parsed as a number and only its magnitude is stored

   accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
 
   // NOTE(review): presumably the prime factors permitted for the FFT grid
   //   dimensions - confirm where factors[] is consumed (not visible here)

   nfactors = 3;
   factors = new int[nfactors];
   factors[0] = 2;
   factors[1] = 3;
   factors[2] = 5;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // every array/object pointer starts as NULL
   // the destructor and init() call deallocate() before any allocate(),
   //   presumably relying on destroy/delete accepting NULL - TODO confirm

   density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
   density_fft = NULL;
   u_brick = NULL;
   v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
   greensfn = NULL;
   work1 = work2 = NULL;
   vg = NULL;
   fkx = fky = fkz = NULL;
 
   sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = 
     sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
 
   density_A_brick = density_B_brick = NULL;
   density_A_fft = density_B_fft = NULL;
 
   gf_b = NULL;
   rho1d = rho_coeff = drho1d = drho_coeff = NULL;
 
   fft1 = fft2 = NULL;
   remap = NULL;
   cg = NULL;
   cg_peratom = NULL;
 
   // part2grid is sized lazily in compute() once nlocal exceeds nmax

   nmax = 0;
   part2grid = NULL;
 
   peratom_allocate_flag = 0;
   group_allocate_flag = 0;
 
   // define acons coefficients for estimation of kspace errors
   // see JCP 109, pg 7698 for derivation of coefficients
   // higher order coefficients may be computed if needed
   // table is 8 x 7; only entries acons[m][k] with 1 <= m <= 7, k < m
   //   are assigned below, all other entries are left unset
 
   memory->create(acons,8,7,"pppm:acons");
   acons[1][0] = 2.0 / 3.0;
   acons[2][0] = 1.0 / 50.0;
   acons[2][1] = 5.0 / 294.0;
   acons[3][0] = 1.0 / 588.0;
   acons[3][1] = 7.0 / 1440.0;
   acons[3][2] = 21.0 / 3872.0;
   acons[4][0] = 1.0 / 4320.0;
   acons[4][1] = 3.0 / 1936.0;
   acons[4][2] = 7601.0 / 2271360.0;
   acons[4][3] = 143.0 / 28800.0;
   acons[5][0] = 1.0 / 23232.0;
   acons[5][1] = 7601.0 / 13628160.0;
   acons[5][2] = 143.0 / 69120.0;
   acons[5][3] = 517231.0 / 106536960.0;
   acons[5][4] = 106640677.0 / 11737571328.0;
   acons[6][0] = 691.0 / 68140800.0;
   acons[6][1] = 13.0 / 57600.0;
   acons[6][2] = 47021.0 / 35512320.0;
   acons[6][3] = 9694607.0 / 2095994880.0;
   acons[6][4] = 733191589.0 / 59609088000.0;
   acons[6][5] = 326190917.0 / 11700633600.0;
   acons[7][0] = 1.0 / 345600.0;
   acons[7][1] = 3617.0 / 35512320.0;
   acons[7][2] = 745739.0 / 838397952.0;
   acons[7][3] = 56399353.0 / 12773376000.0;
   acons[7][4] = 25091609.0 / 1560084480.0;
   acons[7][5] = 1755948832039.0 / 36229939200000.0;
   acons[7][6] = 4887769399.0 / 37838389248.0;
 }
 
 /* ----------------------------------------------------------------------
    free all memory
 ------------------------------------------------------------------------- */
 
 PPPM::~PPPM()
 {
   // factors and acons are created in the constructor
   // deallocate() is called unconditionally; the per-atom and group
   //   arrays are released only if their allocate flags are set

   delete [] factors;
   deallocate();
   if (peratom_allocate_flag) deallocate_peratom();
   if (group_allocate_flag) deallocate_groups();
   memory->destroy(part2grid);
   memory->destroy(acons);
 }
 
 /* ----------------------------------------------------------------------
    called once before run
 ------------------------------------------------------------------------- */
 
 void PPPM::init()
 {
   // overall sequence: validate settings, pull cutoff (and TIP4P params)
   //   from the pair style, set the target accuracy, free old arrays,
   //   choose grid/order, print stats on proc 0, then reallocate

   if (me == 0) {
     if (screen) fprintf(screen,"PPPM initialization ...\n");
     if (logfile) fprintf(logfile,"PPPM initialization ...\n");
   }
 
   // error check
 
   triclinic_check();
   if (domain->triclinic && differentiation_flag == 1)
     error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
                "and kspace_modify diff ad");
   if (domain->triclinic && slabflag)
     error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
                "slab correction");
   if (domain->dimension == 2) error->all(FLERR,
                                          "Cannot use PPPM with 2d simulation");
   if (comm->style != 0) 
     error->universe_all(FLERR,"PPPM can only currently be used with "
                         "comm_style brick");
 
   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
 
   // same boundary checks are repeated in setup()

   if (slabflag == 0 && domain->nonperiodic > 0)
     error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
   if (slabflag) {
     if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
         domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
       error->all(FLERR,"Incorrect boundaries with slab PPPM");
   }
 
   if (order < 2 || order > MAXORDER) {
     char str[128];
     sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
     error->all(FLERR,str);
   }
 
   // extract short-range Coulombic cutoff from pair style
 
   triclinic = domain->triclinic;
   pair_check();
 
   int itmp = 0;
   double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
   if (p_cutoff == NULL)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   cutoff = *p_cutoff;
 
   // if kspace is TIP4P, extract TIP4P params from pair style
   // bond/angle are not yet init(), so ensure equilibrium request is valid
 
   qdist = 0.0;
 
   if (tip4pflag) {
     if (me == 0) {
       if (screen) fprintf(screen,"  extracting TIP4P info from pair style\n");
       if (logfile) fprintf(logfile,"  extracting TIP4P info from pair style\n");
     }
 
     double *p_qdist = (double *) force->pair->extract("qdist",itmp);
     int *p_typeO = (int *) force->pair->extract("typeO",itmp);
     int *p_typeH = (int *) force->pair->extract("typeH",itmp);
     int *p_typeA = (int *) force->pair->extract("typeA",itmp);
     int *p_typeB = (int *) force->pair->extract("typeB",itmp);
     if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
       error->all(FLERR,"Pair style is incompatible with TIP4P KSpace style");
     qdist = *p_qdist;
     typeO = *p_typeO;
     typeH = *p_typeH;
     int typeA = *p_typeA;
     int typeB = *p_typeB;
 
     // typeA/typeB are validated against the angle/bond type counts and
     //   setflag arrays before their equilibrium values are requested

     if (force->angle == NULL || force->bond == NULL ||
         force->angle->setflag == NULL || force->bond->setflag == NULL)
       error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
     if (typeA < 1 || typeA > atom->nangletypes ||
         force->angle->setflag[typeA] == 0)
       error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
     if (typeB < 1 || typeB > atom->nbondtypes ||
         force->bond->setflag[typeB] == 0)
       error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
     double theta = force->angle->equilibrium_angle(typeA);
     double blen = force->bond->equilibrium_distance(typeB);
     alpha = qdist / (cos(0.5*theta) * blen);
     if (domain->triclinic)
       error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
   }
 
   // compute qsum & qsqsum and warn if not charge-neutral
   // natoms_original is compared against atom->natoms in compute()
 
   scale = 1.0;
   qqrd2e = force->qqrd2e;
   qsum_qsq();
   natoms_original = atom->natoms;
 
   // set accuracy (force units) from accuracy_relative or accuracy_absolute
   // a non-negative accuracy_absolute takes precedence
 
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   // free all arrays previously allocated
 
   deallocate();
   if (peratom_allocate_flag) deallocate_peratom();
   if (group_allocate_flag) deallocate_groups();
 
   // setup FFT grid resolution and g_ewald
   // normally one iteration thru while loop is all that is required
   // if grid stencil does not extend beyond neighbor proc
   //   or overlap is allowed, then done
   // else reduce order and try again
 
   int (*procneigh)[2] = comm->procneigh;
 
   GridComm *cgtmp = NULL;
   int iteration = 0;
 
   while (order >= minorder) {
     if (iteration && me == 0)
       error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
                      "beyond nearest neighbor processor");
 
     if (stagger_flag && !differentiation_flag) compute_gf_denom();
     set_grid_global();
     set_grid_local();
     if (overlap_allowed) break;
 
     // temporary GridComm used only to test whether ghost cells overlap
     // on a break it stays alive and is deleted after the loop

     cgtmp = new GridComm(lmp,world,1,1,
                          nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                          nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     cgtmp->ghost_notify();
     if (!cgtmp->ghost_overlap()) break;
     delete cgtmp;
 
     order--;
     iteration++;
   }
    
   // NOTE(review): if the loop ends via order < minorder, cgtmp has already
   //   been deleted but is not reset to NULL; the code below is only safe
   //   if error->all() does not return - confirm

   if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
   if (!overlap_allowed && cgtmp->ghost_overlap())
     error->all(FLERR,"PPPM grid stencil extends "
                "beyond nearest neighbor processor");
   if (cgtmp) delete cgtmp;
 
   // adjust g_ewald
   // skipped when gewaldflag is set
 
   if (!gewaldflag) adjust_gewald();
 
   // calculate the final accuracy
 
   double estimated_accuracy = final_accuracy();
 
   // print stats
   // grid and FFT counts are reported as the max over all procs
 
   int ngrid_max,nfft_both_max;
   MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
   MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
 
 #ifdef FFT_SINGLE
     const char fft_prec[] = "single";
 #else
     const char fft_prec[] = "double";
 #endif
 
     if (screen) {
       fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
       fprintf(screen,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
       fprintf(screen,"  stencil order = %d\n",order);
       fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
               estimated_accuracy);
       fprintf(screen,"  estimated relative force accuracy = %g\n",
               estimated_accuracy/two_charge_force);
       fprintf(screen,"  using %s precision FFTs\n",fft_prec);
       fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
               ngrid_max,nfft_both_max);
     }
     if (logfile) {
       fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
       fprintf(logfile,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
       fprintf(logfile,"  stencil order = %d\n",order);
       fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
               estimated_accuracy);
       fprintf(logfile,"  estimated relative force accuracy = %g\n",
               estimated_accuracy/two_charge_force);
       fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
       fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
               ngrid_max,nfft_both_max);
     }
   }
 
   // allocate K-space dependent memory
   // don't invoke allocate peratom() or group(), will be allocated when needed
 
   allocate();
   cg->ghost_notify();
   cg->setup();
 
   // pre-compute Green's function denominator expansion
   // pre-compute 1d charge distribution coefficients
 
   compute_gf_denom();
   if (differentiation_flag == 1) compute_sf_precoeff();
   compute_rho_coeff();
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed
 ------------------------------------------------------------------------- */
 
 void PPPM::setup()
 {
   // triclinic boxes are handled entirely by setup_triclinic()
   // NOTE(review): the boundary checks below are therefore not run for
   //   triclinic boxes - confirm this is intended

   if (triclinic) {
     setup_triclinic();
     return;
   }
 
+  // perform some checks to avoid illegal boundaries with read_data
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
+  if (slabflag) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPM");
+  }
+
   int i,j,k,n;
 
   // volume-dependent factors
   // adjust z dimension for 2d slab PPPM
   // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
   // only the non-triclinic case reaches this point (see early return
   //   above), so the box lengths always come from domain->prd
 
   double *prd = domain->prd;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   // inverse grid spacing in each dimension and inverse cell volume

   delxinv = nx_pppm/xprd;
   delyinv = ny_pppm/yprd;
   delzinv = nz_pppm/zprd_slab;
 
   delvolinv = delxinv*delyinv*delzinv;
 
   double unitkx = (MY_2PI/xprd);
   double unitky = (MY_2PI/yprd);
   double unitkz = (MY_2PI/zprd_slab);
 
   // fkx,fky,fkz for my FFT grid pts
   // 2*i/nx_pppm is integer division: 0 for the lower half of the grid
   //   indices and 1 for the upper half, which are shifted down by nx_pppm
 
   double per;
 
   for (i = nxlo_fft; i <= nxhi_fft; i++) {
     per = i - nx_pppm*(2*i/nx_pppm);
     fkx[i] = unitkx*per;
   }
 
   for (i = nylo_fft; i <= nyhi_fft; i++) {
     per = i - ny_pppm*(2*i/ny_pppm);
     fky[i] = unitky*per;
   }
 
   for (i = nzlo_fft; i <= nzhi_fft; i++) {
     per = i - nz_pppm*(2*i/nz_pppm);
     fkz[i] = unitkz*per;
   }
 
   // virial coefficients
   // n is a flat index over my FFT grid pts with x varying fastest
   // all 6 components are zeroed for the k = 0 point
 
   double sqk,vterm;
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++) {
     for (j = nylo_fft; j <= nyhi_fft; j++) {
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
         if (sqk == 0.0) {
           vg[n][0] = 0.0;
           vg[n][1] = 0.0;
           vg[n][2] = 0.0;
           vg[n][3] = 0.0;
           vg[n][4] = 0.0;
           vg[n][5] = 0.0;
         } else {
           vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
           vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
           vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
           vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
           vg[n][3] = vterm*fkx[i]*fky[j];
           vg[n][4] = vterm*fkx[i]*fkz[k];
           vg[n][5] = vterm*fky[j]*fkz[k];
         }
         n++;
       }
     }
   }
 
   // Green's function variant is chosen by differentiation_flag

   if (differentiation_flag == 1) compute_gf_ad();
   else compute_gf_ik();
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed
    for a triclinic system
 ------------------------------------------------------------------------- */
 
 void PPPM::setup_triclinic()
 {
   int i,j,k,n;
   double *prd;
 
   // volume-dependent factors
   // adjust z dimension for 2d slab PPPM
   // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
 
   prd = domain->prd;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   // use lamda (0-1) coordinates
   // so the inverse spacings are just the grid counts
 
   delxinv = nx_pppm;
   delyinv = ny_pppm;
   delzinv = nz_pppm;
   delvolinv = delxinv*delyinv*delzinv/volume;
 
   // fkx,fky,fkz for my FFT grid pts
   // unlike setup(), each array is indexed by the flat grid index n
   //   (x varying fastest), not by the per-dimension grid index
 
   double per_i,per_j,per_k;
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++) {
     per_k = k - nz_pppm*(2*k/nz_pppm);
     for (j = nylo_fft; j <= nyhi_fft; j++) {
       per_j = j - ny_pppm*(2*j/ny_pppm);
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         per_i = i - nx_pppm*(2*i/nx_pppm);
 
         // x2lamdaT() is called with the same array as input and output,
         //   i.e. the vector is transformed in place

         double unitk_lamda[3];
         unitk_lamda[0] = 2.0*MY_PI*per_i;
         unitk_lamda[1] = 2.0*MY_PI*per_j;
         unitk_lamda[2] = 2.0*MY_PI*per_k;
         x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
         fkx[n] = unitk_lamda[0];
         fky[n] = unitk_lamda[1];
         fkz[n] = unitk_lamda[2];
         n++;
       }
     }
   }
 
   // virial coefficients
   // all 6 components are zeroed for the k = 0 point
 
   double sqk,vterm;
 
   for (n = 0; n < nfft; n++) {
     sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
     if (sqk == 0.0) {
       vg[n][0] = 0.0;
       vg[n][1] = 0.0;
       vg[n][2] = 0.0;
       vg[n][3] = 0.0;
       vg[n][4] = 0.0;
       vg[n][5] = 0.0;
     } else {
       vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
       vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
       vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
       vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
       vg[n][3] = vterm*fkx[n]*fky[n];
       vg[n][4] = vterm*fkx[n]*fkz[n];
       vg[n][5] = vterm*fky[n]*fkz[n];
     }
   }
 
   compute_gf_ik_triclinic();
 }
 
 /* ----------------------------------------------------------------------
    reset local grid arrays and communication stencils
    called by fix balance b/c it changed sizes of processor sub-domains
 ------------------------------------------------------------------------- */
 
 void PPPM::setup_grid()
 {
   // free all arrays previously allocated
   // done before set_grid_local() so the old grid bounds are still in place
 
   deallocate();
   if (peratom_allocate_flag) deallocate_peratom();
   if (group_allocate_flag) deallocate_groups();
 
   // reset portion of global grid that each proc owns
 
   set_grid_local();
 
   // reallocate K-space dependent memory
   // check if grid communication is now overlapping if not allowed
   // don't invoke allocate peratom() or group(), will be allocated when needed
 
   allocate();
 
   cg->ghost_notify();
   if (overlap_allowed == 0 && cg->ghost_overlap())
     error->all(FLERR,"PPPM grid stencil extends "
                "beyond nearest neighbor processor");
   cg->setup();
 
   // pre-compute Green's function denominator expansion
   // pre-compute 1d charge distribution coefficients
 
   compute_gf_denom();
   if (differentiation_flag == 1) compute_sf_precoeff();
   compute_rho_coeff();
 
   // pre-compute volume-dependent coeffs
 
   setup();
 }
 
 /* ----------------------------------------------------------------------
    compute the PPPM long-range force, energy, virial
 ------------------------------------------------------------------------- */
 
 void PPPM::compute(int eflag, int vflag)
 {
   int i,j;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
          eflag_atom = vflag_atom = 0;
 
   if (evflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     cg_peratom->ghost_notify();
     cg_peratom->setup();
   }
 
   // if atom count has changed, update qsum and qsqsum
 
   if (atom->natoms != natoms_original) {
     qsum_qsq();
     natoms_original = atom->natoms;
   }
    
   // return if there are no charges
   // this happens before the lamda conversion below, so nothing
   //   needs to be converted back
    
   if (qsqsum == 0.0) return;
    
   // convert atoms from box to lamda coords
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
 
   // extend size of per-atom arrays if necessary
   // growth is triggered by nlocal but the new size is atom->nmax
 
   if (atom->nlocal > nmax) {
     memory->destroy(part2grid);
     nmax = atom->nmax;
     memory->create(part2grid,nmax,3,"pppm:part2grid");
   }
 
   // find grid points for all my particles
   // map my particle charge onto my local 3d density grid
 
   particle_map();
   make_rho();
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
   cg->reverse_comm(this,REVERSE_RHO);
   brick2fft();
 
   // compute potential gradient on my FFT grid and
   //   portion of e_long on this proc's FFT grid
   // return gradients (electric fields) in 3d brick decomposition
   // also performs per-atom calculations via poisson_peratom()
 
   poisson();
 
   // all procs communicate E-field values
   // to fill ghost cells surrounding their 3d bricks
 
   if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
   else cg->forward_comm(this,FORWARD_IK);
 
   // extra per-atom energy/virial communication
   // with differentiation_flag == 1 this is done only if vflag_atom is set
 
   if (evflag_atom) {
     if (differentiation_flag == 1 && vflag_atom) 
       cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
     else if (differentiation_flag == 0)
       cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
   }
 
   // calculate the force on my particles
 
   fieldforce();
 
   // extra per-atom energy/virial communication
 
   if (evflag_atom) fieldforce_peratom();
 
   // sum global energy across procs and add in volume-dependent term
 
   const double qscale = qqrd2e * scale;
 
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy = energy_all;
 
     energy *= 0.5*volume;
     energy -= g_ewald*qsqsum/MY_PIS +
       MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
     energy *= qscale;
   }
 
   // sum global virial across procs
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
   }
 
   // per-atom energy/virial
   // energy includes self-energy correction
   // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
 
   if (evflag_atom) {
     double *q = atom->q;
     int nlocal = atom->nlocal;
     int ntotal = nlocal;
     if (tip4pflag) ntotal += atom->nghost;
 
     // the q-dependent correction is applied to owned atoms only;
     //   entries nlocal..ntotal-1 are just scaled

     if (eflag_atom) {
       for (i = 0; i < nlocal; i++) {
         eatom[i] *= 0.5;
         eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
           (g_ewald*g_ewald*volume);
         eatom[i] *= qscale;
       }
       for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
     }
 
     if (vflag_atom) {
       for (i = 0; i < ntotal; i++)
         for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
     }
   }
 
   // 2d slab correction
 
   if (slabflag == 1) slabcorr();
 
   // convert atoms back from lamda to box coords
 
   if (triclinic) domain->lamda2x(atom->nlocal);
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 void PPPM::allocate()
 {
   // brick arrays span the *_out grid bounds of this proc
   // FFT-side arrays are sized by nfft_both (work1/work2 by 2*nfft_both)

   memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:density_brick");
 
   memory->create(density_fft,nfft_both,"pppm:density_fft");
   memory->create(greensfn,nfft_both,"pppm:greensfn");
   memory->create(work1,2*nfft_both,"pppm:work1");
   memory->create(work2,2*nfft_both,"pppm:work2");
   memory->create(vg,nfft_both,6,"pppm:vg");
 
   // non-triclinic: one 1d array per dimension over the FFT index range
   // triclinic: flat arrays of length nfft_both (see setup_triclinic())

   if (triclinic == 0) {
     memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
     memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
     memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
   } else {
     memory->create(fkx,nfft_both,"pppm:fkx");
     memory->create(fky,nfft_both,"pppm:fky");
     memory->create(fkz,nfft_both,"pppm:fkz");
   }
 
   // differentiation_flag == 1: u_brick plus the 6 sf_precoeff arrays
   // otherwise: the 3 vd*_brick arrays
   // deallocate() uses the same test to decide what to free

   if (differentiation_flag == 1) {
     memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:u_brick");
 
     memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
     memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
     memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
     memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
     memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
     memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
 
   } else {
     memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdx_brick");
     memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdy_brick");
     memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdz_brick");
   }
 
   // summation coeffs
   // order_allocated records the order used for these offsets so that
   //   deallocate() can free them even if order has changed since
   // gf_b is not created here when stagger_flag is set
 
   order_allocated = order;
   if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
   memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
   memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
   memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
   memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
                           "pppm:drho_coeff");
 
   // create 2 FFTs and a Remap
   // 1st FFT keeps data in FFT decomposition
   // 2nd FFT returns data in 3d brick decomposition
   // remap takes data from 3d brick to FFT decomposition
   // tmp is passed by address to both FFT3d constructors
   //   and is not read afterwards in this function
 
   int tmp;
 
   fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    0,0,&tmp,collective_flag);
 
   fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    0,0,&tmp,collective_flag);
 
   remap = new Remap(lmp,world,
                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                     1,0,0,FFT_PRECISION,collective_flag);
 
   // create ghost grid object for rho and electric field communication
   // the two branches differ only in the 3rd constructor argument (1 vs 3)
   // NOTE(review): presumably the number of values communicated per grid
   //   point in the forward direction - confirm against GridComm
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (differentiation_flag == 1)
     cg = new GridComm(lmp,world,1,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
     cg = new GridComm(lmp,world,3,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 }
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 void PPPM::deallocate()
 {
   // frees what allocate() created, using the current grid offsets
   // NOTE(review): the branches below test the current values of
   //   differentiation_flag and triclinic, not the values in effect when
   //   allocate() ran; only order is tracked (order_allocated) - confirm
   //   these flags cannot change between allocate() and deallocate()

   memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
 
   if (differentiation_flag == 1) {
     memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy(sf_precoeff1);
     memory->destroy(sf_precoeff2);
     memory->destroy(sf_precoeff3);
     memory->destroy(sf_precoeff4);
     memory->destroy(sf_precoeff5);
     memory->destroy(sf_precoeff6);
   } else {
     memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
   }
 
   memory->destroy(density_fft);
   memory->destroy(greensfn);
   memory->destroy(work1);
   memory->destroy(work2);
   memory->destroy(vg);
 
   if (triclinic == 0) {
     memory->destroy1d_offset(fkx,nxlo_fft);
     memory->destroy1d_offset(fky,nylo_fft);
     memory->destroy1d_offset(fkz,nzlo_fft);
   } else {
     memory->destroy(fkx);
     memory->destroy(fky);
     memory->destroy(fkz);
   }
 
   // gf_b is destroyed unconditionally, then also reset to NULL
   //   when stagger_flag is set
   // the rho arrays use the order recorded at allocation time

   memory->destroy(gf_b);
   if (stagger_flag) gf_b = NULL;
   memory->destroy2d_offset(rho1d,-order_allocated/2);
   memory->destroy2d_offset(drho1d,-order_allocated/2);
   memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
   memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
 
   // these pointers are not reset to NULL here; within this part of the
   //   file allocate() reassigns them before deallocate() runs again

   delete fft1;
   delete fft2;
   delete remap;
   delete cg;
 }
 
 /* ----------------------------------------------------------------------
    allocate per-atom memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 void PPPM::allocate_peratom()
 {
   // flag is tested by compute(), init(), setup_grid() and the destructor

   peratom_allocate_flag = 1;
 
   // u_brick is created here only for differentiation_flag != 1;
   //   for differentiation_flag == 1 it is created in allocate()

   if (differentiation_flag != 1)
     memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:u_brick");
 
   // v0..v5 bricks span the same *_out bounds as the other brick arrays

   memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v0_brick");
 
   memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v1_brick");
   memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v2_brick");
   memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v3_brick");
   memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v4_brick");
   memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v5_brick");
 
   // create ghost grid object for per-atom energy/virial communication
   // the two branches differ only in the 3rd constructor argument (6 vs 7)
   // NOTE(review): presumably 6 virial bricks, plus u_brick in the
   //   differentiation_flag != 1 case - confirm against GridComm
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (differentiation_flag == 1)
     cg_peratom =
       new GridComm(lmp,world,6,1,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                    procneigh[0][0],procneigh[0][1],procneigh[1][0],
                    procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
     cg_peratom =
       new GridComm(lmp,world,7,1,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                    procneigh[0][0],procneigh[0][1],procneigh[1][0],
                    procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 }
 
 /* ----------------------------------------------------------------------
    deallocate per-atom memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 void PPPM::deallocate_peratom()
 {
   // mark the per-atom grids as released before freeing them
 
   peratom_allocate_flag = 0;
 
   // the six v*_brick arrays share the same lower offsets
 
   memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
 
   // u_brick is freed here only when allocate_peratom() created it,
   // i.e. for every differentiation_flag except 1
 
   if (differentiation_flag != 1) {
     memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
   }
 
   delete cg_peratom;
 }
 
 /* ----------------------------------------------------------------------
    set global size of PPPM grid = nx,ny,nz_pppm
    used for charge accumulation, FFTs, and electric field interpolation
 ------------------------------------------------------------------------- */
 
 void PPPM::set_grid_global()
 {
   // overview:
   //   1) estimate g_ewald unless gewaldflag is set
   //   2) pick nx_pppm,ny_pppm,nz_pppm unless gridflag is set
   //   3) bump each count until factorable() accepts it
   //   4) derive the grid spacings h_x,h_y,h_z from the final counts
   //   5) error out if any count reaches OFFSET
 
   // use xprd,yprd,zprd (even if triclinic, and then scale later)
   // adjust z dimension for 2d slab PPPM
   // 3d PPPM just uses zprd since slab_volfactor = 1.0
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
 
   // make initial g_ewald estimate
   // based on desired accuracy and real space cutoff
   // fluid-occupied volume used to estimate real-space error
   // zprd used rather than zprd_slab
 
   double h;
   bigint natoms = atom->natoms;
 
   if (!gewaldflag) {
     if (accuracy <= 0.0)
       error->all(FLERR,"KSpace accuracy must be > 0");
     g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
     // the intermediate value must be < 1.0 for sqrt(-log()) to be real;
     // otherwise fall back to the alternative formula
     if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
     else g_ewald = sqrt(-log(g_ewald)) / cutoff;
   }
 
   // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
   // nz_pppm uses extended zprd_slab instead of zprd
   // reduce it until accuracy target is met
 
   if (!gridflag) {
 
     if (differentiation_flag == 1 || stagger_flag) {
 
       // iterative search: start from a coarse isotropic spacing and
       // shrink it by 5% per pass until compute_df_kspace() <= accuracy
 
       h = h_x = h_y = h_z = 4.0/g_ewald;
       int count = 0;
       while (1) {
 
         // set grid dimension
         nx_pppm = static_cast<int> (xprd/h_x);
         ny_pppm = static_cast<int> (yprd/h_y);
         nz_pppm = static_cast<int> (zprd_slab/h_z);
 
         // enforce a minimum of 2 grid points per dimension
         if (nx_pppm <= 1) nx_pppm = 2;
         if (ny_pppm <= 1) ny_pppm = 2;
         if (nz_pppm <= 1) nz_pppm = 2;
 
         //set local grid dimension
         // same FFT decomposition logic as in set_grid_local();
         // the n*_fft bounds set here are what compute_qopt() loops over
         int npey_fft,npez_fft;
         if (nz_pppm >= nprocs) {
           npey_fft = 1;
           npez_fft = nprocs;
         } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
         int me_y = me % npey_fft;
         int me_z = me / npey_fft;
 
         nxlo_fft = 0;
         nxhi_fft = nx_pppm - 1;
         nylo_fft = me_y*ny_pppm/npey_fft;
         nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
         nzlo_fft = me_z*nz_pppm/npez_fft;
         nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
         double df_kspace = compute_df_kspace();
 
         count++;
 
         // break loop if the accuracy has been reached or
         // too many loops have been performed
 
         if (df_kspace <= accuracy) break;
         if (count > 500) error->all(FLERR, "Could not compute grid size");
         h *= 0.95;
         h_x = h_y = h_z = h;
       }
 
     } else {
 
       // ik branch: each dimension is refined independently by adding one
       // grid point at a time while estimate_ik_error() exceeds accuracy
       // NOTE(review): inside each loop err is evaluated with the spacing
       //   from the previous pass and the count is incremented afterwards,
       //   so the count ends one step past the first spacing that meets
       //   the target - confirm this margin is intended
 
       double err;
       h_x = h_y = h_z = 1.0/g_ewald;
 
       nx_pppm = static_cast<int> (xprd/h_x) + 1;
       ny_pppm = static_cast<int> (yprd/h_y) + 1;
       nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
 
       err = estimate_ik_error(h_x,xprd,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_x,xprd,natoms);
         nx_pppm++;
         h_x = xprd/nx_pppm;
       }
 
       err = estimate_ik_error(h_y,yprd,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_y,yprd,natoms);
         ny_pppm++;
         h_y = yprd/ny_pppm;
       }
 
       err = estimate_ik_error(h_z,zprd_slab,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_z,zprd_slab,natoms);
         nz_pppm++;
         h_z = zprd_slab/nz_pppm;
       }
     }
 
     // scale grid for triclinic skew
     // per-dimension grid densities are passed through lamda2xT() and
     // truncated + 1 to give the new counts
     
     if (triclinic) {
       double tmp[3];
       tmp[0] = nx_pppm/xprd;
       tmp[1] = ny_pppm/yprd;
       tmp[2] = nz_pppm/zprd;
       lamda2xT(&tmp[0],&tmp[0]);
       nx_pppm = static_cast<int>(tmp[0]) + 1;
       ny_pppm = static_cast<int>(tmp[1]) + 1;
       nz_pppm = static_cast<int>(tmp[2]) + 1;
     }
   }
 
   // boost grid size until it is factorable
 
   while (!factorable(nx_pppm)) nx_pppm++;
   while (!factorable(ny_pppm)) ny_pppm++;
   while (!factorable(nz_pppm)) nz_pppm++;
 
   // recompute grid spacings from the final counts
   // triclinic: counts are passed through x2lamdaT() and inverted
 
   if (triclinic == 0) {
     h_x = xprd/nx_pppm;
     h_y = yprd/ny_pppm;
     h_z = zprd_slab/nz_pppm;
   } else {
     double tmp[3];
     tmp[0] = nx_pppm;
     tmp[1] = ny_pppm;
     tmp[2] = nz_pppm;
     x2lamdaT(&tmp[0],&tmp[0]);
     h_x = 1.0/tmp[0];
     h_y = 1.0/tmp[1];
     h_z = 1.0/tmp[2];
   }
 
   // grid counts must stay below OFFSET, which the particle <-> grid
   // mapping adds and subtracts around its integer truncation
 
   if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
     error->all(FLERR,"PPPM grid is too large");
 }
 
 /* ----------------------------------------------------------------------
    check if all factors of n are in list of factors
    return 1 if yes, 0 if no
 ------------------------------------------------------------------------- */
 
 int PPPM::factorable(int n)
 {
   // repeatedly strip one factor from n: on each pass take the first entry
   // of factors[] that divides n; if none does, n is not factorable
 
   while (n > 1) {
     int idx = 0;
     while (idx < nfactors && n % factors[idx] != 0) idx++;
 
     if (idx == nfactors) return 0;
     n /= factors[idx];
   }
 
   // n <= 1 on entry, or n was reduced to 1
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    compute estimated kspace force error
 ------------------------------------------------------------------------- */
 
 double PPPM::compute_df_kspace()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   double df_kspace = 0.0;
   if (differentiation_flag == 1 || stagger_flag) {
     double qopt = compute_qopt();
     df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
   } else {
     double lprx = estimate_ik_error(h_x,xprd,natoms);
     double lpry = estimate_ik_error(h_y,yprd,natoms);
     double lprz = estimate_ik_error(h_z,zprd_slab,natoms);
     df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
   }
   return df_kspace;
 }
 
 /* ----------------------------------------------------------------------
    compute qopt
 ------------------------------------------------------------------------- */
 
 double PPPM::compute_qopt()
 {
   // accumulates qopt over this proc's FFT grid points (n*lo_fft..n*hi_fft)
   // and returns the sum over all procs via MPI_Allreduce
   // side effect: overwrites the member variable volume
 
   double qopt = 0.0;
   double *prd = domain->prd;
   
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd = prd[2];
   const double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   // wave-vector increment per grid index in each dimension
 
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u1, u2, sqk;
   double sum1,sum2,sum3,sum4,dot2;
 
   int k,l,m,nx,ny,nz;
   const int twoorder = 2*order;
 
   // kper,lper,mper: with integer division, 2*m/nz_pppm is 0 for indices in
   // the lower half of 0..nz_pppm-1 and 1 for the upper half, so the index
   // is mapped to m or m - nz_pppm (likewise for k and l)
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     const int mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       const int lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         const int kper = k - nx_pppm*(2*k/nx_pppm);
 
         sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
 
         // the k = 0 point contributes nothing
 
         if (sqk != 0.0) {
 
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
 
           // sum over shifted wave vectors (index + grid count * image),
           // images -2..2 in each dimension
           // qx,qy,qz hold the wave-vector component until the s* and w*
           // factors are computed, then are squared in place for dot2
 
           for (nx = -2; nx <= 2; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*square(qx/g_ewald));
             argx = 0.5*qx*xprd/nx_pppm;
             wx = powsinxx(argx,twoorder);
             qx *= qx;
 
             for (ny = -2; ny <= 2; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*square(qy/g_ewald));
               argy = 0.5*qy*yprd/ny_pppm;
               wy = powsinxx(argy,twoorder);
               qy *= qy;
 
               for (nz = -2; nz <= 2; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*square(qz/g_ewald));
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 wz = powsinxx(argz,twoorder);
                 qz *= qz;
 
                 dot2 = qx+qy+qz;
                 u1   = sx*sy*sz;
                 u2   = wx*wy*wz;
                 sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
                 sum2 += u1 * u2 * MY_4PI;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
 
   // combine the per-proc partial sums
 
   double qopt_all;
   MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
   return qopt_all;
 }
 
 /* ----------------------------------------------------------------------
    estimate kspace force error for ik method
 ------------------------------------------------------------------------- */
 
 double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
 {
   // h = grid spacing, prd = box length in the same dimension,
   // natoms = atom count used to normalize the estimate
 
   const double hg = h*g_ewald;
 
   // polynomial in (h*g_ewald)^2 with coefficients acons[order][0..order-1]
 
   double series = 0.0;
   int m = 0;
   while (m < order) {
     series += acons[order][m] * pow(hg,2.0*m);
     m++;
   }
 
   const double prefactor = q2 * pow(hg,(double)order);
   const double root = sqrt(g_ewald*prd*sqrt(MY_2PI)*series/natoms);
 
   return prefactor * root / (prd*prd);
 }
 
 /* ----------------------------------------------------------------------
    adjust the g_ewald parameter to near its optimal value
    using a Newton-Raphson solver
 ------------------------------------------------------------------------- */
 
 void PPPM::adjust_gewald()
 {
   double dx;
 
   for (int i = 0; i < LARGE; i++) {
     dx = newton_raphson_f() / derivf();
     g_ewald -= dx;
     if (fabs(newton_raphson_f()) < SMALL) return;
   }
 
   char str[128];
   sprintf(str, "Could not compute g_ewald");
   error->all(FLERR, str);
 }
 
 /* ----------------------------------------------------------------------
    calculate f(x) = real-space error - kspace error, the residual that
    the Newton-Raphson solver in adjust_gewald() drives to zero
 ------------------------------------------------------------------------- */
 
 double PPPM::newton_raphson_f()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   bigint natoms = atom->natoms;
 
   double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
        sqrt(natoms*cutoff*xprd*yprd*zprd);
 
   double df_kspace = compute_df_kspace();
 
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
    calculate numerical derivative f'(x) using forward difference
    [f(x + h) - f(x)] / h
 ------------------------------------------------------------------------- */
 
 double PPPM::derivf()
 {
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
 
   f1 = newton_raphson_f();
   g_ewald_old = g_ewald;
   g_ewald += h;
   f2 = newton_raphson_f();
   g_ewald = g_ewald_old;
   df = (f2 - f1)/h;
 
   return df;
 }
 
 /* ----------------------------------------------------------------------
    calculate the final estimate of the accuracy
 ------------------------------------------------------------------------- */
 
 double PPPM::final_accuracy()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   bigint natoms = atom->natoms;
 
   double df_kspace = compute_df_kspace();
   double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
   double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
   double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
   double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
                                    df_table*df_table);
 
   return estimated_accuracy;
 }
 
 /* ----------------------------------------------------------------------
    set local subset of PPPM/FFT grid that I own
    n xyz lo/hi in = 3d brick that I own (inclusive)
    n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
    n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
 ------------------------------------------------------------------------- */
 
 void PPPM::set_grid_local()
 {
   // computes, from the global grid counts nx_pppm,ny_pppm,nz_pppm:
   //   n*lo_in,n*hi_in   = owned brick
   //   n*lo_out,n*hi_out = owned brick + ghost cells
   //   n*lo_fft,n*hi_fft = owned section of the FFT decomposition
   //   nlower,nupper,shift,shiftone = particle <-> grid stencil parameters
   //   ngrid,nfft,nfft_both = resulting point counts
 
   // global indices of PPPM grid range from 0 to N-1
   // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that I own without ghost cells
   // for slab PPPM, assign z grid as if it were not extended
 
   nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
   nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
 
   nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
   nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
 
   nzlo_in = static_cast<int>
       (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
   nzhi_in = static_cast<int>
       (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
 
   // nlower,nupper = stencil size for mapping particles to PPPM grid
 
   nlower = -(order-1)/2;
   nupper = order/2;
 
   // shift values for particle <-> grid mapping
   // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
   // odd order adds an extra 0.5 to shift and uses shiftone = 0.0,
   //   even order uses shift = OFFSET and shiftone = 0.5
 
   if (order % 2) shift = OFFSET + 0.5;
   else shift = OFFSET;
   if (order % 2) shiftone = 0.0;
   else shiftone = 0.5;
 
   // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that my particles can contribute charge to
   // effectively nlo_in,nhi_in + ghost cells
   // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
   //           position a particle in my box can be at
   // dist[3] = particle position bound = subbox + skin/2.0 + qdist
   //   qdist = offset due to TIP4P fictitious charge
   //   convert to triclinic if necessary
   // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
   // for slab PPPM, assign z grid as if it were not extended
 
   double *prd,*sublo,*subhi;
 
   // note: boxlo is a member (not a local), so this also selects which
   //   box origin particle_map() uses later
 
   if (triclinic == 0) {
     prd = domain->prd;
     boxlo = domain->boxlo;
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     prd = domain->prd_lamda;
     boxlo = domain->boxlo_lamda;
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double dist[3];
   double cuthalf = 0.5*neighbor->skin + qdist;
   if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
   else kspacebbox(cuthalf,&dist[0]);
 
   int nlo,nhi;
 
   // x dimension
 
   nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
                             nx_pppm/xprd + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
                             nx_pppm/xprd + shift) - OFFSET;
   nxlo_out = nlo + nlower;
   nxhi_out = nhi + nupper;
 
   // y dimension
 
   nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
                             ny_pppm/yprd + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
                             ny_pppm/yprd + shift) - OFFSET;
   nylo_out = nlo + nlower;
   nyhi_out = nhi + nupper;
 
   // z dimension, using the slab-extended length
 
   nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
                             nz_pppm/zprd_slab + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
                             nz_pppm/zprd_slab + shift) - OFFSET;
   nzlo_out = nlo + nlower;
   nzhi_out = nhi + nupper;
 
   // staggered grid extends the upper ghost limit by one in every dimension
 
   if (stagger_flag) {
     nxhi_out++;
     nyhi_out++;
     nzhi_out++;
   }
 
   // for slab PPPM, change the grid boundary for processors at +z end
   //   to include the empty volume between periodically repeating slabs
   // for slab PPPM, want charge data communicated from -z proc to +z proc,
   //   but not vice versa, also want field data communicated from +z proc to
   //   -z proc, but not vice versa
   // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
   // also ensure no other procs use ghost cells beyond +z limit
 
   if (slabflag == 1) {
     if (comm->myloc[2] == comm->procgrid[2]-1)
       nzhi_in = nzhi_out = nz_pppm - 1;
     nzhi_out = MIN(nzhi_out,nz_pppm-1);
   }
     
   // decomposition of FFT mesh
   // global indices range from 0 to N-1
   // proc owns entire x-dimension, clumps of columns in y,z dimensions
   // npey_fft,npez_fft = # of procs in y,z dims
   // if nprocs is small enough, proc can own 1 or more entire xy planes,
   //   else proc owns 2d sub-blocks of yz plane
   // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
   // nlo_fft,nhi_fft = lower/upper limit of the section
   //   of the global FFT mesh that I own
 
   int npey_fft,npez_fft;
   if (nz_pppm >= nprocs) {
     npey_fft = 1;
     npez_fft = nprocs;
   } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
   int me_y = me % npey_fft;
   int me_z = me / npey_fft;
 
   nxlo_fft = 0;
   nxhi_fft = nx_pppm - 1;
   nylo_fft = me_y*ny_pppm/npey_fft;
   nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
   nzlo_fft = me_z*nz_pppm/npez_fft;
   nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
   // PPPM grid pts owned by this proc, including ghosts
 
   ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
     (nzhi_out-nzlo_out+1);
 
   // FFT grids owned by this proc, without ghosts
   // nfft = FFT points in FFT decomposition on this proc
   // nfft_brick = FFT points in 3d brick-decomposition on this proc
   // nfft_both = greater of 2 values
 
   nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
     (nzhi_fft-nzlo_fft+1);
   int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
     (nzhi_in-nzlo_in+1);
   nfft_both = MAX(nfft,nfft_brick);
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n)
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_denom()
 {
   int k,l,m;
 
   for (l = 1; l < order; l++) gf_b[l] = 0.0;
   gf_b[0] = 1.0;
 
   for (m = 1; m < order; m++) {
     for (l = m; l > 0; l--)
       gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
     gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
   }
 
   bigint ifact = 1;
   for (k = 1; k < 2*order; k++) ifact *= k;
   double gaminv = 1.0/ifact;
   for (l = 0; l < order; l++) gf_b[l] *= gaminv;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute modified (Hockney-Eastwood) Coulomb Green's function
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_ik()
 {
   const double * const prd = domain->prd;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd = prd[2];
   const double zprd_slab = zprd*slab_volfactor;
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   double snx,sny,snz;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,dot1,dot2;
   double numerator,denominator;
   double sqk;
 
   int k,l,m,n,nx,ny,nz,kper,lper,mper;
 
   const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int twoorder = 2*order;
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     mper = m - nz_pppm*(2*m/nz_pppm);
     snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
       sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
         snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
 
         sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
 
         if (sqk != 0.0) {
           numerator = 12.5663706/sqk;
           denominator = gf_denom(snx,sny,snz);
           sum1 = 0.0;
 
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*square(qx/g_ewald));
             argx = 0.5*qx*xprd/nx_pppm;
             wx = powsinxx(argx,twoorder);
 
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*square(qy/g_ewald));
               argy = 0.5*qy*yprd/ny_pppm;
               wy = powsinxx(argy,twoorder);
 
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*square(qz/g_ewald));
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 wz = powsinxx(argz,twoorder);
 
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
               }
             }
           }
           greensfn[n++] = numerator*sum1/denominator;
         } else greensfn[n++] = 0.0;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pre-compute modified (Hockney-Eastwood) Coulomb Green's function
    for a triclinic system
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_ik_triclinic()
 {
   double snx,sny,snz;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,dot1,dot2;
   double numerator,denominator;
   double sqk;
 
   int k,l,m,n,nx,ny,nz,kper,lper,mper;
 
   double tmp[3];
   tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25);
   tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25);
   tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25);
   lamda2xT(&tmp[0],&tmp[0]);
   const int nbx = static_cast<int> (tmp[0]);
   const int nby = static_cast<int> (tmp[1]);
   const int nbz = static_cast<int> (tmp[2]);
 
   const int twoorder = 2*order;
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     mper = m - nz_pppm*(2*m/nz_pppm);
     snz = square(sin(MY_PI*mper/nz_pppm));
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
       sny = square(sin(MY_PI*lper/ny_pppm));
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
         snx = square(sin(MY_PI*kper/nx_pppm));
 
         double unitk_lamda[3];
         unitk_lamda[0] = 2.0*MY_PI*kper;
         unitk_lamda[1] = 2.0*MY_PI*lper;
         unitk_lamda[2] = 2.0*MY_PI*mper;
         x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
 
         sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
 
         if (sqk != 0.0) {
           numerator = 12.5663706/sqk;
           denominator = gf_denom(snx,sny,snz);
           sum1 = 0.0;
 
           for (nx = -nbx; nx <= nbx; nx++) {
             argx = MY_PI*kper/nx_pppm + MY_PI*nx;
             wx = powsinxx(argx,twoorder);
 
             for (ny = -nby; ny <= nby; ny++) {
               argy = MY_PI*lper/ny_pppm + MY_PI*ny;
               wy = powsinxx(argy,twoorder);
 
               for (nz = -nbz; nz <= nbz; nz++) {
                 argz = MY_PI*mper/nz_pppm + MY_PI*nz;
                 wz = powsinxx(argz,twoorder);
 
                 double b[3];
                 b[0] = 2.0*MY_PI*nx_pppm*nx;
                 b[1] = 2.0*MY_PI*ny_pppm*ny;
                 b[2] = 2.0*MY_PI*nz_pppm*nz;
                 x2lamdaT(&b[0],&b[0]);
 
                 qx = unitk_lamda[0]+b[0];
                 sx = exp(-0.25*square(qx/g_ewald));
 
                 qy = unitk_lamda[1]+b[1];
                 sy = exp(-0.25*square(qy/g_ewald));
 
                 qz = unitk_lamda[2]+b[2];
                 sz = exp(-0.25*square(qz/g_ewald));
 
                 dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
               }
             }
           }
           greensfn[n++] = numerator*sum1/denominator;
         } else greensfn[n++] = 0.0;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute optimized Green's function for energy calculation
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_ad()
 {
   // fills greensfn[] over this proc's FFT decomposition and accumulates
   // the six sf_coeff[] sums from sf_precoeff1..6, then scales them and
   // sums them across all procs
 
   const double * const prd = domain->prd;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
 
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   const int twoorder = 2*order;
 
   for (int c = 0; c < 6; c++) sf_coeff[c] = 0.0;
 
   int n = 0;
   for (int m = nzlo_fft; m <= nzhi_fft; m++) {
     const int mper = m - nz_pppm*(2*m/nz_pppm);
     const double qz = unitkz*mper;
     const double argz = 0.5*qz*zprd_slab/nz_pppm;
     const double snz = square(sin(argz));
     const double sz = exp(-0.25*square(qz/g_ewald));
     const double wz = powsinxx(argz,twoorder);
 
     for (int l = nylo_fft; l <= nyhi_fft; l++) {
       const int lper = l - ny_pppm*(2*l/ny_pppm);
       const double qy = unitky*lper;
       const double argy = 0.5*qy*yprd/ny_pppm;
       const double sny = square(sin(argy));
       const double sy = exp(-0.25*square(qy/g_ewald));
       const double wy = powsinxx(argy,twoorder);
 
       for (int k = nxlo_fft; k <= nxhi_fft; k++) {
         const int kper = k - nx_pppm*(2*k/nx_pppm);
         const double qx = unitkx*kper;
         const double argx = 0.5*qx*xprd/nx_pppm;
         const double snx = square(sin(argx));
         const double sx = exp(-0.25*square(qx/g_ewald));
         const double wx = powsinxx(argx,twoorder);
 
         const double sqk = qx*qx + qy*qy + qz*qz;
 
         // Green's function value at this point; zero at k = 0
 
         if (sqk != 0.0) {
           const double numerator = MY_4PI/sqk;
           const double denominator = gf_denom(snx,sny,snz);
           greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
         } else {
           greensfn[n] = 0.0;
         }
 
         // accumulate for every point, including the k = 0 one
 
         sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
         sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
         sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
         sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
         sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
         sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
         n++;
       }
     }
   }
 
   // compute the coefficients for the self-force correction
   // each dimension has its own prefactor; odd-numbered entries get twice it
 
   const double base = MY_PI/volume;
   const double prex = base*(nx_pppm/xprd);
   const double prey = base*(ny_pppm/yprd);
   const double prez = base*(nz_pppm/zprd_slab);
 
   sf_coeff[0] *= prex;
   sf_coeff[1] *= prex*2;
   sf_coeff[2] *= prey;
   sf_coeff[3] *= prey*2;
   sf_coeff[4] *= prez;
   sf_coeff[5] *= prez*2;
 
   // communicate values with other procs
 
   double summed[6];
   MPI_Allreduce(sf_coeff,summed,6,MPI_DOUBLE,MPI_SUM,world);
   for (int c = 0; c < 6; c++) sf_coeff[c] = summed[c];
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_sf_precoeff()
 {
   // fills sf_precoeff1..6 for every point of this proc's FFT decomposition
 
   // powsinxx weights for five consecutive images per dimension:
   //   w?0 uses images -2..2, w?1 images -1..3, w?2 images 0..4
 
   double wx0[5],wy0[5],wz0[5];
   double wx1[5],wy1[5],wz1[5];
   double wx2[5],wy2[5],wz2[5];
 
   int n = 0;
   for (int m = nzlo_fft; m <= nzhi_fft; m++) {
     const int mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (int l = nylo_fft; l <= nyhi_fft; l++) {
       const int lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (int k = nxlo_fft; k <= nxhi_fft; k++) {
         const int kper = k - nx_pppm*(2*k/nx_pppm);
 
         // tabulate the weights for this grid point
 
         for (int i = 0; i < 5; i++) {
           const double qx0 = MY_2PI*(kper+nx_pppm*(i-2));
           const double qx1 = MY_2PI*(kper+nx_pppm*(i-1));
           const double qx2 = MY_2PI*(kper+nx_pppm*(i  ));
           wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
           wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
           wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
 
           const double qy0 = MY_2PI*(lper+ny_pppm*(i-2));
           const double qy1 = MY_2PI*(lper+ny_pppm*(i-1));
           const double qy2 = MY_2PI*(lper+ny_pppm*(i  ));
           wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
           wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
           wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
 
           const double qz0 = MY_2PI*(mper+nz_pppm*(i-2));
           const double qz1 = MY_2PI*(mper+nz_pppm*(i-1));
           const double qz2 = MY_2PI*(mper+nz_pppm*(i  ));
           wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
           wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
           wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
         }
 
         // products of the base weight u0 with the weight shifted by one
         // or two images in a single dimension (x: acc1,acc2; y: acc3,acc4;
         // z: acc5,acc6), summed over all 5x5x5 image combinations
 
         double acc1 = 0.0;
         double acc2 = 0.0;
         double acc3 = 0.0;
         double acc4 = 0.0;
         double acc5 = 0.0;
         double acc6 = 0.0;
 
         for (int nx = 0; nx < 5; nx++) {
           for (int ny = 0; ny < 5; ny++) {
             for (int nz = 0; nz < 5; nz++) {
               const double u0 = wx0[nx]*wy0[ny]*wz0[nz];
               const double u1 = wx1[nx]*wy0[ny]*wz0[nz];
               const double u2 = wx2[nx]*wy0[ny]*wz0[nz];
               const double u3 = wx0[nx]*wy1[ny]*wz0[nz];
               const double u4 = wx0[nx]*wy2[ny]*wz0[nz];
               const double u5 = wx0[nx]*wy0[ny]*wz1[nz];
               const double u6 = wx0[nx]*wy0[ny]*wz2[nz];
 
               acc1 += u0*u1;
               acc2 += u0*u2;
               acc3 += u0*u3;
               acc4 += u0*u4;
               acc5 += u0*u5;
               acc6 += u0*u6;
             }
           }
         }
 
         // store values, then advance to the next grid point
 
         sf_precoeff1[n] = acc1;
         sf_precoeff2[n] = acc2;
         sf_precoeff3[n] = acc3;
         sf_precoeff4[n] = acc4;
         sf_precoeff5[n] = acc5;
         sf_precoeff6[n] = acc6;
         n++;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array
 ------------------------------------------------------------------------- */
 
 void PPPM::particle_map()
 {
   int nx,ny,nz;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   int flag = 0;
 
   if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
     error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
 
   for (int i = 0; i < nlocal; i++) {
 
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
     ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
     nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
 
     part2grid[i][0] = nx;
     part2grid[i][1] = ny;
     part2grid[i][2] = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
         ny+nlower < nylo_out || ny+nupper > nyhi_out ||
         nz+nlower < nzlo_out || nz+nupper > nzhi_out)
       flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid
 ------------------------------------------------------------------------- */
 
 void PPPM::make_rho()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     z0 = delvolinv * q[i];
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       y0 = z0*rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         x0 = y0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           density_brick[mz][my][mx] += x0*rho1d[0][l];
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    remap density from 3d brick decomposition to FFT decomposition
 ------------------------------------------------------------------------- */
 
 void PPPM::brick2fft()
 {
   // flatten the inner portion of the 3d density brick into density_fft,
   //   x varying fastest, then y, then z
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   int count = 0;
   for (int kz = nzlo_in; kz <= nzhi_in; kz++) {
     for (int ky = nylo_in; ky <= nyhi_in; ky++) {
       for (int kx = nxlo_in; kx <= nxhi_in; kx++) {
         density_fft[count] = density_brick[kz][ky][kx];
         count++;
       }
     }
   }
 
   // density_fft is both input and output of the remap, work1 is passed
   //   as its third argument
 
   remap->perform(density_fft,density_fft,work1);
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson()
 {
   // differentiation_flag == 1 selects the ad solver,
   //   any other value selects the ik solver
 
   if (differentiation_flag != 1) {
     poisson_ik();
     return;
   }
 
   poisson_ad();
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ik
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_ik()
 {
   int i,j,k,n;
   double eng;
 
   // transform charge density (r -> k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] = density_fft[i];
     work1[n++] = ZEROF;
   }
 
   fft1->compute(work1,work1,1);
 
   // global energy and virial contribution
 
   double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft; i++) {
         eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
         if (eflag_global) energy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nfft; i++) {
         energy +=
           s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] *= scaleinv * greensfn[i];
     work1[n++] *= scaleinv * greensfn[i];
   }
 
   // extra FFTs for per-atom energy/virial
 
   if (evflag_atom) poisson_peratom();
 
   // triclinic system
 
   if (triclinic) {
     poisson_ik_triclinic();
     return;
   }
 
   // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of vdx,vdy,vdz arrays
 
   // x direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkx[i]*work1[n+1];
         work2[n+1] = -fkx[i]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdx_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // y direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fky[j]*work1[n+1];
         work2[n+1] = -fky[j]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdy_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // z direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkz[k]*work1[n+1];
         work2[n+1] = -fkz[k]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdz_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ik for a triclinic system
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_ik_triclinic()
 {
   // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
   // here fkx,fky,fkz are indexed by the flat FFT grid index,
   //   one value per FFT grid point
   // FFT leaves data in 3d brick decomposition
   // copy the real part into inner portion of vdx,vdy,vdz arrays
 
   int idx;
 
   // x direction gradient
 
   for (int ig = 0; ig < nfft; ig++) {
     const int re = 2*ig;
     const int im = re+1;
     work2[re] = fkx[ig]*work1[im];
     work2[im] = -fkx[ig]*work1[re];
   }
 
   fft2->compute(work2,work2,-1);
 
   idx = 0;
   for (int kz = nzlo_in; kz <= nzhi_in; kz++) {
     for (int ky = nylo_in; ky <= nyhi_in; ky++) {
       for (int kx = nxlo_in; kx <= nxhi_in; kx++) {
         vdx_brick[kz][ky][kx] = work2[idx];
         idx += 2;
       }
     }
   }
 
   // y direction gradient
 
   for (int ig = 0; ig < nfft; ig++) {
     const int re = 2*ig;
     const int im = re+1;
     work2[re] = fky[ig]*work1[im];
     work2[im] = -fky[ig]*work1[re];
   }
 
   fft2->compute(work2,work2,-1);
 
   idx = 0;
   for (int kz = nzlo_in; kz <= nzhi_in; kz++) {
     for (int ky = nylo_in; ky <= nyhi_in; ky++) {
       for (int kx = nxlo_in; kx <= nxhi_in; kx++) {
         vdy_brick[kz][ky][kx] = work2[idx];
         idx += 2;
       }
     }
   }
 
   // z direction gradient
 
   for (int ig = 0; ig < nfft; ig++) {
     const int re = 2*ig;
     const int im = re+1;
     work2[re] = fkz[ig]*work1[im];
     work2[im] = -fkz[ig]*work1[re];
   }
 
   fft2->compute(work2,work2,-1);
 
   idx = 0;
   for (int kz = nzlo_in; kz <= nzhi_in; kz++) {
     for (int ky = nylo_in; ky <= nyhi_in; ky++) {
       for (int kx = nxlo_in; kx <= nxhi_in; kx++) {
         vdz_brick[kz][ky][kx] = work2[idx];
         idx += 2;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ad
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_ad()
 {
   int i,j,k,n;
   double eng;
 
   // transform charge density (r -> k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] = density_fft[i];
     work1[n++] = ZEROF;
   }
 
   fft1->compute(work1,work1,1);
 
   // global energy and virial contribution
 
   double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft; i++) {
         eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
         if (eflag_global) energy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nfft; i++) {
         energy +=
           s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] *= scaleinv * greensfn[i];
     work1[n++] *= scaleinv * greensfn[i];
   }
 
   // extra FFTs for per-atom energy/virial
 
   if (vflag_atom) poisson_peratom();
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n];
     work2[n+1] = work1[n+1];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         u_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for per-atom energy/virial
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_peratom()
 {
   int i,j,k,n;
 
   // energy
 
   if (eflag_atom && differentiation_flag != 1) {
     n = 0;
     for (i = 0; i < nfft; i++) {
       work2[n] = work1[n];
       work2[n+1] = work1[n+1];
       n += 2;
     }
 
     fft2->compute(work2,work2,-1);
 
     n = 0;
     for (k = nzlo_in; k <= nzhi_in; k++)
       for (j = nylo_in; j <= nyhi_in; j++)
         for (i = nxlo_in; i <= nxhi_in; i++) {
           u_brick[k][j][i] = work2[n];
           n += 2;
         }
   }
 
   // 6 components of virial in v0 thru v5
 
   if (!vflag_atom) return;
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][0];
     work2[n+1] = work1[n+1]*vg[i][0];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v0_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][1];
     work2[n+1] = work1[n+1]*vg[i][1];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v1_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][2];
     work2[n+1] = work1[n+1]*vg[i][2];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v2_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][3];
     work2[n+1] = work1[n+1]*vg[i][3];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v3_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][4];
     work2[n+1] = work1[n+1]*vg[i][4];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v4_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][5];
     work2[n+1] = work1[n+1]*vg[i][5];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v5_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce()
 {
   // differentiation_flag == 1 selects the ad interpolation,
   //   any other value selects the ik interpolation
 
   if (differentiation_flag != 1) {
     fieldforce_ik();
     return;
   }
 
   fieldforce_ad();
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         y0 = z0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           x0 = y0*rho1d[0][l];
           ekx -= x0*vdx_brick[mz][my][mx];
           eky -= x0*vdy_brick[mz][my][mx];
           ekz -= x0*vdz_brick[mz][my][mx];
         }
       }
     }
 
     // convert E-field to force
 
     const double qfactor = qqrd2e * scale * q[i];
     f[i][0] += qfactor*ekx;
     f[i][1] += qfactor*eky;
     if (slabflag != 2) f[i][2] += qfactor*ekz;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles for ad
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   prd = domain->prd;
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
 
   double hx_inv = nx_pppm/xprd;
   double hy_inv = ny_pppm/yprd;
   double hz_inv = nz_pppm/zprd;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
     compute_drho1d(dx,dy,dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
 
     // convert E-field to force and substract self forces
 
     const double qfactor = qqrd2e * scale;
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
     sf = sf_coeff[0]*sin(2*MY_PI*s1);
     sf += sf_coeff[1]*sin(4*MY_PI*s1);
     sf *= 2*q[i]*q[i];
     f[i][0] += qfactor*(ekx*q[i] - sf);
 
     sf = sf_coeff[2]*sin(2*MY_PI*s2);
     sf += sf_coeff[3]*sin(4*MY_PI*s2);
     sf *= 2*q[i]*q[i];
     f[i][1] += qfactor*(eky*q[i] - sf);
 
 
     sf = sf_coeff[4]*sin(2*MY_PI*s3);
     sf += sf_coeff[5]*sin(4*MY_PI*s3);
     sf *= 2*q[i]*q[i];
     if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get per-atom energy/virial
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         y0 = z0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           x0 = y0*rho1d[0][l];
           if (eflag_atom) u += x0*u_brick[mz][my][mx];
           if (vflag_atom) {
             v0 += x0*v0_brick[mz][my][mx];
             v1 += x0*v1_brick[mz][my][mx];
             v2 += x0*v2_brick[mz][my][mx];
             v3 += x0*v3_brick[mz][my][mx];
             v4 += x0*v4_brick[mz][my][mx];
             v5 += x0*v5_brick[mz][my][mx];
           }
         }
       }
     }
 
     if (eflag_atom) eatom[i] += q[i]*u;
     if (vflag_atom) {
       vatom[i][0] += q[i]*v0;
       vatom[i][1] += q[i]*v1;
       vatom[i][2] += q[i]*v2;
       vatom[i][3] += q[i]*v3;
       vatom[i][4] += q[i]*v4;
       vatom[i][5] += q[i]*v5;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack own values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[i];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
   } 
 }
 
 /* ----------------------------------------------------------------------
    map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
 ------------------------------------------------------------------------- */
 
 void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
 {
   // try every factorization nprocs = ipx * ipy
   // boxx,boxy = extent of the largest proc sub-domain (rounded up)
   // surf = boxx + boxy, the quantity being minimized
   // on a tie in surf, the factorization with the larger boxx*boxy wins
   // *px,*py are written only when some factorization is accepted
 
   int bestsurf = 2 * (nx + ny);
   int bestboxx = 0;
   int bestboxy = 0;
 
   for (int ipx = 1; ipx <= nprocs; ipx++) {
     if (nprocs % ipx != 0) continue;
 
     const int ipy = nprocs/ipx;
     const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
     const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
     const int surf = boxx + boxy;
 
     bool better = surf < bestsurf;
     if (!better && surf == bestsurf)
       better = boxx*boxy > bestboxx*bestboxy;
     if (!better) continue;
 
     bestsurf = surf;
     bestboxx = boxx;
     bestboxy = boxy;
     *px = ipx;
     *py = ipy;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                          const FFT_SCALAR &dz)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-order)/2; k <= order/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = order-1; l >= 0; l--) {
       r1 = rho_coeff[l][k] + r1*dx;
       r2 = rho_coeff[l][k] + r2*dy;
       r3 = rho_coeff[l][k] + r3*dz;
     }
     rho1d[0][k] = r1;
     rho1d[1][k] = r2;
     rho1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into drho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                           const FFT_SCALAR &dz)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-order)/2; k <= order/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = order-2; l >= 0; l--) {
       r1 = drho_coeff[l][k] + r1*dx;
       r2 = drho_coeff[l][k] + r2*dy;
       r3 = drho_coeff[l][k] + r3*dz;
     }
     drho1d[0][k] = r1;
     drho1d[1][k] = r2;
     drho1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    generate coefficients for the weight function of order n
 
               (n-1)
   Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
            k=-(n-1)
   For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
       k is odd integers if n is even and even integers if n is odd
               ---
              | n-1
              | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
   wn(k,x) = <  l=0
              |
              |  0                       otherwise
               ---
   a coefficients are packed into the array rho_coeff to eliminate zeros
   rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_rho_coeff()
 {
   int j,k,l,m;
   FFT_SCALAR s;
 
   // scratch table a[l][k], used below with l = 0..order-1 and
   //   k = -order..order; released at the end of this function
 
   FFT_SCALAR **a;
   memory->create2d_offset(a,order,-order,order,"pppm:a");
 
   // zero the whole table so that columns not yet filled read as 0
 
   for (k = -order; k <= order; k++)
     for (l = 0; l < order; l++)
       a[l][k] = 0.0;
 
   // start with a[0][0] = 1 as the only non-zero entry
   // pass j fills columns k = -j,-j+2,...,j from their neighbors k-1 and k+1,
   //   which have the opposite parity and were filled by earlier passes
   // a[l+1][k] = difference of the two neighbors' entry l, divided by l+1
   // a[0][k] = s, a sum over l of the neighbors' entries weighted by
   //   0.5^(l+1)/(l+1), with the k+1 neighbor's sign alternating in l
   // the two preprocessor branches differ only in using powf vs pow
 
   a[0][0] = 1.0;
   for (j = 1; j < order; j++) {
     for (k = -j; k <= j; k += 2) {
       s = 0.0;
       for (l = 0; l < j; l++) {
         a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
 #ifdef FFT_SINGLE
         s += powf(0.5,(float) l+1) *
           (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
 #else
         s += pow(0.5,(double) l+1) *
           (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
 #endif
       }
       a[0][k] = s;
     }
   }
 
   // pack every other column k = -(order-1),...,order-1 of a into
   //   consecutive columns m of rho_coeff, starting at m = (1-order)/2
   // drho_coeff gets the term-by-term derivative: l*a[l][k] stored at l-1
 
   m = (1-order)/2;
   for (k = -(order-1); k < order; k += 2) {
     for (l = 0; l < order; l++)
       rho_coeff[l][m] = a[l][k];
     for (l = 1; l < order; l++)
       drho_coeff[l-1][m] = l*a[l][k];
     m++;
   }
 
   memory->destroy2d_offset(a,-order);
 }
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
    periodically repeating slabs.  Yields good approximation to 2D Ewald if
    adequate empty space is left between repeating slabs (J. Chem. Phys.
    111, 3155).  Slabs defined here to be parallel to the xy plane. Also
    extended to non-neutral systems (J. Chem. Phys. 131, 094107).
 ------------------------------------------------------------------------- */
 
 void PPPM::slabcorr()
 {
   // compute local contribution to global dipole moment
 
   double *q = atom->q;
   double **x = atom->x;
   double zprd = domain->zprd;
   int nlocal = atom->nlocal;
 
   double dipole = 0.0;
   for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
 
   // sum local contributions to get global dipole moment
 
   double dipole_all;
   MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
 
   // need to make non-neutral systems and/or
   //  per-atom energy translationally invariant
 
   double dipole_r2 = 0.0;
   if (eflag_atom || fabs(qsum) > SMALL) {
     for (int i = 0; i < nlocal; i++)
       dipole_r2 += q[i]*x[i][2]*x[i][2];
 
     // sum local contributions
 
     double tmp;
     MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     dipole_r2 = tmp;
   }
 
   // compute corrections
 
   const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
     qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
   const double qscale = qqrd2e * scale;
 
   if (eflag_global) energy += qscale * e_slabcorr;
 
   // per-atom energy
 
   if (eflag_atom) {
     double efact = qscale * MY_2PI/volume;
     for (int i = 0; i < nlocal; i++)
       eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
         qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
   }
 
   // add on force corrections
 
   double ffact = qscale * (-4.0*MY_PI/volume);
   double **f = atom->f;
 
   for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 1d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPM::timing_1d(int n, double &time1d)
 {
   // clear the work buffer (2*nfft_both entries) before timing
 
   const int nwork = 2*nfft_both;
   for (int m = 0; m < nwork; m++) work1[m] = ZEROF;
 
   // two extra backward transforms are timed unless differentiation_flag is 1
 
   const bool extra_backward = (differentiation_flag != 1);
 
   MPI_Barrier(world);
   const double tstart = MPI_Wtime();
 
   for (int iter = 0; iter < n; iter++) {
     fft1->timing1d(work1,nfft_both,1);
     fft2->timing1d(work1,nfft_both,-1);
     if (extra_backward) {
       fft2->timing1d(work1,nfft_both,-1);
       fft2->timing1d(work1,nfft_both,-1);
     }
   }
 
   MPI_Barrier(world);
   const double tstop = MPI_Wtime();
   time1d = tstop - tstart;
 
   // return value: 2 when differentiation_flag is non-zero, else 4
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 3d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPM::timing_3d(int n, double &time3d)
 {
   // clear the work buffer (2*nfft_both entries) before timing
 
   const int nwork = 2*nfft_both;
   for (int m = 0; m < nwork; m++) work1[m] = ZEROF;
 
   // two extra backward transforms are timed unless differentiation_flag is 1
 
   const bool extra_backward = (differentiation_flag != 1);
 
   MPI_Barrier(world);
   const double tstart = MPI_Wtime();
 
   for (int iter = 0; iter < n; iter++) {
     fft1->compute(work1,work1,1);
     fft2->compute(work1,work1,-1);
     if (extra_backward) {
       fft2->compute(work1,work1,-1);
       fft2->compute(work1,work1,-1);
     }
   }
 
   MPI_Barrier(world);
   const double tstop = MPI_Wtime();
   time3d = tstop - tstart;
 
   // return value: 2 when differentiation_flag is non-zero, else 4
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local arrays
 ------------------------------------------------------------------------- */
 
 double PPPM::memory_usage()
 {
   // all counts are promoted to double before multiplying so that
   //  large grids or atom counts cannot overflow an int intermediate
 
   const double nbrick = (double) (nxhi_out-nxlo_out+1) *
     (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1);
   const double nfft_d = (double) nfft_both;
 
   // per-atom storage sized by nmax
 
   double bytes = (double) nmax * 3 * sizeof(double);
 
   // brick-sized grids: 2 when differentiation_flag is 1, else 4
 
   if (differentiation_flag == 1) {
     bytes += 2 * nbrick * sizeof(FFT_SCALAR);
   } else {
     bytes += 4 * nbrick * sizeof(FFT_SCALAR);
   }
 
   // FFT-decomposition arrays
 
   if (triclinic) bytes += 3 * nfft_d * sizeof(double);
   bytes += 6 * nfft_d * sizeof(double);
   bytes += nfft_d * sizeof(double);
   bytes += nfft_d * 5 * sizeof(FFT_SCALAR);
 
   // optional per-atom and group-group grids
 
   if (peratom_allocate_flag)
     bytes += 6 * nbrick * sizeof(FFT_SCALAR);
 
   if (group_allocate_flag) {
     bytes += 2 * nbrick * sizeof(FFT_SCALAR);
     bytes += 2 * nfft_d * sizeof(FFT_SCALAR);
   }
 
   // memory reported by the grid communicator
 
   bytes += cg->memory_usage();
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    group-group interactions
  ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    compute the PPPM total long-range force and energy for groups A and B
  ------------------------------------------------------------------------- */
 
 void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
 {
   // results are left in e2group (energy) and f2group[0..2] (force)
   // unsupported combinations abort before anything is allocated or converted
 
   if (slabflag && triclinic)
     error->all(FLERR,"Cannot (yet) use K-space slab "
                "correction with compute group/group for triclinic systems");
 
   if (differentiation_flag)
     error->all(FLERR,"Cannot (yet) use kspace_modify "
                "diff ad with compute group/group");
 
   // group density arrays are allocated lazily on first use
 
   if (!group_allocate_flag) allocate_groups();
 
   // convert atoms from box to lamda coords
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
 
   // reset accumulators; poisson_groups() adds this proc's contribution
 
   e2group = 0.0; //energy
   f2group[0] = 0.0; //force in x-direction
   f2group[1] = 0.0; //force in y-direction
   f2group[2] = 0.0; //force in z-direction
 
   // map my particle charge onto my local 3d density grid
 
   make_rho_groups(groupbit_A,groupbit_B,AA_flag);
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
   // temporarily store and switch pointers so we can
   //  use brick2fft() for groups A and B (without
   //  writing an additional function)
 
   FFT_SCALAR ***density_brick_real = density_brick;
   FFT_SCALAR *density_fft_real = density_fft;
 
   // group A
 
   density_brick = density_A_brick;
   density_fft = density_A_fft;
 
   cg->reverse_comm(this,REVERSE_RHO);
   brick2fft();
 
   // group B
 
   density_brick = density_B_brick;
   density_fft = density_B_fft;
 
   cg->reverse_comm(this,REVERSE_RHO);
   brick2fft();
 
   // switch back pointers
 
   density_brick = density_brick_real;
   density_fft = density_fft_real;
 
   // compute potential gradient on my FFT grid and
   //   portion of group-group energy/force on this proc's FFT grid
 
   poisson_groups(AA_flag);
 
   const double qscale = qqrd2e * scale;
 
   // total group A <--> group B energy
   // self and boundary correction terms are in compute_group_group.cpp
 
   double e2group_all;
   MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
   e2group = e2group_all;
 
   e2group *= qscale*0.5*volume;
 
   // total group A <--> group B force
 
   double f2group_all[3];
   MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
 
   // x and y always take the reduced, scaled value
   // z is only overwritten when slabflag != 2; otherwise f2group[2] keeps
   //  whatever poisson_groups() accumulated on this proc
   // NOTE(review): confirm that leaving the local, unscaled z value in place
   //  for slabflag == 2 is intended
 
   f2group[0] = qscale*volume*f2group_all[0];
   f2group[1] = qscale*volume*f2group_all[1];
   if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
 
   // convert atoms back from lamda to box coords
 
   if (triclinic) domain->lamda2x(atom->nlocal);
 
   // slab correction is added after the reductions above, so
   //  slabcorr_groups() works on the already-global e2group and f2group
 
   if (slabflag == 1)
     slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
 }
 
 /* ----------------------------------------------------------------------
  allocate group-group memory that depends on # of K-vectors and order
  ------------------------------------------------------------------------- */
 
 void PPPM::allocate_groups()
 {
   // record that the group arrays exist; compute_group_group() checks this
   //  flag so that allocation happens only once
 
   group_allocate_flag = 1;
 
   // one density brick per group, indexed over
   //  (nzlo_out:nzhi_out, nylo_out:nyhi_out, nxlo_out:nxhi_out)
 
   memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:density_A_brick");
   memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:density_B_brick");
 
   // one FFT-decomposition density vector per group, nfft_both entries each
 
   memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
   memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
 }
 
 /* ----------------------------------------------------------------------
  deallocate group-group memory that depends on # of K-vectors and order
  ------------------------------------------------------------------------- */
 
 void PPPM::deallocate_groups()
 {
   // clear the flag so compute_group_group() re-allocates on its next call
 
   group_allocate_flag = 0;
 
   // release the arrays created in allocate_groups(); the offsets passed
   //  here are the lower index bounds used when the bricks were created
 
   memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy(density_A_fft);
   memory->destroy(density_B_fft);
 }
 
 /* ----------------------------------------------------------------------
  create discretized "density" on section of global grid due to my particles
  density(x,y,z) = charge "density" at grid points of my 3d brick
  (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
  in global grid for group-group interactions
  ------------------------------------------------------------------------- */
 
 void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density arrays
 
   memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
 
   for (int i = 0; i < nlocal; i++) {
 
     if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
       if (AA_flag) continue;
 
     if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
 
       nx = part2grid[i][0];
       ny = part2grid[i][1];
       nz = part2grid[i][2];
       dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
       dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
       dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
       compute_rho1d(dx,dy,dz);
 
       z0 = delvolinv * q[i];
       for (n = nlower; n <= nupper; n++) {
         mz = n+nz;
         y0 = z0*rho1d[2][n];
         for (m = nlower; m <= nupper; m++) {
           my = m+ny;
           x0 = y0*rho1d[1][m];
           for (l = nlower; l <= nupper; l++) {
             mx = l+nx;
 
             // group A
 
             if (mask[i] & groupbit_A)
               density_A_brick[mz][my][mx] += x0*rho1d[0][l];
 
             // group B
 
             if (mask[i] & groupbit_B)
               density_B_brick[mz][my][mx] += x0*rho1d[0][l];
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for group-group interactions
  ------------------------------------------------------------------------- */
 
 void PPPM::poisson_groups(int AA_flag)
 {
   // work1/work2 are reused as scratch space for groups A and B
 
   FFT_SCALAR *work_A = work1;
   FFT_SCALAR *work_B = work2;
 
   // transform charge density (r -> k)
   // each density value is followed by a zero entry in the work array
   //  (presumably the real/imaginary pair expected by the FFT - confirm)
 
   // group A
 
   for (int i = 0; i < nfft; i++) {
     work_A[2*i] = density_A_fft[i];
     work_A[2*i+1] = ZEROF;
   }
   fft1->compute(work_A,work_A,1);
 
   // group B
 
   for (int i = 0; i < nfft; i++) {
     work_B[2*i] = density_B_fft[i];
     work_B[2*i+1] = ZEROF;
   }
   fft1->compute(work_B,work_B,1);
 
   // group-group energy and force contribution,
   //  keep everything in reciprocal space so
   //  no inverse FFTs needed
 
   const double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   const double s2 = scaleinv*scaleinv;
 
   // energy
 
   for (int i = 0; i < nfft; i++) {
     const int n0 = 2*i;
     const int n1 = n0+1;
     e2group += s2 * greensfn[i] *
       (work_A[n0]*work_B[n0] + work_A[n1]*work_B[n1]);
   }
 
   // no force is accumulated when AA_flag is set
 
   if (AA_flag) return;
 
   // multiply by Green's function and s2
   //  (only for work_A so it is not squared below)
 
   for (int i = 0; i < nfft; i++) {
     const double gscale = s2 * greensfn[i];
     work_A[2*i] *= gscale;
     work_A[2*i+1] *= gscale;
   }
 
   // triclinic system
 
   if (triclinic) {
     poisson_groups_triclinic();
     return;
   }
 
   // force: a single sweep over the FFT grid feeds all three components,
   //  each one still accumulating in the same grid order
 
   int n = 0;
   for (int k = nzlo_fft; k <= nzhi_fft; k++) {
     for (int j = nylo_fft; j <= nyhi_fft; j++) {
       for (int i = nxlo_fft; i <= nxhi_fft; i++) {
         const double partial_group =
           work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
         f2group[0] += fkx[i] * partial_group;
         f2group[1] += fky[j] * partial_group;
         f2group[2] += fkz[k] * partial_group;
         n += 2;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for group-group interactions
    for a triclinic system
  ------------------------------------------------------------------------- */
 
 void PPPM::poisson_groups_triclinic()
 {
   // work1/work2 hold the transformed group A and group B data
 
   FFT_SCALAR *work_A = work1;
   FFT_SCALAR *work_B = work2;
 
   // force: fkx/fky/fkz are indexed per FFT grid point here, so a single
   //  sweep over the nfft points feeds all three components, each one
   //  still accumulating in the same point order
 
   for (int i = 0; i < nfft; i++) {
     const int n0 = 2*i;
     const int n1 = n0+1;
     const double partial_group =
       work_A[n1]*work_B[n0] - work_A[n0]*work_B[n1];
     f2group[0] += fkx[i] * partial_group;
     f2group[1] += fky[i] * partial_group;
     f2group[2] += fkz[i] * partial_group;
   }
 }
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
    periodically repeating slabs.  Yields good approximation to 2D Ewald if
    adequate empty space is left between repeating slabs (J. Chem. Phys.
    111, 3155).  Slabs defined here to be parallel to the xy plane. Also
    extended to non-neutral systems (J. Chem. Phys. 131, 094107).
 ------------------------------------------------------------------------- */
 
 void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
 {
   // adds the slab correction for the A-B interaction to e2group and
   //  to the z component of f2group
 
   double *q = atom->q;
   double **x = atom->x;
   double zprd = domain->zprd;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // local sums, packed so that one collective reduces all of them:
   //  [0],[1] = total charge of group A, B
   //  [2],[3] = sum of q*z for group A, B
   //  [4],[5] = sum of q*z^2 for group A, B
 
   double sums_local[6] = {0.0,0.0,0.0,0.0,0.0,0.0};
 
   for (int i = 0; i < nlocal; i++) {
 
     // with AA_flag set, only atoms that belong to both groups count
 
     if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
       if (AA_flag) continue;
 
     if (mask[i] & groupbit_A) {
       sums_local[0] += q[i];
       sums_local[2] += q[i]*x[i][2];
       sums_local[4] += q[i]*x[i][2]*x[i][2];
     }
 
     if (mask[i] & groupbit_B) {
       sums_local[1] += q[i];
       sums_local[3] += q[i]*x[i][2];
       sums_local[5] += q[i]*x[i][2]*x[i][2];
     }
   }
 
   // sum local contributions to get total charge and global dipole moment
   //  for each group, using a single reduction instead of six scalar ones
 
   double sums_all[6];
   MPI_Allreduce(sums_local,sums_all,6,MPI_DOUBLE,MPI_SUM,world);
 
   const double qsum_A = sums_all[0];
   const double qsum_B = sums_all[1];
   const double dipole_A = sums_all[2];
   const double dipole_B = sums_all[3];
   const double dipole_r2_A = sums_all[4];
   const double dipole_r2_B = sums_all[5];
 
   // compute corrections
 
   const double qscale = qqrd2e * scale;
   const double efact = qscale * MY_2PI/volume;
 
   e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
     qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
 
   // add on force corrections
 
   const double ffact = qscale * (-4.0*MY_PI/volume);
   f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
 }
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 7f7d9100e..7134ed432 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -1,8205 +1,8214 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Rolf Isele-Holder (Aachen University)
                          Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "pppm_disp.h"
 #include "math_const.h"
 #include "atom.h"
 #include "comm.h"
 #include "gridcomm.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "fft3d_wrap.h"
 #include "remap_wrap.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // MAXORDER = largest value init() accepts for order and order_6
 // OFFSET = init() rejects a grid when any dimension is >= OFFSET
 
 #define MAXORDER   7
 #define OFFSET 16384
 #define SMALL 0.00001
 #define LARGE 10000.0
 #define EPS_HOC 1.0e-7
 
 // mixing rules; init() compares the pair style's ewald_mix against these
 
 enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
 
 // presumably flags selecting which grid data is packed in reverse/forward
 //  grid communication - confirm against the pack/unpack routines
 
 enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
 enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
      FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
      FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
      FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};
 
 
 // zero and one literals in single precision when FFT_SINGLE is defined,
 //  double precision otherwise
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 /* ---------------------------------------------------------------------- */
 
 PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   // at least one argument is required; arg[0] is read as the relative
   //  accuracy and its absolute value is stored
 
   if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
 
   triclinic_support = 0;
   pppmflag = dispersionflag = 1;
   accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
   
   // three integer factors 2, 3, 5 (presumably the factors allowed in the
   //  FFT grid dimensions - confirm where the grid is set)
 
   nfactors = 3;
   factors = new int[nfactors];
   factors[0] = 2;
   factors[1] = 3;
   factors[2] = 5;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // everything below starts out unallocated: all pointers are set to NULL
   //  and all allocation flags to 0
   // init() calls deallocate() and deallocate_peratom() before allocating
   //  (presumably those rely on these NULL values - confirm)
 
   csumflag = 0;
   B = NULL;
   cii = NULL;
   csumi = NULL;
   peratom_allocate_flag = 0;
 
   // grid arrays without suffix
 
   density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
   density_fft = NULL;
   u_brick = v0_brick = v1_brick = v2_brick = v3_brick = 
     v4_brick = v5_brick = NULL;
 
   // grid arrays with suffixes _g, _a0 ... _a6 and _none
   //  (presumably one set per dispersion mixing variant - confirm)
 
   density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
   density_fft_g = NULL;
   u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = 
     v4_brick_g = v5_brick_g = NULL;
 
   density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
   density_fft_a0 = NULL;
   u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = 
     v4_brick_a0 = v5_brick_a0 = NULL;
 
   density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
   density_fft_a1 = NULL;
   u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = 
     v4_brick_a1 = v5_brick_a1 = NULL;
 
   density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
   density_fft_a2 = NULL;
   u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = 
     v4_brick_a2 = v5_brick_a2 = NULL;
 
   density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
   density_fft_a3 = NULL;
   u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = 
     v4_brick_a3 = v5_brick_a3 = NULL;
 
   density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
   density_fft_a4 = NULL;
   u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = 
     v4_brick_a4 = v5_brick_a4 = NULL;
 
   density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
   density_fft_a5 = NULL;
   u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = 
     v4_brick_a5 = v5_brick_a5 = NULL;
 
   density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
   density_fft_a6 = NULL;
   u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = 
     v4_brick_a6 = v5_brick_a6 = NULL;
 
   density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
   density_fft_none = NULL;
   u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = 
     v4_brick_none = v5_brick_none = NULL;
 
   // members with the _6 suffix mirror the unsuffixed ones; init() uses
   //  the _6 names for the dispersion grid and the unsuffixed names for
   //  the Coulomb grid
 
   greensfn = NULL;
   greensfn_6 = NULL;
   work1 = work2 = NULL;
   work1_6 = work2_6 = NULL;
   vg = NULL;
   vg2 = NULL;
   vg_6 = NULL;
   vg2_6 = NULL;
   fkx = fky = fkz = NULL;
   fkx2 = fky2 = fkz2 = NULL;
   fkx_6 = fky_6 = fkz_6 = NULL;
   fkx2_6 = fky2_6 = fkz2_6 = NULL;
 
   sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = 
     sf_precoeff5 = sf_precoeff6 = NULL;
   sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = 
     sf_precoeff5_6 = sf_precoeff6_6 = NULL;
 
   gf_b = NULL;
   gf_b_6 = NULL;
   rho1d = rho_coeff = NULL;
   drho1d = drho_coeff = NULL;
   rho1d_6 = rho_coeff_6 = NULL;
   drho1d_6 = drho_coeff_6 = NULL;
   fft1 = fft2 = NULL;
   fft1_6 = fft2_6 = NULL;
   remap = NULL;
   remap_6 = NULL;
 
   nmax = 0;
   part2grid = NULL;
   part2grid_6 = NULL;
 
   cg = NULL;
   cg_peratom = NULL;
   cg_6 = NULL;
   cg_peratom_6 = NULL;
 
   // no interaction type selected yet; init() clears function[] again and
   //  fills it from the pair style's ewald_order
 
   memset(function, 0, EWALD_FUNCS*sizeof(int));
 }
 
 /* ----------------------------------------------------------------------
    free all memory 
 ------------------------------------------------------------------------- */
 
 PPPMDisp::~PPPMDisp()
 {
   // arrays owned directly by this class; B, cii and csumi are reset to
   //  NULL right after delete [], before deallocate() runs
 
   delete [] factors;
   delete [] B;
   B = NULL;
   delete [] cii;
   cii = NULL;
   delete [] csumi;
   csumi = NULL;
 
   // release whatever deallocate() and deallocate_peratom() manage
   //  (both are defined elsewhere in this file)
 
   deallocate();
   deallocate_peratom();
 
   // particle-to-grid maps for both grids
 
   memory->destroy(part2grid);
   memory->destroy(part2grid_6);
   part2grid = part2grid_6 = NULL;
 }
 
 /* ----------------------------------------------------------------------
    called once before run 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::init()
 {
   if (me == 0) {
     if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
     if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
   }
 
   triclinic_check();
   if (domain->dimension == 2)
     error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
   if (comm->style != 0) 
     error->universe_all(FLERR,"PPPMDisp can only currently be used with "
                         "comm_style brick");
 
   if (slabflag == 0 && domain->nonperiodic > 0)
     error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
   if (slabflag == 1) {
     if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
 	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
       error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
   }
  
   if (order > MAXORDER || order_6 > MAXORDER) {
     char str[128];
     sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
     error->all(FLERR,str);
   }
 
   // free all arrays previously allocated
 
   deallocate();
   deallocate_peratom(); 
 
   // check whether cutoff and pair style are set
 
   triclinic = domain->triclinic;
   pair_check();
 
   int tmp;
   Pair *pair = force->pair;
   int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
   double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
   double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
   if (!(ptr||*p_cutoff||*p_cutoff_lj)) 
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   cutoff = *p_cutoff;
   cutoff_lj = *p_cutoff_lj;
 
   double tmp2;
   MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); 
 
   // check out which types of potentials will have to be calculated
 
   int ewald_order = ptr ? *((int *) ptr) : 1<<1;
   int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
   memset(function, 0, EWALD_FUNCS*sizeof(int));
   for (int i=0; i<=EWALD_MAXORDER; ++i)			// transcribe order
     if (ewald_order&(1<<i)) {				// from pair_style
       int  k=0;
       char str[128];
       switch (i) {
 	case 1:
 	  k = 0; break;
 	case 6:
 	  if ((ewald_mix==GEOMETRIC || ewald_mix==SIXTHPOWER || 
                mixflag == 1) && mixflag!= 2) { k = 1; break; }
 	  else if (ewald_mix==ARITHMETIC && mixflag!=2) { k = 2; break; }
 	  else if (mixflag == 2) { k = 3; break; }
 	default:
 	  sprintf(str, "Unsupported order in kspace_style "
                   "pppm/disp, pair_style %s", force->pair_style);
 	  error->all(FLERR,str);
       }
       function[k] = 1;
     }
  
 
   // warn, if function[0] is not set but charge attribute is set!
 
   if (!function[0] && atom->q_flag && me == 0) {
     char str[128];
     sprintf(str, "Charges are set, but coulombic solver is not used");
     error->warning(FLERR, str);
   }
 
   // show error message if pppm/disp is not used correctly
 
   if (function[1] || function[2] || function[3]) {
     if (!gridflag_6 && !gewaldflag_6 && accuracy_real_6 < 0
         && accuracy_kspace_6 < 0 && !auto_disp_flag) {
       error->all(FLERR, "PPPMDisp used but no parameters set, "
               "for further information please see the pppm/disp "
               "documentation");
     }
   }
 
   // compute qsum & qsqsum, if function[0] is set, warn if not charge-neutral
 
   scale = 1.0;
   qqrd2e = force->qqrd2e;
   natoms_original = atom->natoms;
  
   if (function[0]) qsum_qsq();
 
   // if kspace is TIP4P, extract TIP4P params from pair style
   // bond/angle are not yet init(), so insure equilibrium request is valid
 
   qdist = 0.0;
  
   if (tip4pflag) {
     int itmp;
     double *p_qdist = (double *) force->pair->extract("qdist",itmp);
     int *p_typeO = (int *) force->pair->extract("typeO",itmp);
     int *p_typeH = (int *) force->pair->extract("typeH",itmp);
     int *p_typeA = (int *) force->pair->extract("typeA",itmp);
     int *p_typeB = (int *) force->pair->extract("typeB",itmp);
     if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
       error->all(FLERR,"KSpace style is incompatible with Pair style");
     qdist = *p_qdist;
     typeO = *p_typeO;
     typeH = *p_typeH;
     int typeA = *p_typeA;
     int typeB = *p_typeB;
 
     if (force->angle == NULL || force->bond == NULL)
       error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
     if (typeA < 1 || typeA > atom->nangletypes || 
 	force->angle->setflag[typeA] == 0)
       error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
     if (typeB < 1 || typeB > atom->nbondtypes || 
 	force->bond->setflag[typeB] == 0)
       error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
     double theta = force->angle->equilibrium_angle(typeA);
     double blen = force->bond->equilibrium_distance(typeB);
     alpha = qdist / (cos(0.5*theta) * blen);
   }
 
   // initialize the pair style to get the coefficients
 
   neighrequest_flag = 0;
   pair->init();
   neighrequest_flag = 1;
   init_coeffs();
 
   //if g_ewald and g_ewald_6 have not been specified, set some initial value
   //  to avoid problems when calculating the energies!
 
   if (!gewaldflag) g_ewald = 1;
   if (!gewaldflag_6) g_ewald_6 = 1;
 
   // set accuracy (force units) from accuracy_relative or accuracy_absolute
   
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   int (*procneigh)[2] = comm->procneigh;
 
   int iteration = 0;
   if (function[0]) {
     GridComm *cgtmp = NULL;
     while (order >= minorder) {
 
       if (iteration && me == 0)
           error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
                          "b/c stencil extends beyond neighbor processor");
       iteration++;
 
       // set grid for dispersion interaction and coulomb interactions
  
       set_grid();
 
       if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
       error->all(FLERR,"PPPMDisp Coulomb grid is too large");
 
       set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                          nxlo_fft, nylo_fft, nzlo_fft,
                          nxhi_fft, nyhi_fft, nzhi_fft,
                          nxlo_in, nylo_in, nzlo_in,
                          nxhi_in, nyhi_in, nzhi_in,
                          nxlo_out, nylo_out, nzlo_out,
                          nxhi_out, nyhi_out, nzhi_out,
                          nlower, nupper,
                          ngrid, nfft, nfft_both,
                          shift, shiftone, order);
 
       if (overlap_allowed) break;
 
       cgtmp = new GridComm(lmp, world,1,1,
                            nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                            nxlo_out,nxhi_out,nylo_out,nyhi_out,
                            nzlo_out,nzhi_out,
                            procneigh[0][0],procneigh[0][1],procneigh[1][0],
                            procneigh[1][1],procneigh[2][0],procneigh[2][1]);
       cgtmp->ghost_notify();
       if (!cgtmp->ghost_overlap()) break;
       delete cgtmp;
 
       order--;
     }
 
     if (order < minorder)
       error->all(FLERR,
                  "Coulomb PPPMDisp order has been reduced below minorder");
     if (cgtmp) delete cgtmp;
 
     // adjust g_ewald
   
     if (!gewaldflag) adjust_gewald();
 
     // calculate the final accuracy
   
     double acc = final_accuracy();
   
     // print stats
 
     int ngrid_max,nfft_both_max;
     MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
     MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
     if (me == 0) {
     #ifdef FFT_SINGLE
       const char fft_prec[] = "single";
     #else
       const char fft_prec[] = "double";
     #endif
   
       if (screen) {
         fprintf(screen,"  Coulomb G vector (1/distance)= %g\n",g_ewald);
         fprintf(screen,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
         fprintf(screen,"  Coulomb stencil order = %d\n",order);
         fprintf(screen,"  Coulomb estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(screen,"  Coulomb estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(screen,"  using %s precision FFTs\n",fft_prec);
         fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
 		ngrid_max, nfft_both_max);
       }
       if (logfile) {
         fprintf(logfile,"  Coulomb G vector (1/distance) = %g\n",g_ewald);
         fprintf(logfile,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
         fprintf(logfile,"  Coulomb stencil order = %d\n",order);
         fprintf(logfile,
                 "  Coulomb estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(logfile,"  Coulomb estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
         fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
 		ngrid_max, nfft_both_max);
       }
     }
   }
 
   iteration = 0;
   if (function[1] + function[2] + function[3]) {
     GridComm *cgtmp = NULL;
     while (order_6 >= minorder) {
 
       if (iteration && me == 0)
           error->warning(FLERR,"Reducing PPPMDisp dispersion order "
                          "b/c stencil extends beyond neighbor processor");
       iteration++;
 
       set_grid_6();
    
       if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
       error->all(FLERR,"PPPMDisp Dispersion grid is too large");
 
       set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                          nxlo_in_6, nylo_in_6, nzlo_in_6,
                          nxhi_in_6, nyhi_in_6, nzhi_in_6,
                          nxlo_out_6, nylo_out_6, nzlo_out_6,
                          nxhi_out_6, nyhi_out_6, nzhi_out_6,
                          nlower_6, nupper_6,
                          ngrid_6, nfft_6, nfft_both_6,
                          shift_6, shiftone_6, order_6);
 
       if (overlap_allowed) break;
 
       cgtmp = new GridComm(lmp,world,1,1,
                            nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
                            nzlo_in_6,nzhi_in_6,
                            nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
                            nzlo_out_6,nzhi_out_6,
                            procneigh[0][0],procneigh[0][1],procneigh[1][0],
                            procneigh[1][1],procneigh[2][0],procneigh[2][1]);
       cgtmp->ghost_notify();
       if (!cgtmp->ghost_overlap()) break;
       delete cgtmp;
       order_6--;
     }
 
     if (order_6 < minorder) 
       error->all(FLERR,"Dispersion PPPMDisp order has been "
                  "reduced below minorder");
     if (cgtmp) delete cgtmp;
 
     // adjust g_ewald_6
 
     if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) 
       adjust_gewald_6();
 
     // calculate the final accuracy
 
     double acc, acc_real, acc_kspace;
     final_accuracy_6(acc, acc_real, acc_kspace);
 
 
     // print stats
 
     int ngrid_max,nfft_both_max;
     MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
     MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
     if (me == 0) {
     #ifdef FFT_SINGLE
       const char fft_prec[] = "single";
     #else
       const char fft_prec[] = "double";
     #endif
   
       if (screen) {
         fprintf(screen,"  Dispersion G vector (1/distance)= %g\n",g_ewald_6);
         fprintf(screen,"  Dispersion grid = %d %d %d\n",
                 nx_pppm_6,ny_pppm_6,nz_pppm_6);
         fprintf(screen,"  Dispersion stencil order = %d\n",order_6);
         fprintf(screen,"  Dispersion estimated absolute "
                 "RMS force accuracy = %g\n",acc);
         fprintf(screen,"  Dispersion estimated absolute "
                 "real space RMS force accuracy = %g\n",acc_real);
         fprintf(screen,"  Dispersion estimated absolute "
                 "kspace RMS force accuracy = %g\n",acc_kspace);
         fprintf(screen,"  Dispersion estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(screen,"  using %s precision FFTs\n",fft_prec);
         fprintf(screen,"  3d grid and FFT values/proc dispersion = %d %d\n",
                           ngrid_max,nfft_both_max);
       }
       if (logfile) {
         fprintf(logfile,"  Dispersion G vector (1/distance) = %g\n",g_ewald_6);
         fprintf(logfile,"  Dispersion grid = %d %d %d\n",
                 nx_pppm_6,ny_pppm_6,nz_pppm_6);
         fprintf(logfile,"  Dispersion stencil order = %d\n",order_6);
         fprintf(logfile,"  Dispersion estimated absolute "
                 "RMS force accuracy = %g\n",acc);
         fprintf(logfile,"  Dispersion estimated absolute "
                 "real space RMS force accuracy = %g\n",acc_real);
         fprintf(logfile,"  Dispersion estimated absolute "
                 "kspace RMS force accuracy = %g\n",acc_kspace);
         fprintf(logfile,"  Disperion estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
         fprintf(logfile,"  3d grid and FFT values/proc dispersion = %d %d\n",
                            ngrid_max,nfft_both_max);
       }
     }
   }
 
   // allocate K-space dependent memory
 
   allocate();
 
   // pre-compute Green's function denominator expansion
   // pre-compute 1d charge distribution coefficients
 
   if (function[0]) {
     compute_gf_denom(gf_b, order);
     compute_rho_coeff(rho_coeff, drho_coeff, order);
     cg->ghost_notify();
     cg->setup();
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                           nxlo_fft, nylo_fft, nzlo_fft, 
                           nxhi_fft, nyhi_fft, nzhi_fft,
                           sf_precoeff1, sf_precoeff2, sf_precoeff3,
                           sf_precoeff4, sf_precoeff5, sf_precoeff6);
   }
   if (function[1] + function[2] + function[3]) {
     compute_gf_denom(gf_b_6, order_6);
     compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
     cg_6->ghost_notify();
     cg_6->setup();
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                           nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
                           nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                           sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                           sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
   }
 
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::setup()
 {
   // Recompute every box-size-dependent quantity of the solver:
   //   1. check that the boundary conditions match the slab setting
   //   2. derive the (slab-adjusted) volume and the unit wave vectors
   //   3. for the Coulomb mesh (function[0]) and for the dispersion mesh
   //      (function[1..3]) fill the inverse grid spacings, the wave vector
   //      tables fk*/fk*2 and the virial coefficient tables vg/vg2, then
   //      call compute_gf()/compute_gf_6() and, if differentiation_flag == 1,
   //      compute_sf_coeff()/compute_sf_coeff_6()
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
+  if (slabflag == 1) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
+	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
+  }
+ 
   double *prd;
 
   // volume-dependent factors
   // adjust z dimension for 2d slab PPPM
   // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   // unit wave vectors, shared by the Coulomb and the dispersion mesh
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   // Coulomb mesh: virial coefficients and Green's function
 
   if (function[0]){
 
     delxinv = nx_pppm/xprd;
     delyinv = ny_pppm/yprd;
     delzinv = nz_pppm/zprd_slab;
 
     delvolinv = delxinv*delyinv*delzinv;
 
     double per;
     int i, j, k, n;
 
     // fkx,fky,fkz for my FFT grid pts
     // fk*2 holds the same quantity evaluated at the mirrored index
     //   (N - i) % N
 
     for (i = nxlo_fft; i <= nxhi_fft; i++) {
       per = i - nx_pppm*(2*i/nx_pppm);
       fkx[i] = unitkx*per;
       j = (nx_pppm - i) % nx_pppm;
       per = j - nx_pppm*(2*j/nx_pppm);
       fkx2[i] = unitkx*per;
     }
 
     for (i = nylo_fft; i <= nyhi_fft; i++) {
       per = i - ny_pppm*(2*i/ny_pppm);
       fky[i] = unitky*per;
       j = (ny_pppm - i) % ny_pppm;
       per = j - ny_pppm*(2*j/ny_pppm);
       fky2[i] = unitky*per;
     }
 
     for (i = nzlo_fft; i <= nzhi_fft; i++) {
       per = i - nz_pppm*(2*i/nz_pppm);
       fkz[i] = unitkz*per;
       j = (nz_pppm - i) % nz_pppm;
       per = j - nz_pppm*(2*j/nz_pppm);
       fkz2[i] = unitkz*per;
     }
 
     double sqk,vterm;
     double gew2inv = 1/(g_ewald*g_ewald);
     n = 0;
     for (k = nzlo_fft; k <= nzhi_fft; k++) {
       for (j = nylo_fft; j <= nyhi_fft; j++) {
         for (i = nxlo_fft; i <= nxhi_fft; i++) {
           sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
           if (sqk == 0.0) {
             vg[n][0] = 0.0;
             vg[n][1] = 0.0;
             vg[n][2] = 0.0;
             vg[n][3] = 0.0;
             vg[n][4] = 0.0;
             vg[n][5] = 0.0;
             // vterm is undefined for |k| = 0; zero vg2 as well so this
             //   row never holds uninitialized values
             vg2[n][0] = 0.0;
             vg2[n][1] = 0.0;
             vg2[n][2] = 0.0;
           } else {
             vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
             vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
             vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
             vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
             vg[n][3] = vterm*fkx[i]*fky[j];
             vg[n][4] = vterm*fkx[i]*fkz[k];
             vg[n][5] = vterm*fky[j]*fkz[k];
             vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
             vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
             vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
           }
           n++;
         }
       }
     }
     compute_gf();
     if (differentiation_flag == 1) compute_sf_coeff();
   }
 
   // dispersion mesh: same tables, built on the *_6 grid
 
   if (function[1] + function[2] + function[3]) {
     delxinv_6 = nx_pppm_6/xprd;
     delyinv_6 = ny_pppm_6/yprd;
     delzinv_6 = nz_pppm_6/zprd_slab;
     delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
 
     double per;
     int i, j, k, n;
     for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
       per = i - nx_pppm_6*(2*i/nx_pppm_6);
       fkx_6[i] = unitkx*per;
       j = (nx_pppm_6 - i) % nx_pppm_6;
       per = j - nx_pppm_6*(2*j/nx_pppm_6);
       fkx2_6[i] = unitkx*per;
     }
     for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
       per = i - ny_pppm_6*(2*i/ny_pppm_6);
       fky_6[i] = unitky*per;
       j = (ny_pppm_6 - i) % ny_pppm_6;
       per = j - ny_pppm_6*(2*j/ny_pppm_6);
       fky2_6[i] = unitky*per;
     }
     for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
       per = i - nz_pppm_6*(2*i/nz_pppm_6);
       fkz_6[i] = unitkz*per;
       j = (nz_pppm_6 - i) % nz_pppm_6;
       per = j - nz_pppm_6*(2*j/nz_pppm_6);
       fkz2_6[i] = unitkz*per;
     }
     double sqk,vterm;
     long double erft, expt,nom, denom;
     long double b, bs, bt;
     double rtpi = sqrt(MY_PI);
     double gewinv = 1/g_ewald_6;
     n = 0;
     for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
       for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
         for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
           sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
           if (sqk == 0.0) {
             vg_6[n][0] = 0.0;
             vg_6[n][1] = 0.0;
             vg_6[n][2] = 0.0;
             vg_6[n][3] = 0.0;
             vg_6[n][4] = 0.0;
             vg_6[n][5] = 0.0;
             // same reasoning as for the Coulomb mesh: keep the |k| = 0
             //   row of vg2_6 well defined
             vg2_6[n][0] = 0.0;
             vg2_6[n][1] = 0.0;
             vg2_6[n][2] = 0.0;
           } else {
             b = 0.5*sqrt(sqk)*gewinv;
             bs = b*b;
             bt = bs*b;
             erft = 2*bt*rtpi*erfc((double) b);
             expt = exp(-bs);
             nom = erft - 2*bs*expt;
             denom = nom + expt;
             // guard against a vanishing denominator
             if (denom == 0) vterm = 3.0/sqk;
             else vterm = 3.0*nom/(sqk*denom);
             vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
             vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
             vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
             vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
             vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
             vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
             vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
             vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
             vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
           }
           n++;
         }
       }
     }
     compute_gf_6();
     if (differentiation_flag == 1) compute_sf_coeff_6();
   }
 }
 
 /* ----------------------------------------------------------------------
    reset local grid arrays and communication stencils
    called by fix balance b/c it changed sizes of processor sub-domains
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::setup_grid()
 {
   // free all arrays previously allocated
 
   deallocate();
   deallocate_peratom();
 
   // reset portion of global grid that each proc owns
 
   if (function[0])
     set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                        nxlo_fft, nylo_fft, nzlo_fft,
                        nxhi_fft, nyhi_fft, nzhi_fft,
                        nxlo_in, nylo_in, nzlo_in,
                        nxhi_in, nyhi_in, nzhi_in,
                        nxlo_out, nylo_out, nzlo_out,
                        nxhi_out, nyhi_out, nzhi_out,
                        nlower, nupper,
                        ngrid, nfft, nfft_both,
                        shift, shiftone, order);
 
   if (function[1] + function[2] + function[3])
     set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                        nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                        nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                        nxlo_in_6, nylo_in_6, nzlo_in_6,
                        nxhi_in_6, nyhi_in_6, nzhi_in_6,
                        nxlo_out_6, nylo_out_6, nzlo_out_6,
                        nxhi_out_6, nyhi_out_6, nzhi_out_6,
                        nlower_6, nupper_6,
                        ngrid_6, nfft_6, nfft_both_6,
                        shift_6, shiftone_6, order_6);
 
   // reallocate K-space dependent memory
   // check if grid communication is now overlapping if not allowed
   // don't invoke allocate_peratom(), compute() will allocate when needed
 
   allocate();
 
   if (function[0]) {
     cg->ghost_notify();
     if (overlap_allowed == 0 && cg->ghost_overlap())
       error->all(FLERR,"PPPM grid stencil extends "
                  "beyond nearest neighbor processor");
     cg->setup();
   }
   if (function[1] + function[2] + function[3]) {
     cg_6->ghost_notify();
     if (overlap_allowed == 0 && cg_6->ghost_overlap())
       error->all(FLERR,"PPPM grid stencil extends "
                  "beyond nearest neighbor processor");
     cg_6->setup();
   }
 
   // pre-compute Green's function denomiator expansion
   // pre-compute 1d charge distribution coefficients
 
   if (function[0]) {
     compute_gf_denom(gf_b, order);
     compute_rho_coeff(rho_coeff, drho_coeff, order);
     if (differentiation_flag == 1) 
       compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                           nxlo_fft, nylo_fft, nzlo_fft, 
                           nxhi_fft, nyhi_fft, nzhi_fft,
                           sf_precoeff1, sf_precoeff2, sf_precoeff3,
                           sf_precoeff4, sf_precoeff5, sf_precoeff6);
   }
   if (function[1] + function[2] + function[3]) {
     compute_gf_denom(gf_b_6, order_6);
     compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                           nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
                           nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                           sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                           sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
   }
 
   // pre-compute volume-dependent coeffs
 
   setup();
 }
 
 /* ----------------------------------------------------------------------
    compute the PPPM long-range force, energy, virial 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute(int eflag, int vflag)
 {
   // Overview of one call:
   //   1. set the energy/virial flags and allocate per-atom grids on demand
   //   2. for each active contribution (function[0] Coulomb, function[1]
   //      geometric mixing, function[2] arithmetic mixing, function[3] no
   //      mixing rule): map particles to the grid, build the density, solve
   //      in k-space (ad or ik variant chosen by differentiation_flag) and
   //      interpolate back onto the particles
   //   3. reduce energy and virial over all procs and apply the
   //      g_ewald / g_ewald_6 dependent correction terms
   //   4. apply the slab correction and sum up the total energy
 
   int i;
 
   // set energy/virial flags via ev_setup()
   // if neither is requested, clear all of them explicitly
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global = 
 	 eflag_atom = vflag_atom = 0;
 
   // first time per-atom energy/virial is requested:
   // allocate the per-atom grids and set up their grid communication
 
   if (evflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     if (function[0]) {
       cg_peratom->ghost_notify();
       cg_peratom->setup();
     }
     if (function[1] + function[2] + function[3]) {
       cg_peratom_6->ghost_notify();
       cg_peratom_6->setup();
     }
     peratom_allocate_flag = 1;
   }
   
   // for triclinic boxes, convert atoms from box to lamda coords
   // (converted back at the end of this function)
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
   // extend size of per-atom arrays if necessary
 
   if (atom->nlocal > nmax) {
 
     if (function[0]) memory->destroy(part2grid);
     if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
     nmax = atom->nmax;
     if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
     if (function[1] + function[2] + function[3]) 
       memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
   }
 
 
   // reset the energy accumulators and, if a virial is requested,
   // the Coulomb (virial_1) and dispersion (virial_6) virial accumulators
 
   energy = 0.0;
   energy_1 = 0.0;
   energy_6 = 0.0;
   if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
 
   // find grid points for all my particles
   // distribute particles' charges/dispersion coefficients on the grid
   // communication between processors and remapping to fft
   // solution of Poisson's equation in k-space and backtransformation
   // communication between processors
   // calculation of forces
 
   if (function[0]) {
 
     // perform calculations for coulomb interactions only
 
     particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
                  nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
 
     make_rho_c();
 
     cg->reverse_comm(this,REVERSE_RHO);
  
     brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
 	      density_brick, density_fft, work1,remap); 
  
     // differentiation_flag == 1 selects the *_ad routines,
     // otherwise the *_ik routines are used
 
     if (differentiation_flag == 1) {
 
       poisson_ad(work1, work2, density_fft, fft1, fft2,
                  nx_pppm, ny_pppm, nz_pppm, nfft,
                  nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                  nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                  energy_1, greensfn, 
                  virial_1, vg,vg2,
                  u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
 
       cg->forward_comm(this,FORWARD_AD);
 
       fieldforce_c_ad(); 
 
       if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
 
     } else {
       poisson_ik(work1, work2, density_fft, fft1, fft2,
                  nx_pppm, ny_pppm, nz_pppm, nfft,
                  nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                  nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                  energy_1, greensfn, 
 	         fkx, fky, fkz,fkx2, fky2, fkz2,
                  vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
                  u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
 
       cg->forward_comm(this, FORWARD_IK);
 
       fieldforce_c_ik(); 
 
       if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
     }
     if (evflag_atom) fieldforce_c_peratom();
   }
 
   if (function[1]) {
     // perform calculations for geometric mixing
     particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                  nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
     make_rho_g();
 
 
     cg_6->reverse_comm(this, REVERSE_RHO_G);
 
     brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
 	      density_brick_g, density_fft_g, work1_6,remap_6);
  
     if (differentiation_flag == 1) {
 
       poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
                  virial_6, vg_6, vg2_6,
                  u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
 
       cg_6->forward_comm(this,FORWARD_AD_G);
 
       fieldforce_g_ad();
 
       if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
 
     } else {
       poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
 	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                  vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
                  u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
  
       cg_6->forward_comm(this,FORWARD_IK_G);
  
       fieldforce_g_ik();
 
 
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
     }
     if (evflag_atom) fieldforce_g_peratom();
   }
 
   if (function[2]) {
     // perform calculations for arithmetic mixing
     // seven densities a0..a6 are used: a3 is solved on its own,
     // the others are solved in the pairs (a0,a6), (a1,a5), (a2,a4)
     particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                  nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
     make_rho_a();
 
     cg_6->reverse_comm(this, REVERSE_RHO_A);
 
     brick2fft_a();
 
     if ( differentiation_flag == 1) {
 
       poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
                  virial_6, vg_6, vg2_6,
                  u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
       poisson_2s_ad(density_fft_a0, density_fft_a6,
                     u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                     u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
       poisson_2s_ad(density_fft_a1, density_fft_a5,
                     u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                     u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
       poisson_2s_ad(density_fft_a2, density_fft_a4,
                     u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                     u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
 
       cg_6->forward_comm(this, FORWARD_AD_A);
 
       fieldforce_a_ad();
 
       // NOTE(review): the other ad branches test vflag_atom here,
       // this one tests evflag_atom - confirm this is intended
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
 
     }  else {
     
       poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
 	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                  vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
                  u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
       poisson_2s_ik(density_fft_a0, density_fft_a6,
                     vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
                     vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
                     u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                     u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
       poisson_2s_ik(density_fft_a1, density_fft_a5,
                     vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
                     vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
                     u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                     u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
       poisson_2s_ik(density_fft_a2, density_fft_a4,
                     vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
                     vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
                     u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                     u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
 
       cg_6->forward_comm(this, FORWARD_IK_A);
 
       fieldforce_a_ik();
 
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
     }
     if (evflag_atom) fieldforce_a_peratom();
   }
 
   if (function[3]) {
     // perform calculations if no mixing rule applies
     // the nsplit_alloc densities are solved two at a time (n, n+1)
     particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                  nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
 
     make_rho_none();
 
     cg_6->reverse_comm(this, REVERSE_RHO_NONE);
 
     brick2fft_none();
 
     if (differentiation_flag == 1) {
 
       int n = 0;
       for (int k = 0; k<nsplit_alloc/2; k++) {
         poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1],
                         u_brick_none[n],u_brick_none[n+1],
                         v0_brick_none, v1_brick_none, v2_brick_none,
                         v3_brick_none, v4_brick_none, v5_brick_none);
         n += 2;
       }
 
       cg_6->forward_comm(this,FORWARD_AD_NONE);
 
       fieldforce_none_ad();
 
       if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
 
     } else {
       int n = 0;
       for (int k = 0; k<nsplit_alloc/2; k++) {
 
         poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1],
                         vdx_brick_none[n], vdy_brick_none[n], vdz_brick_none[n],
                         vdx_brick_none[n+1], vdy_brick_none[n+1], vdz_brick_none[n+1],
                         u_brick_none, v0_brick_none, v1_brick_none, v2_brick_none,
                         v3_brick_none, v4_brick_none, v5_brick_none);
         n += 2;
       }
 
       cg_6->forward_comm(this,FORWARD_IK_NONE);
 
       fieldforce_none_ik();
 
       if (evflag_atom) 
         cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
     }
     if (evflag_atom) fieldforce_none_peratom();
   }
 
   // update qsum and qsqsum, if atom count has changed and energy needed
 
   if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
     qsum_qsq();
     natoms_original = atom->natoms;
   }
 
   // sum energy across procs and add in volume-dependent term
   // only the Coulomb energy (energy_1) is scaled by qscale
 
   const double qscale = force->qqrd2e * scale;
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy_1 = energy_all;
     MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy_6 = energy_all;
    
     energy_1 *= 0.5*volume;
     energy_6 *= 0.5*volume;
     
     energy_1 -= g_ewald*qsqsum/MY_PIS +
       MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
     energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
       1.0/12.0*pow(g_ewald_6,6)*csum;
     energy_1 *= qscale;
   }
 
   // sum virial across procs
   // Coulomb part is scaled by qscale, dispersion part is added unscaled
   // the csumij term is subtracted from the three diagonal components
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
     MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
     if (function[1]+function[2]+function[3]){
       double a =  MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
       virial[0] -= a;
       virial[1] -= a;
       virial[2] -= a;
     }
   }
 
   // per-atom energy: apply the same kind of corrections atom by atom
 
   if (eflag_atom) {
     if (function[0]) {
       double *q = atom->q;
       for (i = 0; i < atom->nlocal; i++) {
         eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
       }
     }
     if (function[1] + function[2] + function[3]) {
       int tmp;
       for (i = 0; i < atom->nlocal; i++) {
         tmp = atom->type[i];
         eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
                       1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
       }
     }
   }
             
   // per-atom virial: only the three diagonal components are corrected
 
   if (vflag_atom) {
     if (function[1] + function[2] + function[3]) {
       int tmp;
       for (i = 0; i < atom->nlocal; i++) {
         tmp = atom->type[i];
         for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
       }
     }
   }
 
 
   // 2d slab correction
   // afterwards add the Coulomb and dispersion energies of the
   // active contributions to the total energy
 
   if (slabflag) slabcorr(eflag);
   if (function[0]) energy += energy_1;
   if (function[1] + function[2] + function[3]) energy += energy_6;
 
   // convert atoms back from lamda to box coords
   
   if (triclinic) domain->lamda2x(atom->nlocal);
 }
 
 /* ----------------------------------------------------------------------
    initialize coefficients needed for the dispersion density on the grids
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::init_coeffs()				// local pair coeffs
 {
   int tmp;
   int n = atom->ntypes;
   int converged;
   delete [] B;
   B = NULL;
   if (function[3] + function[2]) {                     // no mixing rule or arithmetic
     if (function[2] && me == 0) {
       if (screen) fprintf(screen,"  Optimizing splitting of Dispersion coefficients\n");
       if (logfile) fprintf(logfile,"  Optimizing splitting of Dispersion coefficients\n");
     }
 
     // allocate data for eigenvalue decomposition
     double **A=NULL;
     double **Q=NULL;
     if ( n > 1 ) {
       // get dispersion coefficients
       double **b = (double **) force->pair->extract("B",tmp);
       memory->create(A,n,n,"pppm/disp:A");
       memory->create(Q,n,n,"pppm/disp:Q");
       // fill coefficients to matrix a
       for (int i = 1; i <= n; i++)
         for (int j = 1; j <= n; j++)
           A[i-1][j-1] = b[i][j];
       // transform q to a unity matrix
       for (int i = 0; i < n; i++)
         for (int j = 0; j < n; j++)
           Q[i][j] = 0.0;
       for (int i = 0; i < n; i++)
         Q[i][i] = 1.0;
       // perfrom eigenvalue decomposition with QR algorithm
       converged = qr_alg(A,Q,n);
       if (function[3] && !converged) {
         error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
       }
       // determine number of used eigenvalues 
       //   based on maximum allowed number or cutoff criterion
       //   sort eigenvalues according to their size with bubble sort
       double t;
       for (int i = 0; i < n; i++) {
         for (int j = 0; j < n-1-i; j++) {
           if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
             t = A[j][j];
             A[j][j] = A[j+1][j+1];
             A[j+1][j+1] = t;
             for (int k = 0; k < n; k++) {
               t = Q[k][j];
               Q[k][j] = Q[k][j+1];
               Q[k][j+1] = t;
             }
           }
         }
       }
 
       //   check which eigenvalue is the first that is smaller
       //   than a specified tolerance
       //   check how many are maximum allowed by the user
       double amax = fabs(A[0][0]);
       double acrit = amax*splittol;
       double bmax = 0;
       double err = 0;
       nsplit = 0;
       for (int i = 0; i < n; i++) {
         if (fabs(A[i][i]) > acrit) nsplit++;
         else {
           bmax = fabs(A[i][i]);
           break;
         }
       }
 
       err =  bmax/amax;
       if (err > 1.0e-4) {
         char str[128];
         sprintf(str,"Estimated error in splitting of dispersion coeffs is %g",err);
         error->warning(FLERR, str);
       }
       // set B
       B = new double[nsplit*n+nsplit];
       for (int i = 0; i< nsplit; i++) {
         B[i] = A[i][i];
         for (int j = 0; j < n; j++) {
           B[nsplit*(j+1) + i] = Q[j][i];
         }
       }
 
       nsplit_alloc = nsplit;
       if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
     } else
         nsplit = 1; // use geometric mixing
 
     // check if the function should preferably be [1] or [2] or [3]
     if (nsplit == 1) {
       if ( B ) delete [] B;
       function[3] = 0;
       function[2] = 0;
       function[1] = 1;
       if (me == 0) {
         if (screen) fprintf(screen,"  Using geometric mixing for reciprocal space\n");
         if (logfile) fprintf(logfile,"  Using geometric mixing for reciprocal space\n");
       }
     }
     if (function[2] && nsplit <= 6) {
       if (me == 0) {
         if (screen) fprintf(screen,"  Using %d instead of 7 structure factors\n",nsplit);
         if (logfile) fprintf(logfile,"  Using %d instead of 7 structure factors\n",nsplit);
       }
       function[3] = 1;
       function[2] = 0;
     }
     if (function[2] && (nsplit > 6)) {
       if (me == 0) {
         if (screen) fprintf(screen,"  Using 7 structure factors\n");
         if (logfile) fprintf(logfile,"  Using 7 structure factors\n");
       }
       if ( B ) delete [] B;
     }
     if (function[3]) {
       if (me == 0) {
         if (screen) fprintf(screen,"  Using %d structure factors\n",nsplit);
         if (logfile) fprintf(logfile,"  Using %d structure factors\n",nsplit);
       }
       if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
     }
 
     memory->destroy(A);
     memory->destroy(Q);
   }
   if (function[1]) {					// geometric 1/r^6
     double **b = (double **) force->pair->extract("B",tmp);
     B = new double[n+1];
     for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
   }
   if (function[2]) {					// arithmetic 1/r^6
     //cannot use epsilon, because this has not been set yet
     double **epsilon = (double **) force->pair->extract("epsilon",tmp);  
     //cannot use sigma, because this has not been set yet
     double **sigma = (double **) force->pair->extract("sigma",tmp);  
     if (!(epsilon&&sigma))
       error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
     double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
     double c[7] = {
       1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
     for (int i=0; i<=n; ++i) {
       eps_i = sqrt(epsilon[i][i]);
       sigma_i = sigma[i][i];
       sigma_n = 1.0;
       for (int j=0; j<7; ++j) {
         *(bi++) = sigma_n*eps_i*c[j]*0.25;
         sigma_n *= sigma_i;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Eigenvalue decomposition of a real, symmetric matrix with the QR
    method (includes transpformation to Tridiagonal Matrix + Wilkinson
    shift)
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::qr_alg(double **A, double **Q, int n)
 {
   int converged = 0;
   double an1, an, bn1, d, mue;
   // allocate some memory for the required operations
   double **A0,**Qi,**C,**D,**E;
   // make a copy of A for convergence check
   memory->create(A0,n,n,"pppm/disp:A0");
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       A0[i][j] = A[i][j];
 
   // allocate an auxiliary matrix Qi
   memory->create(Qi,n,n,"pppm/disp:Qi");
 
   // alllocate an auxillary matrices for the matrix multiplication
   memory->create(C,n,n,"pppm/disp:C");
   memory->create(D,n,n,"pppm/disp:D");
   memory->create(E,n,n,"pppm/disp:E");
 
   // transform Matrix A to Tridiagonal form
   hessenberg(A,Q,n);
 
   // start loop for the matrix factorization
   int count = 0;
   int countmax = 100000;
   while (1) {
     // make a Wilkinson shift
     an1 = A[n-2][n-2];
     an = A[n-1][n-1];
     bn1 = A[n-2][n-1];
     d = (an1-an)/2;
     mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
     for (int i = 0; i < n; i++) 
       A[i][i] -= mue;
 
     // perform a QR factorization for a tridiagonal matrix A
     qr_tri(Qi,A,n);
 
     // update the matrices
     mmult(A,Qi,C,n);
     mmult(Q,Qi,C,n);
 
     // backward Wilkinson shift
     for (int i = 0; i < n; i++)
       A[i][i] += mue;
 
     // check the convergence
     converged = check_convergence(A,Q,A0,C,D,E,n);
     if (converged) break;
     count = count + 1;
     if (count == countmax) break;
   }
   
   // free allocated memory
   memory->destroy(Qi);
   memory->destroy(A0);
   memory->destroy(C);
   memory->destroy(D);
   memory->destroy(E);
   
   return converged;
 }
 
 /* ----------------------------------------------------------------------
    Transform a Matrix to Hessenberg form (for symmetric Matrices, the 
    result will be a tridiagonal matrix)
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::hessenberg(double **A, double **Q, int n)
 {
   double r,a,b,c,s,x1,x2;
   for (int i = 0; i < n-1; i++) {
     for (int j = i+2; j < n; j++) {
       // compute coeffs for the rotation matrix
       a = A[i+1][i];
       b = A[j][i];
       r = sqrt(a*a + b*b);
       c = a/r;
       s = b/r;
       // update the entries of A with multiplication from the left
       for (int k = 0; k < n; k++) {
         x1 = A[i+1][k];
         x2 = A[j][k];
         A[i+1][k] = c*x1 + s*x2;
         A[j][k] = -s*x1 + c*x2;
       }
       // update the entries of A and Q with a multiplication from the right
       for (int k = 0; k < n; k++) {
         x1 = A[k][i+1];
         x2 = A[k][j];
         A[k][i+1] = c*x1 + s*x2;
         A[k][j] = -s*x1 + c*x2;
         x1 = Q[k][i+1];
         x2 = Q[k][j];
         Q[k][i+1] = c*x1 + s*x2;
         Q[k][j] = -s*x1 + c*x2;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    QR factorization for a tridiagonal matrix; Result of the factorization
    is stored in A and Qi
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::qr_tri(double** Qi,double** A,int n)
 {
   double r,a,b,c,s,x1,x2;
   int j,k,k0,kmax;
   // make Qi a unity matrix
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       Qi[i][j] = 0.0;
   for (int i = 0; i < n; i++)
     Qi[i][i] = 1.0;
   // loop over main diagonal and first of diagonal of A
   for (int i = 0; i < n-1; i++) {
     j = i+1;
     // coefficients of the rotation matrix
     a = A[i][i];
     b = A[j][i];
     r = sqrt(a*a + b*b);
     c = a/r;
     s = b/r;
     // update the entries of A and Q
     k0 = (i-1>0)?i-1:0;   //min(i-1,0);
     kmax = (i+3<n)?i+3:n;  //min(i+3,n);
     for (k = k0; k < kmax; k++) {
       x1 = A[i][k];
       x2 = A[j][k];
       A[i][k] = c*x1 + s*x2;
       A[j][k] = -s*x1 + c*x2;
     }
     for (k = 0; k < n; k++) {
       x1 = Qi[k][i];
       x2 = Qi[k][j];
       Qi[k][i] = c*x1 + s*x2;
       Qi[k][j] = -s*x1 + c*x2;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Multiply two matrices A and B, store the result in A; C provides
    some memory to store intermediate results
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::mmult(double** A, double** B, double** C, int n)
 {
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       C[i][j] = 0.0;
 
   // perform matrix multiplication 
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       for (int k = 0; k < n; k++)
         C[i][j] += A[i][k] * B[k][j];
   // copy the result back to matrix A
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       A[i][j] = C[i][j];
 }
 
 /* ----------------------------------------------------------------------
    Check if the factorization has converged by comparing all elements of the
    original matrix and the new matrix
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::check_convergence(double** A,double** Q,double** A0,
                                 double** C,double** D,double** E,int n)
 {
   double eps = 1.0e-8;
   int converged = 1;
   double epsmax = -1;
   double Bmax = 0.0;
   double diff;
   // get the largest eigenvalue of the original matrix
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       Bmax = (Bmax>A0[i][j])?Bmax:A0[i][j];  //max(Bmax,A0[i][j]);
   double epsabs = eps*Bmax;
   
   // reconstruct the original matrix
   // store the diagonal elements in D
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       D[i][j] = 0.0;
   for (int i = 0; i < n; i++)
     D[i][i] = A[i][i];
   // store matrix Q in E
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       E[i][j] = Q[i][j];
   // E = Q*A
   mmult(E,D,C,n);
   // store transpose of Q in D
   for (int i = 0; i < n; i++)
     for (int j = 0; j < n; j++)
       D[i][j] = Q[j][i];
   // E = Q*A*Q.t
   mmult(E,D,C,n);
 
   //compare the original matrix and the final matrix
   for (int i = 0; i < n; i++) {
     for (int j = 0; j < n; j++) {
       diff = A0[i][j] - E[i][j];
       epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff));
     }
   }
   if (epsmax > epsabs) converged = 0;
   return converged;
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::allocate()
 {
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (function[0]) {
     memory->create(work1,2*nfft_both,"pppm/disp:work1");
     memory->create(work2,2*nfft_both,"pppm/disp:work2");
 
     memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
     memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
     memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
 
     memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
     memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
     memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
 
 
     memory->create(gf_b,order,"pppm/disp:gf_b");
     memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
     memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
     memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d");
     memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
 
     memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
     memory->create(vg,nfft_both,6,"pppm/disp:vg");
     memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
 
     memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:density_brick");
     if ( differentiation_flag == 1) {
       memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   		  	      nxlo_out,nxhi_out,"pppm/disp:u_brick");
       memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
       memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
       memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
       memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
       memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
       memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
 
     } else {
       memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			      nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
       memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			      nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
       memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			      nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
     }
     memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
 
     int tmp;
 
     fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     0,0,&tmp,collective_flag);
 
     fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
 		     0,0,&tmp,collective_flag);
 
     remap = new Remap(lmp,world,
 		      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
 		      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		      1,0,0,FFT_PRECISION,collective_flag);
 
   // create ghost grid object for rho and electric field communication
 
   if (differentiation_flag == 1)
     cg = new GridComm(lmp,world,1,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
     cg = new GridComm(lmp,world,3,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 
   if (function[1]) {
     memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
     memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
 
     memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
     memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
     memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
 
     memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
     memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
     memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
 
     memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
     memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
     memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
 
     memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
     memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
     memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
 
     memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
     if ( differentiation_flag == 1) {
       memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
 
       memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
       memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
       memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
       memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
       memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
       memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
 
     }  else {
       memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
       memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
       memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
     }
     memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
 
 
     int tmp;
 
     fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     0,0,&tmp,collective_flag);
 
     fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		     0,0,&tmp,collective_flag);
 
     remap_6 = new Remap(lmp,world,
 		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		      1,0,0,FFT_PRECISION,collective_flag);
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_6 = new GridComm(lmp,world,1,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_6 = new GridComm(lmp,world,3,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 
   if (function[2]) {
     memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
     memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
 
     memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
     memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
     memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
 
     memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
     memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
     memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
 
     memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
     memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
     memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
 
     memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
     memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
     memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
 
     memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
     memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
     memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
     memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
     memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
     memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
     memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
 
     memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
     memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
     memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
     memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
     memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
     memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
     memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
 
 
     if ( differentiation_flag == 1 ) {
       memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
       memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
       memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
       memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
       memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
       memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
       memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
 
       memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
       memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
       memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
       memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
       memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
       memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
 
     } else {
 
       memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
       memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
       memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
 
       memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
       memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
       memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
 
       memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
       memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
       memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
 
       memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
       memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
       memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
 
       memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
       memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
       memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
 
       memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
       memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
       memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
 
       memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
       memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
       memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
     }
 
 
 
     int tmp;
 
     fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     0,0,&tmp,collective_flag);
 
     fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		     0,0,&tmp,collective_flag);
 
     remap_6 = new Remap(lmp,world,
 		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		      1,0,0,FFT_PRECISION,collective_flag);
 
     // create ghost grid object for rho and electric field communication
 
 
     if (differentiation_flag == 1)
       cg_6 = new GridComm(lmp,world,7,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_6 = new GridComm(lmp,world,21,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }  
 
   if (function[3]) {
     memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
     memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
 
     memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
     memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
     memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
 
     memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
     memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
     memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
 
     memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
     memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
     memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
 
     memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
     memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
     memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
 
     memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
     if ( differentiation_flag == 1) {
       memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
 
       memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
       memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
       memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
       memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
       memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
       memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
 
     }  else {
       memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
       memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
       memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
     }
     memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
 
 
     int tmp;
 
     fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     0,0,&tmp,collective_flag);
 
     fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		     0,0,&tmp,collective_flag);
 
     remap_6 = new Remap(lmp,world,
 		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		      1,0,0,FFT_PRECISION,collective_flag);
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_6 = new GridComm(lmp,world,nsplit_alloc,nsplit_alloc,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_6 = new GridComm(lmp,world,3*nsplit_alloc,nsplit_alloc,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order
    for per atom calculations 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::allocate_peratom()
 {
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (function[0]) {
 
     if (differentiation_flag != 1)
       memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
     	                      nxlo_out,nxhi_out,"pppm/disp:u_brick");
 
     memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			    nxlo_out,nxhi_out,"pppm/disp:v0_brick");
     memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v1_brick");
     memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v2_brick");
     memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v3_brick");
     memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v4_brick");
     memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v5_brick");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom =
         new GridComm(lmp,world,6,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom =
         new GridComm(lmp,world,7,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }
 
 
   if (function[1]) {
 
     if ( differentiation_flag != 1 )
       memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
 
     memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
     memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
     memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
     memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
     memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
     memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
         new GridComm(lmp,world,6,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
         new GridComm(lmp,world,7,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }
 
   if (function[2]) {
    
     if ( differentiation_flag != 1 ) {
       memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
       memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
       memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
       memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
       memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
       memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
       memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
     }
 
     memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
     memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
     	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
     memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
     memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
     memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
     memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
 
     memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
     memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
    	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
     memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
     memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
     memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
     memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
 
     memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
     memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
     memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
     memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
     memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
     memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
 
     memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
     memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
     memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
     memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
     memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
     memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
 
     memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
     memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
     memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
     memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
     memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
     memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
 
     memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
     memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
     memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
     memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
     memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
     memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
 
     memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
     memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
     memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
     memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
     memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
     memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
         new GridComm(lmp,world,42,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
         new GridComm(lmp,world,49,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }  
 
   if (function[3]) {
 
     if ( differentiation_flag != 1 )
       memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
 
     memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
     memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
     memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
     memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
     memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
     memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
         new GridComm(lmp,world,6*nsplit_alloc,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
         new GridComm(lmp,world,7*nsplit_alloc,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }
 }
 
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::deallocate()
 {
   memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy(density_fft);
   density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
   density_fft = NULL;
 
   memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_g);
   density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
   density_fft_g = NULL;
 
   memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a0);
   density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
   density_fft_a0 = NULL;
 
   memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a1);
   density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
   density_fft_a1 = NULL;
 
   memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a2);
   density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
   density_fft_a2 = NULL;
 
   memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a3);
   density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
   density_fft_a3 = NULL;
  
   memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a4);
   density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
   density_fft_a4 = NULL;
 
   memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a5);
   density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
   density_fft_a5 = NULL;
 
   memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a6);
   density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
   density_fft_a6 = NULL;
 
   memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_none);
   density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
   density_fft_none = NULL;
 
   memory->destroy(sf_precoeff1);
   memory->destroy(sf_precoeff2);
   memory->destroy(sf_precoeff3);
   memory->destroy(sf_precoeff4);
   memory->destroy(sf_precoeff5);
   memory->destroy(sf_precoeff6);
   sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
 
   memory->destroy(sf_precoeff1_6);
   memory->destroy(sf_precoeff2_6);
   memory->destroy(sf_precoeff3_6);
   memory->destroy(sf_precoeff4_6);
   memory->destroy(sf_precoeff5_6);
   memory->destroy(sf_precoeff6_6);
   sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
 
   memory->destroy(greensfn);
   memory->destroy(greensfn_6);
   memory->destroy(work1);
   memory->destroy(work2);
   memory->destroy(work1_6);
   memory->destroy(work2_6);
   memory->destroy(vg);
   memory->destroy(vg2);
   memory->destroy(vg_6);
   memory->destroy(vg2_6);
   greensfn = greensfn_6 = NULL;
   work1 = work2 = work1_6 = work2_6 = NULL;
   vg = vg2 = vg_6 = vg2_6 = NULL;
 
   memory->destroy1d_offset(fkx,nxlo_fft);
   memory->destroy1d_offset(fky,nylo_fft);
   memory->destroy1d_offset(fkz,nzlo_fft);
   fkx = fky = fkz = NULL;
 
   memory->destroy1d_offset(fkx2,nxlo_fft);
   memory->destroy1d_offset(fky2,nylo_fft);
   memory->destroy1d_offset(fkz2,nzlo_fft);
   fkx2 = fky2 = fkz2 = NULL;
 
   memory->destroy1d_offset(fkx_6,nxlo_fft_6);
   memory->destroy1d_offset(fky_6,nylo_fft_6);
   memory->destroy1d_offset(fkz_6,nzlo_fft_6);
   fkx_6 = fky_6 = fkz_6 = NULL;
 
   memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
   memory->destroy1d_offset(fky2_6,nylo_fft_6);
   memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
   fkx2_6 = fky2_6 = fkz2_6 = NULL;
 
 
   memory->destroy(gf_b);
   memory->destroy2d_offset(rho1d,-order/2);
   memory->destroy2d_offset(rho_coeff,(1-order)/2);
   memory->destroy2d_offset(drho1d,-order/2);
   memory->destroy2d_offset(drho_coeff, (1-order)/2);
   gf_b = NULL;
   rho1d = rho_coeff = drho1d = drho_coeff = NULL;
 
   memory->destroy(gf_b_6);
   memory->destroy2d_offset(rho1d_6,-order_6/2);
   memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
   memory->destroy2d_offset(drho1d_6,-order_6/2); 
   memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
   gf_b_6 = NULL;
   rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
 
   delete fft1;
   delete fft2;
   delete remap;
   delete cg;
   fft1 = fft2 = NULL;
   remap = NULL;
   cg = NULL;
 
   delete fft1_6;
   delete fft2_6;
   delete remap_6;
   delete cg_6;
   fft1_6 = fft2_6 = NULL;
   remap_6 = NULL;
   cg_6 = NULL;
 }
 
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order
    for per atom calculations 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::deallocate_peratom()
 {
   peratom_allocate_flag = 0;
 
   memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
   u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
 
   memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
 
   memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
 
   memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
 
   memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
 
   memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
  
   memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
  
   memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
 
   memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
 
   memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
   u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
 
   delete cg_peratom;
   delete cg_peratom_6;
   cg_peratom = cg_peratom_6 = NULL;
 }
 
 /* ----------------------------------------------------------------------
    set size of FFT grid (nx,ny,nz_pppm) and g_ewald
    for Coulomb interactions
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_grid()
 {
   double q2 = qsqsum * force->qqrd2e;
 
   // use xprd,yprd,zprd even if triclinic so grid size is the same
   // adjust z dimension for 2d slab PPPM
   // 3d PPPM just uses zprd since slab_volfactor = 1.0
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   
   // make initial g_ewald estimate
   // based on desired accuracy and real space cutoff
   // fluid-occupied volume used to estimate real-space error
   // zprd used rather than zprd_slab
 
   double h, h_x,h_y,h_z;
   bigint natoms = atom->natoms;
 
   if (!gewaldflag) {
     g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
     if (g_ewald >= 1.0)  
       error->all(FLERR,"KSpace accuracy too large to estimate G vector");
     g_ewald = sqrt(-log(g_ewald)) / cutoff;
   } 
 
   // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
   // nz_pppm uses extended zprd_slab instead of zprd
   // reduce it until accuracy target is met
 
   if (!gridflag) {
     h = h_x = h_y = h_z = 4.0/g_ewald;  
     int count = 0;
     while (1) {
       
       // set grid dimension
       nx_pppm = static_cast<int> (xprd/h_x);
       ny_pppm = static_cast<int> (yprd/h_y);
       nz_pppm = static_cast<int> (zprd_slab/h_z);
 
       if (nx_pppm <= 1) nx_pppm = 2;
       if (ny_pppm <= 1) ny_pppm = 2;
       if (nz_pppm <= 1) nz_pppm = 2;
 
       //set local grid dimension
       int npey_fft,npez_fft;
       if (nz_pppm >= nprocs) {
         npey_fft = 1;
         npez_fft = nprocs;
       } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
       int me_y = me % npey_fft;
       int me_z = me / npey_fft;
 
       nxlo_fft = 0;
       nxhi_fft = nx_pppm - 1;
       nylo_fft = me_y*ny_pppm/npey_fft;
       nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
       nzlo_fft = me_z*nz_pppm/npez_fft;
       nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
       double qopt = compute_qopt();
    
       double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
 
       count++;
 
       // break loop if the accuracy has been reached or too many loops have been performed
       if (dfkspace <= accuracy) break;
       if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
       h *= 0.95;
       h_x = h_y = h_z = h;
     }
   }
   
   // boost grid size until it is factorable
 
   while (!factorable(nx_pppm)) nx_pppm++;
   while (!factorable(ny_pppm)) ny_pppm++;
   while (!factorable(nz_pppm)) nz_pppm++;
 }
 
 /* ----------------------------------------------------------------------
    set the FFT parameters 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
                                    int& nxlo_f,int& nylo_f,int& nzlo_f,
                                    int& nxhi_f,int& nyhi_f,int& nzhi_f,
                                    int& nxlo_i,int& nylo_i,int& nzlo_i,
                                    int& nxhi_i,int& nyhi_i,int& nzhi_i,
                                    int& nxlo_o,int& nylo_o,int& nzlo_o,
                                    int& nxhi_o,int& nyhi_o,int& nzhi_o,
 		                   int& nlow, int& nupp,
                                    int& ng, int& nf, int& nfb,
 		                   double& sft,double& sftone, int& ord)
 {
   // global indices of PPPM grid range from 0 to N-1
   // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that I own without ghost cells
   // for slab PPPM, assign z grid as if it were not extended
 
   nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
   nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
 
   nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
   nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
 
   nzlo_i = static_cast<int> 
       (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
   nzhi_i = static_cast<int> 
       (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
 
 
   // nlow,nupp = stencil size for mapping particles to PPPM grid
 
   nlow = -(ord-1)/2;
   nupp = ord/2;
 
   // sft values for particle <-> grid mapping
   // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
   if (ord % 2) sft = OFFSET + 0.5;
   else sft = OFFSET;
   if (ord % 2) sftone = 0.0;
   else sftone = 0.5;
 
   // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that my particles can contribute charge to
   // effectively nlo_in,nhi_in + ghost cells
   // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
   //           position a particle in my box can be at
   // dist[3] = particle position bound = subbox + skin/2.0 + qdist
   //   qdist = offset due to TIP4P fictitious charge
   //   convert to triclinic if necessary
   // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
   // for slab PPPM, assign z grid as if it were not extended
 
   double *prd,*sublo,*subhi;
 
   if (triclinic == 0) {
     prd = domain->prd;
     boxlo = domain->boxlo;
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     prd = domain->prd_lamda;
     boxlo = domain->boxlo_lamda;
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double dist[3];
   double cuthalf = 0.5*neighbor->skin + qdist;
   if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
   else {
     dist[0] = cuthalf/domain->prd[0];
     dist[1] = cuthalf/domain->prd[1];
     dist[2] = cuthalf/domain->prd[2];
   }
     
   int nlo,nhi;
     
   nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * 
                             nx_p/xprd + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * 
                             nx_p/xprd + sft) - OFFSET;
   nxlo_o = nlo + nlow;
   nxhi_o = nhi + nupp;
 
   nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * 
                             ny_p/yprd + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * 
                             ny_p/yprd + sft) - OFFSET;
   nylo_o = nlo + nlow;
   nyhi_o = nhi + nupp;
 
   nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * 
                             nz_p/zprd_slab + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * 
                             nz_p/zprd_slab + sft) - OFFSET;
   nzlo_o = nlo + nlow;
   nzhi_o = nhi + nupp;
 
   // for slab PPPM, change the grid boundary for processors at +z end
   //   to include the empty volume between periodically repeating slabs
   // for slab PPPM, want charge data communicated from -z proc to +z proc,
   //   but not vice versa, also want field data communicated from +z proc to
   //   -z proc, but not vice versa
   // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
 
   if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
     nzhi_i = nz_p - 1;
     nzhi_o = nz_p - 1;
   }
   
   // decomposition of FFT mesh
   // global indices range from 0 to N-1
   // proc owns entire x-dimension, clump of columns in y,z dimensions
   // npey_fft,npez_fft = # of procs in y,z dims
   // if nprocs is small enough, proc can own 1 or more entire xy planes,
   //   else proc owns 2d sub-blocks of yz plane
   // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
   // nlo_fft,nhi_fft = lower/upper limit of the section
   //   of the global FFT mesh that I own
 
   int npey_fft,npez_fft;
   if (nz_p >= nprocs) {
     npey_fft = 1;
     npez_fft = nprocs;
   } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
 
   int me_y = me % npey_fft;
   int me_z = me / npey_fft;
 
   nxlo_f = 0;
   nxhi_f = nx_p - 1;
   nylo_f = me_y*ny_p/npey_fft;
   nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
   nzlo_f = me_z*nz_p/npez_fft;
   nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
 
   // PPPM grid for this proc, including ghosts
 
   ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
     (nzhi_o-nzlo_o+1);
 
   // FFT arrays on this proc, without ghosts
   // nfft = FFT points in FFT decomposition on this proc
   // nfft_brick = FFT points in 3d brick-decomposition on this proc
   // nfft_both = greater of 2 values
 
   nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
     (nzhi_f-nzlo_f+1);
   int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
     (nzhi_i-nzlo_i+1);
   nfb = MAX(nf,nfft_brick);
 
 }
 
 /* ----------------------------------------------------------------------
    test whether n can be written as a product of entries of factors[]
    repeatedly divides n by the first entry of factors[] that divides it
    return 1 if n is reduced to 1 (or is <= 1 on entry), 0 otherwise
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::factorable(int n)
 {
   int remaining = n;
 
   while (remaining > 1) {
 
     // locate the first listed factor that divides what is left
 
     int idx = 0;
     while (idx < nfactors && remaining % factors[idx] != 0) idx++;
 
     // none of the listed factors divides the remainder
 
     if (idx == nfactors) return 0;
 
     remaining /= factors[idx];
   }
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
 ------------------------------------------------------------------------- */
 void PPPMDisp::adjust_gewald()
 {
   
   // Use Newton solver to find g_ewald
 
   double dx;
         
   // Begin algorithm
   
   for (int i = 0; i < LARGE; i++) {
     dx = f() / derivf(); 
     g_ewald -= dx; //Update g_ewald
     if (fabs(f()) < SMALL) return;
   }
    
   // Failed to converge
   
   char str[128];
   sprintf(str, "Could not compute g_ewald");
   error->all(FLERR, str);
 
 }
 
 /* ----------------------------------------------------------------------
  Calculate f(x)
  ------------------------------------------------------------------------- */
 
 double PPPMDisp::f()
 {
   double df_rspace, df_kspace;
   double q2 = qsqsum * force->qqrd2e;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
 
   df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / 
        sqrt(natoms*cutoff*xprd*yprd*zprd);
    
   double qopt = compute_qopt();
   df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
    
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
  Calculate numerical derivative f'(x) using forward difference
  [f(x + h) - f(x)] / h
  ------------------------------------------------------------------------- */
             
 double PPPMDisp::derivf()
 {  
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
   
   f1 = f();
   g_ewald_old = g_ewald;
   g_ewald += h;
   f2 = f();
   g_ewald = g_ewald_old;
   df = (f2 - f1)/h;
   
   return df;
 } 
 
 /* ----------------------------------------------------------------------
    Calculate the final estimator for the accuracy
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::final_accuracy()
 {
   double df_rspace, df_kspace;
   double q2 = qsqsum * force->qqrd2e;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / 
              sqrt(natoms*cutoff*xprd*yprd*zprd);
 
   double qopt = compute_qopt();
 
   df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
 
   double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
   return acc;
 }
 
 /* ----------------------------------------------------------------------
    Calculate the final estimator for the Dispersion accuracy
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   acc_real = lj_rspace_error();
 
   double qopt = compute_qopt_6();
 
   acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
 
   acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
   return;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for Coulomb interactions
    local value comes from the ad scheme if differentiation_flag == 1,
    else from the ik scheme; the sum over all procs is returned
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt()
 {
   double qopt_local;
   if (differentiation_flag != 1) qopt_local = compute_qopt_ik();
   else qopt_local = compute_qopt_ad();
 
   double qopt_global;
   MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
 
   return qopt_global;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for Dispersion interactions
    local value comes from the ad scheme if differentiation_flag == 1,
    else from the ik scheme; the sum over all procs is returned
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6()
 {
   double qopt_local;
   if (differentiation_flag != 1) qopt_local = compute_qopt_6_ik();
   else qopt_local = compute_qopt_6_ad();
 
   double qopt_global;
   MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
 
   return qopt_global;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ik differentiation scheme and Coulomb interaction
    returns the partial sum over this proc's section of the FFT grid
    (nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft)
    compute_qopt() sums the partial results of all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_ik()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   // box lengths: lamda (reduced) lengths are used for triclinic boxes
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wave vector spacing in each dimension (z uses the slab-extended length)
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double sqk, u2;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,sum2, sum3,dot1,dot2;
 
   // inner sums run over shifts of -nb..nb grid periods per dimension
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     // integer division: index m is kept if 2*m < nz_pppm, else shifted by -nz_pppm
     mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
       
         // squared length of the unshifted wave vector
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wave vector contributes nothing
         if (sqk != 0.0) {
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             // shifted wave vector component, its Gaussian factor sx
             //   and its (sin(arg)/arg)^order factor wx (1 when arg == 0)
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*pow(qx/g_ewald,2.0));
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*pow(qy/g_ewald,2.0));
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*pow(qz/g_ewald,2.0));
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order);
 
                 // dot1 = (unshifted k) . (shifted q), dot2 = |shifted q|^2
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                 sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
 		sum3 += u2;
               }
             }
           }
           // contribution of this grid point: sum1 - sum2^2/(sum3^2 * sqk)
 	  sum2 *= sum2;
 	  sum3 *= sum3*sqk;
           qopt += sum1 -sum2/sum3;
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ad differentiation scheme and Coulomb interaction
    returns the partial sum over this proc's section of the FFT grid
    (nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft)
    compute_qopt() sums the partial results of all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_ad()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   // box lengths: lamda (reduced) lengths are used for triclinic boxes
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
 
   // wave vector spacing in each dimension (z uses the slab-extended length)
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u2, sqk;
   double sum1,sum2,sum3,sum4,dot2;
 
   // inner sums run over shifts of -nb..nb grid periods per dimension
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     // integer division: index m is kept if 2*m < nz_pppm, else shifted by -nz_pppm
     mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
       
         // squared length of the unshifted wave vector
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wave vector contributes nothing
         if (sqk != 0.0) {
     
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             // shifted wave vector component, its Gaussian factor sx
             //   and its (sin(arg)/arg)^order factor wx (1 when arg == 0)
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*pow(qx/g_ewald,2.0));
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*pow(qy/g_ewald,2.0));
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*pow(qz/g_ewald,2.0));
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order);
 
                 // dot2 = |shifted q|^2
                 dot2 = qx*qx+qy*qy+qz*qz;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                 sum2 += sx*sy*sz * u2*4.0*MY_PI;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           // contribution of this grid point: sum1 - sum2^2/(sum3*sum4)
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ik differentiation scheme and Dispersion interaction
    returns the partial sum over this proc's section of the dispersion
    FFT grid (nxlo_fft_6..nxhi_fft_6, nylo_fft_6..nyhi_fft_6,
    nzlo_fft_6..nzhi_fft_6)
    compute_qopt_6() sums the partial results of all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6_ik()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   // box lengths: lamda (reduced) lengths are used for triclinic boxes
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wave vector spacing in each dimension (z uses the slab-extended length)
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double sqk, u2;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,sum2, sum3;
   double dot1,dot2, rtdot2, term;
   // inv2ew = 1/(2*g_ewald_6)
   double inv2ew = 2*g_ewald_6;
   inv2ew = 1.0/inv2ew;
   double rtpi = sqrt(MY_PI);
 
   // inner sums run over shifts of -nb..nb grid periods per dimension
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     // integer division: index m is kept if 2*m < nz_pppm_6, else shifted by -nz_pppm_6
     mper = m - nz_pppm_6*(2*m/nz_pppm_6);
 
     for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       lper = l - ny_pppm_6*(2*l/ny_pppm_6);
 
       for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         kper = k - nx_pppm_6*(2*k/nx_pppm_6);
       
         // squared length of the unshifted wave vector
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wave vector contributes nothing
         if (sqk != 0.0) {
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             // shifted wave vector component, its Gaussian factor sx
             //   and its (sin(arg)/arg)^order_6 factor wx (1 when arg == 0)
             qx = unitkx*(kper+nx_pppm_6*nx);
             sx = exp(-qx*qx*inv2ew*inv2ew);
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm_6;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm_6*ny);
               sy = exp(-qy*qy*inv2ew*inv2ew);
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm_6;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm_6*nz);
                 sz = exp(-qz*qz*inv2ew*inv2ew);
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm_6;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
 
                 // dot1 = (unshifted k) . (shifted q), dot2 = |shifted q|^2
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 rtdot2 = sqrt(dot2);
                 // term depends only on |shifted q| and g_ewald_6
                 term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
 		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                 term *= g_ewald_6*g_ewald_6*g_ewald_6;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                 sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
 		sum3 += u2;
               }
             }
           }
           // contribution of this grid point: sum1 - sum2^2/(sum3^2 * sqk)
 	  sum2 *= sum2;
 	  sum3 *= sum3*sqk;
           qopt += sum1 -sum2/sum3;
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ad differentiation scheme and Dispersion interaction
    returns the partial sum over this proc's section of the dispersion
    FFT grid (nxlo_fft_6..nxhi_fft_6, nylo_fft_6..nyhi_fft_6,
    nzlo_fft_6..nzhi_fft_6)
    compute_qopt_6() sums the partial results of all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6_ad()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   // box lengths: lamda (reduced) lengths are used for triclinic boxes
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wave vector spacing in each dimension (z uses the slab-extended length)
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u2, sqk;
   double sum1,sum2,sum3,sum4;
   double dot2, rtdot2, term;
   // inv2ew = 1/(2*g_ewald_6)
   double inv2ew = 2*g_ewald_6;
   inv2ew = 1/inv2ew;
   double rtpi = sqrt(MY_PI);
 
   // inner sums run over shifts of -nb..nb grid periods per dimension
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     // integer division: index m is kept if 2*m < nz_pppm_6, else shifted by -nz_pppm_6
     mper = m - nz_pppm_6*(2*m/nz_pppm_6);
 
     for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       lper = l - ny_pppm_6*(2*l/ny_pppm_6);
 
       for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         kper = k - nx_pppm_6*(2*k/nx_pppm_6);
       
         // squared length of the unshifted wave vector
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wave vector contributes nothing
         if (sqk != 0.0) {
     
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             // shifted wave vector component, its Gaussian factor sx
             //   and its (sin(arg)/arg)^order_6 factor wx (1 when arg == 0)
             qx = unitkx*(kper+nx_pppm_6*nx);
             sx = exp(-qx*qx*inv2ew*inv2ew);
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm_6;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm_6*ny);
               sy = exp(-qy*qy*inv2ew*inv2ew);
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm_6;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm_6*nz);
                 sz = exp(-qz*qz*inv2ew*inv2ew);
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm_6;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
 
                 // dot2 = |shifted q|^2
                 dot2 = qx*qx+qy*qy+qz*qz;
                 rtdot2 = sqrt(dot2);
                 // term depends only on |shifted q| and g_ewald_6
                 term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
 		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                 term *= g_ewald_6*g_ewald_6*g_ewald_6;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                 sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           // contribution of this grid point: sum1 - sum2^2/(sum3*sum4)
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    set size of FFT grid and g_ewald_6 for Dispersion interactions
    each quantity is only computed if its flag (csumflag, gewaldflag_6,
    gridflag_6) is not set
    afterwards every grid dimension is increased until factorable()
    accepts it
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_grid_6()
 {
   if (csumflag == 0) calc_csum();
   if (gewaldflag_6 == 0) set_init_g6();
   if (gridflag_6 == 0) set_n_pppm_6();
 
   // bump each dimension to the next size factorable() accepts
 
   int *dims[3] = {&nx_pppm_6,&ny_pppm_6,&nz_pppm_6};
   for (int d = 0; d < 3; d++)
     while (!factorable(*dims[d])) (*dims[d])++;
 }
 
 /* ----------------------------------------------------------------------
    Calculate the sum of the squared dispersion coefficients and other 
    related quantities required for the calculations
    sets csum, csumij, cii[], csumi[] and csumflag
    cii[] and csumi[] are re-allocated with ntypes+1 entries
    which entries of B[] are used depends on function[1..3]
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::calc_csum()
 {
   const int ntypes = atom->ntypes;
 
   csumij = 0.0;
   csum = 0.0;
 
   // per-type arrays, indexed 0..ntypes, zero-filled on allocation
 
   delete [] cii;
   cii = new double[ntypes+1]();
   delete [] csumi;
   csumi = new double[ntypes+1]();
   int *neach = new int[ntypes+1]();
 
   // per-type cii, local atom count per type and local part of csum
   // the branches differ in how B[] is indexed (selected by function[])
 
   if (function[1]) {
     for (int it = 1; it <= ntypes; it++)
       cii[it] = B[it]*B[it];
     for (int ia = 0; ia < atom->nlocal; ia++) {
       const int it = atom->type[ia];
       neach[it]++;
       csum += B[it]*B[it];
     }
   }
   if (function[2]) {
     for (int it = 1; it <= ntypes; it++)
       cii[it] = 64.0/20.0*B[7*it+3]*B[7*it+3];
     for (int ia = 0; ia < atom->nlocal; ia++) {
       const int it = atom->type[ia];
       neach[it]++;
       csum += 64.0/20.0*B[7*it+3]*B[7*it+3];
     }
   }
   if (function[3]) {
     for (int it = 1; it <= ntypes; it++)
       for (int is = 0; is < nsplit; is++)
         cii[it] += B[is]*B[nsplit*it + is]*B[nsplit*it + is];
     for (int ia = 0; ia < atom->nlocal; ia++) {
       const int it = atom->type[ia];
       neach[it]++;
       for (int is = 0; is < nsplit; is++)
         csum += B[is]*B[nsplit*it + is]*B[nsplit*it + is];
     }
   }
 
   // sum csum over all procs
 
   double csum_all;
   MPI_Allreduce(&csum,&csum_all,1,MPI_DOUBLE,MPI_SUM,world);
   csum = csum_all;
   csumflag = 1;
 
   // sum the per-type atom counts over all procs
 
   int *neach_all = new int[ntypes+1];
   MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
 
   // compute csumij and csumi
   // each count is multiplied by its coefficient as a double first,
   //   so two int counts are never multiplied with each other
 
   if (function[1]) {
     for (int it = 1; it <= ntypes; it++) {
       for (int jt = 1; jt <= ntypes; jt++) {
         csumi[it] += neach_all[jt]*B[it]*B[jt];
         const double wi = neach_all[it]*B[it];
         const double wj = neach_all[jt]*B[jt];
         csumij += wi*wj;
       }
     }
   }
   if (function[2]) {
     for (int it = 1; it <= ntypes; it++) {
       for (int jt = 1; jt <= ntypes; jt++) {
         for (int kk = 0; kk <= 6; kk++) {
           csumi[it] += neach_all[jt]*B[7*it + kk]*B[7*(jt+1)-kk-1];
           const double wi = neach_all[it]*B[7*it + kk];
           const double wj = neach_all[jt]*B[7*(jt+1)-kk-1];
           csumij += wi*wj;
         }
       }
     }
   }
   if (function[3]) {
     for (int it = 1; it <= ntypes; it++) {
       for (int jt = 1; jt <= ntypes; jt++) {
         for (int is = 0; is < nsplit; is++) {
           csumi[it] += neach_all[jt]*B[is]*B[nsplit*it+is]*B[nsplit*jt+is];
           const double wi = neach_all[it]*B[nsplit*it+is];
           const double wj = neach_all[jt]*B[nsplit*jt+is];
           csumij += B[is]*wi*wj;
         }
       }
     }
   }
 
   delete [] neach;
   delete [] neach_all;
 }
 
 /* ----------------------------------------------------------------------
    adjust g_ewald_6 to the new grid size
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::adjust_gewald_6()
 {
   // Use Newton solver to find g_ewald_6
   double dx;
 
   // Start loop
 
   for (int i = 0; i <  LARGE; i++) {
     dx = f_6() / derivf_6();
     g_ewald_6 -= dx; //update g_ewald_6
     if (fabs(f_6()) < SMALL) return;
   }
 
   // Failed to converge
 
   char str[128];
   sprintf(str, "Could not adjust g_ewald_6");
   error->all(FLERR, str);
 
 }
 
 /* ----------------------------------------------------------------------
  Calculate f(x) for Dispersion interaction
  ------------------------------------------------------------------------- */
 
 double PPPMDisp::f_6()
 {
   double df_rspace, df_kspace;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
 
   df_rspace = lj_rspace_error();
    
   double qopt = compute_qopt_6();
   df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
    
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
  Calculate numerical derivative f'(x) using forward difference
  [f(x + h) - f(x)] / h
  ------------------------------------------------------------------------- */
             
 double PPPMDisp::derivf_6()
 {  
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
   
   f1 = f_6();
   g_ewald_old = g_ewald_6;
   g_ewald_6 += h;
   f2 = f_6();
   g_ewald_6 = g_ewald_old;
   df = (f2 - f1)/h;
   
   return df;
 } 
 
 
 /* ----------------------------------------------------------------------
    calculate an initial value for g_ewald_6
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::set_init_g6()
 {
   // use xprd,yprd,zprd even if triclinic so grid size is the same
   // adjust z dimension for 2d slab PPPM
   // 3d PPPM just uses zprd since slab_volfactor = 1.0
 
   // make initial g_ewald estimate
   // based on desired error and real space cutoff
  
   // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj
   // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0
   // else, repeat multiply g_ewald_6 by 2 until df_real > 0
   // perform bisection for the last two values of
   double df_real;
   double g_ewald_old; 
   double gmin, gmax;
 
   // check if there is a user defined accuracy
   double acc_rspace = accuracy;
   if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
 
   g_ewald_6 = 1.0/cutoff_lj;
   df_real = lj_rspace_error() - acc_rspace;
   int counter = 0;
   if (df_real > 0) {
     while (df_real > 0 && counter < LARGE) {
       counter++;
       g_ewald_old = g_ewald_6;
       g_ewald_6 *= 2;
       df_real = lj_rspace_error() - acc_rspace;
     }
   }
 
   if (df_real < 0) {
     while (df_real < 0 && counter < LARGE) {
       counter++;
       g_ewald_old = g_ewald_6;
       g_ewald_6 *= 0.5;
       df_real = lj_rspace_error() - acc_rspace;
     }
   }
 
   if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
 
   gmin = MIN(g_ewald_6, g_ewald_old);
   gmax = MAX(g_ewald_6, g_ewald_old);
   g_ewald_6 = gmin + 0.5*(gmax-gmin);
   counter = 0;
   while (gmax-gmin > SMALL && counter < LARGE) {
     counter++;
     df_real = lj_rspace_error() -acc_rspace;
     if (df_real < 0) gmax = g_ewald_6;
     else gmin = g_ewald_6;
     g_ewald_6 = gmin + 0.5*(gmax-gmin);
   }
   if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
 
 }
 
 /* ----------------------------------------------------------------------
    calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::set_n_pppm_6()
 {
   bigint natoms = atom->natoms;
 
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   double h, h_x,h_y,h_z;
 
   double acc_kspace = accuracy;
   if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;
 
   // initial value for the grid spacing
   h = h_x = h_y = h_z = 4.0/g_ewald_6;
   // decrease grid spacing untill required precision is obtained
   int count = 0;
   while(1) {
   
     // set grid dimension
     nx_pppm_6 = static_cast<int> (xprd/h_x);
     ny_pppm_6 = static_cast<int> (yprd/h_y);
     nz_pppm_6 = static_cast<int> (zprd_slab/h_z);
 
     if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
     if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
     if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
 
     //set local grid dimension
     int npey_fft,npez_fft;
     if (nz_pppm_6 >= nprocs) {
       npey_fft = 1;
       npez_fft = nprocs;
     } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
 
     int me_y = me % npey_fft;
     int me_z = me / npey_fft;
 
     nxlo_fft_6 = 0;
     nxhi_fft_6 = nx_pppm_6 - 1;
     nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
     nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
     nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
     nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
 
     double qopt = compute_qopt_6();
  
     double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
 
     count++;
 
     // break loop if the accuracy has been reached or too many loops have been performed
     if (df_kspace <= acc_kspace) break;
     if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
     h *= 0.95;
     h_x = h_y = h_z = h;
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate the real space error for dispersion interactions
    ---------------------------------------------------------------------- */
 
 double PPPMDisp::lj_rspace_error()
 {
   bigint natoms = atom->natoms;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
 
   double deltaf;
   double rgs = (cutoff_lj*g_ewald_6);
   rgs *= rgs;
   double rgs_inv = 1.0/rgs;
   deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
     exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
   return deltaf;
 }
 
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) coulomb green function
    fills greensfn[] for this proc's section of the FFT grid
    (x index fastest, then y, then z) and sets volume
    ---------------------------------------------------------------------- */ 
 
 void PPPMDisp::compute_gf()
 {
   int k,l,m,n;
   double *prd;
 
   // box lengths: lamda (reduced) lengths are used for triclinic boxes
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   // wave vector spacing in each dimension (z uses the slab-extended length)
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int kper,lper,mper;
   double snx,sny,snz,snx2,sny2,snz2;
   double sqk;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double numerator,denominator;
 
 
   // n = running index into greensfn[]
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     // integer division: index m is kept if 2*m < nz_pppm, else shifted by -nz_pppm
     mper = m - nz_pppm*(2*m/nz_pppm);
     qz = unitkz*mper;
     // snz2 = sin^2 term passed to gf_denom()
     snz = sin(0.5*qz*zprd_slab/nz_pppm);
     snz2 = snz*snz;
     // sz = Gaussian factor, wz = (sin(arg)/arg)^(2*order) (1 when arg == 0)
     sz = exp(-0.25*pow(qz/g_ewald,2.0));
     wz = 1.0;
     argz = 0.5*qz*zprd_slab/nz_pppm;
     if (argz != 0.0) wz = pow(sin(argz)/argz,order);
     wz *= wz;
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
       qy = unitky*lper;
       sny = sin(0.5*qy*yprd/ny_pppm);
       sny2 = sny*sny;
       sy = exp(-0.25*pow(qy/g_ewald,2.0));
       wy = 1.0;
       argy = 0.5*qy*yprd/ny_pppm;
       if (argy != 0.0) wy = pow(sin(argy)/argy,order);
       wy *= wy;
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
         qx = unitkx*kper;
         snx = sin(0.5*qx*xprd/nx_pppm);
         snx2 = snx*snx;
         sx = exp(-0.25*pow(qx/g_ewald,2.0));
         wx = 1.0;
         argx = 0.5*qx*xprd/nx_pppm;
         if (argx != 0.0) wx = pow(sin(argx)/argx,order);
         wx *= wx;
 
         // squared length of the wave vector
         sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
         // the zero wave vector gets a zero entry
         if (sqk != 0.0) {
           numerator = 4.0*MY_PI/sqk;
           denominator = gf_denom(snx2,sny2,snz2, gf_b, order);  
           greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
         } else greensfn[n++] = 0.0;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    and Coulomb interaction 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
                                     int nxlo_ft, int nylo_ft, int nzlo_ft,
                                     int nxhi_ft, int nyhi_ft, int nzhi_ft,
                                     double *sf_pre1, double *sf_pre2, double *sf_pre3,
                                     double *sf_pre4, double *sf_pre5, double *sf_pre6)
 {
   // box lengths: prd for an orthogonal box, prd_lamda for a triclinic one
   // the z length is multiplied by slab_volfactor
 
   double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
 
   const double unitkx = (2.0*MY_PI/xprd);
   const double unitky = (2.0*MY_PI/yprd);
   const double unitkz = (2.0*MY_PI/zprd_slab);
 
   // images -nb..nb are stored in slots 0..4 of the weight tables
   // table suffix 0/1/2 = wave vector moved by 0, 1 or 2 extra mesh periods
 
   const int nb = 2;
   double wx0[5],wy0[5],wz0[5];
   double wx1[5],wy1[5],wz1[5];
   double wx2[5],wy2[5],wz2[5];
 
   int n = 0;
   for (int m = nzlo_ft; m <= nzhi_ft; m++) {
     const int mper = m - nzp*(2*m/nzp);
 
     for (int l = nylo_ft; l <= nyhi_ft; l++) {
       const int lper = l - nyp*(2*l/nyp);
 
       for (int k = nxlo_ft; k <= nxhi_ft; k++) {
         const int kper = k - nxp*(2*k/nxp);
 
         // fill the weight tables: (sin(arg)/arg)^ord, or 1 when arg == 0
 
         for (int img = -nb; img <= nb; img++) {
           const int slot = img+2;
           double q,arg;
 
           // x direction
 
           wx0[slot] = wx1[slot] = wx2[slot] = 1.0;
           q = unitkx*(kper+nxp*img);
           arg = 0.5*q*xprd/nxp;
           if (arg != 0.0) wx0[slot] = pow(sin(arg)/arg,ord);
           q = unitkx*(kper+nxp*(img+1));
           arg = 0.5*q*xprd/nxp;
           if (arg != 0.0) wx1[slot] = pow(sin(arg)/arg,ord);
           q = unitkx*(kper+nxp*(img+2));
           arg = 0.5*q*xprd/nxp;
           if (arg != 0.0) wx2[slot] = pow(sin(arg)/arg,ord);
 
           // y direction
 
           wy0[slot] = wy1[slot] = wy2[slot] = 1.0;
           q = unitky*(lper+nyp*img);
           arg = 0.5*q*yprd/nyp;
           if (arg != 0.0) wy0[slot] = pow(sin(arg)/arg,ord);
           q = unitky*(lper+nyp*(img+1));
           arg = 0.5*q*yprd/nyp;
           if (arg != 0.0) wy1[slot] = pow(sin(arg)/arg,ord);
           q = unitky*(lper+nyp*(img+2));
           arg = 0.5*q*yprd/nyp;
           if (arg != 0.0) wy2[slot] = pow(sin(arg)/arg,ord);
 
           // z direction (slab-adjusted length)
 
           wz0[slot] = wz1[slot] = wz2[slot] = 1.0;
           q = unitkz*(mper+nzp*img);
           arg = 0.5*q*zprd_slab/nzp;
           if (arg != 0.0) wz0[slot] = pow(sin(arg)/arg,ord);
           q = unitkz*(mper+nzp*(img+1));
           arg = 0.5*q*zprd_slab/nzp;
           if (arg != 0.0) wz1[slot] = pow(sin(arg)/arg,ord);
           q = unitkz*(mper+nzp*(img+2));
           arg = 0.5*q*zprd_slab/nzp;
           if (arg != 0.0) wz2[slot] = pow(sin(arg)/arg,ord);
         }
 
         // sum products of the unshifted weight with each shifted weight
         // over all 5x5x5 image combinations
 
         double acc1 = 0.0, acc2 = 0.0, acc3 = 0.0;
         double acc4 = 0.0, acc5 = 0.0, acc6 = 0.0;
 
         for (int ix = 0; ix <= 4; ix++) {
           for (int iy = 0; iy <= 4; iy++) {
             for (int iz = 0; iz <= 4; iz++) {
               const double base = wx0[ix]*wy0[iy]*wz0[iz];
               const double sx1 = wx1[ix]*wy0[iy]*wz0[iz];
               const double sx2 = wx2[ix]*wy0[iy]*wz0[iz];
               const double sy1 = wx0[ix]*wy1[iy]*wz0[iz];
               const double sy2 = wx0[ix]*wy2[iy]*wz0[iz];
               const double sz1 = wx0[ix]*wy0[iy]*wz1[iz];
               const double sz2 = wx0[ix]*wy0[iy]*wz2[iz];
 
               acc1 += base*sx1;
               acc2 += base*sx2;
               acc3 += base*sy1;
               acc4 += base*sy2;
               acc5 += base*sz1;
               acc6 += base*sz2;
             }
           }
         }
 
         // one entry per FFT grid point in each of the six output arrays
 
         sf_pre1[n] = acc1;
         sf_pre2[n] = acc2;
         sf_pre3[n] = acc3;
         sf_pre4[n] = acc4;
         sf_pre5[n] = acc5;
         sf_pre6[n] = acc6;
         n++;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) dispersion green function
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::compute_gf_6()
 {
   // box lengths: prd for an orthogonal box, prd_lamda for a triclinic one
   // the z length is multiplied by slab_volfactor
 
   double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
 
   const double unitkx = (2.0*MY_PI/xprd);
   const double unitky = (2.0*MY_PI/yprd);
   const double unitkz = (2.0*MY_PI/zprd_slab);
 
   // constants that do not depend on the grid point
 
   const double inv2ew = 1/(2*g_ewald_6);
   const double rtpi = sqrt(MY_PI);
   const double numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
 
   // fill greensfn_6 in z-slowest, x-fastest order over the FFT grid section
 
   int n = 0;
   for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     const int mper = m - nz_pppm_6*(2*m/nz_pppm_6);
     const double qz = unitkz*mper;
     const double snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
     const double snz2 = snz*snz;
     const double sz = exp(-qz*qz*inv2ew*inv2ew);
     const double argz = 0.5*qz*zprd_slab/nz_pppm_6;
     double wz = (argz != 0.0) ? pow(sin(argz)/argz,order_6) : 1.0;
     wz *= wz;
 
     for (int l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       const int lper = l - ny_pppm_6*(2*l/ny_pppm_6);
       const double qy = unitky*lper;
       const double sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
       const double sny2 = sny*sny;
       const double sy = exp(-qy*qy*inv2ew*inv2ew);
       const double argy = 0.5*qy*yprd/ny_pppm_6;
       double wy = (argy != 0.0) ? pow(sin(argy)/argy,order_6) : 1.0;
       wy *= wy;
 
       for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         const int kper = k - nx_pppm_6*(2*k/nx_pppm_6);
         const double qx = unitkx*kper;
         const double snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
         const double snx2 = snx*snx;
         const double sx = exp(-qx*qx*inv2ew*inv2ew);
         const double argx = 0.5*qx*xprd/nx_pppm_6;
         double wx = (argx != 0.0) ? pow(sin(argx)/argx,order_6) : 1.0;
         wx *= wx;
 
         const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
         // the zero wave vector gets a zero entry
 
         if (sqk == 0.0) {
           greensfn_6[n++] = 0.0;
           continue;
         }
 
         const double denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
         const double rtsqk = sqrt(sqk);
         const double term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
                   2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
         greensfn_6[n++] = numerator*term*wx*wy*wz/denominator;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    and Coulomb interaction 
 ------------------------------------------------------------------------- */
 void PPPMDisp::compute_sf_coeff()
 {
   // box lengths (z scaled by slab_volfactor); also refreshes the volume member
 
   double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   for (int c = 0; c < 6; c++) sf_coeff[c] = 0.0;
 
   // accumulate precoefficient * Green's function over my FFT grid points
 
   int idx = 0;
   for (int iz = nzlo_fft; iz <= nzhi_fft; iz++) {
     for (int iy = nylo_fft; iy <= nyhi_fft; iy++) {
       for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
         sf_coeff[0] += sf_precoeff1[idx]*greensfn[idx];
         sf_coeff[1] += sf_precoeff2[idx]*greensfn[idx];
         sf_coeff[2] += sf_precoeff3[idx]*greensfn[idx];
         sf_coeff[3] += sf_precoeff4[idx]*greensfn[idx];
         sf_coeff[4] += sf_precoeff5[idx]*greensfn[idx];
         sf_coeff[5] += sf_precoeff6[idx]*greensfn[idx];
         idx++;
       }
     }
   }
 
   // per-direction prefactors: pi/volume times mesh points per unit length
   // the odd-numbered coefficients carry an additional factor of 2
 
   const double common = MY_PI/volume;
   const double prex = common * (nx_pppm/xprd);
   const double prey = common * (ny_pppm/yprd);
   const double prez = common * (nz_pppm/zprd_slab);
 
   sf_coeff[0] *= prex;
   sf_coeff[1] *= prex*2;
   sf_coeff[2] *= prey;
   sf_coeff[3] *= prey*2;
   sf_coeff[4] *= prez;
   sf_coeff[5] *= prez*2;
 
   // sum the partial results of all procs and keep the total locally
 
   double total[6];
   MPI_Allreduce(sf_coeff,total,6,MPI_DOUBLE,MPI_SUM,world);
   for (int c = 0; c < 6; c++) sf_coeff[c] = total[c];
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    and Dispersion interaction 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_sf_coeff_6()
 {
   // box lengths (z scaled by slab_volfactor); also refreshes the volume member
 
   double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   for (int c = 0; c < 6; c++) sf_coeff_6[c] = 0.0;
 
   // accumulate precoefficient * Green's function over my dispersion FFT grid
 
   int idx = 0;
   for (int iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++) {
     for (int iy = nylo_fft_6; iy <= nyhi_fft_6; iy++) {
       for (int ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
         sf_coeff_6[0] += sf_precoeff1_6[idx]*greensfn_6[idx];
         sf_coeff_6[1] += sf_precoeff2_6[idx]*greensfn_6[idx];
         sf_coeff_6[2] += sf_precoeff3_6[idx]*greensfn_6[idx];
         sf_coeff_6[3] += sf_precoeff4_6[idx]*greensfn_6[idx];
         sf_coeff_6[4] += sf_precoeff5_6[idx]*greensfn_6[idx];
         sf_coeff_6[5] += sf_precoeff6_6[idx]*greensfn_6[idx];
         idx++;
       }
     }
   }
 
   // per-direction prefactors: pi/volume times mesh points per unit length
   // the odd-numbered coefficients carry an additional factor of 2
 
   const double common = MY_PI/volume;
   const double prex = common * (nx_pppm_6/xprd);
   const double prey = common * (ny_pppm_6/yprd);
   const double prez = common * (nz_pppm_6/zprd_slab);
 
   sf_coeff_6[0] *= prex;
   sf_coeff_6[1] *= prex*2;
   sf_coeff_6[2] *= prey;
   sf_coeff_6[3] *= prey*2;
   sf_coeff_6[4] *= prez;
   sf_coeff_6[5] *= prez*2;
 
   // sum the partial results of all procs and keep the total locally
 
   double total[6];
   MPI_Allreduce(sf_coeff_6,total,6,MPI_DOUBLE,MPI_SUM,world);
   for (int c = 0; c < 6; c++) sf_coeff_6[c] = total[c];
 }
 
 /* ----------------------------------------------------------------------
    denominator for Hockney-Eastwood Green's function
      of x,y,z = sin(kx*deltax/2), etc
 
             inf                 n-1
    S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
            j=-inf               l=0
 
           = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
    gf_b = denominator expansion coeffs 
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
 {
   // evaluate the polynomial with coefficients g_b[0..ord-1] at x, y and z
   // by Horner's rule, highest coefficient first
 
   double polyx = 0.0, polyy = 0.0, polyz = 0.0;
 
   int l = ord;
   while (l-- > 0) {
     polyx = g_b[l] + polyx*x;
     polyy = g_b[l] + polyy*y;
     polyz = g_b[l] + polyz*z;
   }
 
   // result is the square of the product of the three polynomial values
 
   const double prod = polyx*polyy*polyz;
   return prod*prod;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_gf_denom(double* gf, int ord)
 {
   int k,l,m;
   
   for (l = 1; l < ord; l++) gf[l] = 0.0;
   gf[0] = 1.0;
   
   for (m = 1; m < ord; m++) {
     for (l = m; l > 0; l--) 
       gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
     gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
   }
 
   bigint ifact = 1;
   for (k = 1; k < 2*ord; k++) ifact *= k;
   double gaminv = 1.0/ifact;
   for (l = 0; l < ord; l++) gf[l] *= gaminv;
 }
 
 /* ----------------------------------------------------------------------
    ghost-swap to accumulate full density in brick decomposition 
    remap density from 3d brick decomposition to FFT decomposition
    for coulomb interaction or dispersion interaction with geometric
    mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
                          int nxhi_i, int nyhi_i, int nzhi_i,
                          FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
                          LAMMPS_NS::Remap* rmp)
 {
   int n,ix,iy,iz;
 
   // copy grabs inner portion of density from 3d brick
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   n = 0;
   for (iz = nzlo_i; iz <= nzhi_i; iz++)
     for (iy = nylo_i; iy <= nyhi_i; iy++)
       for (ix = nxlo_i; ix <= nxhi_i; ix++)
 	dfft[n++] = dbrick[iz][iy][ix];
 
   rmp->perform(dfft,dfft,work);
 }
 
 
 /* ----------------------------------------------------------------------
    ghost-swap to accumulate full density in brick decomposition 
    remap density from 3d brick decomposition to FFT decomposition
    for dispersion with arithmetic mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::brick2fft_a()
 {
   // pack the inner portion of each of the 7 density bricks (a0..a6)
   //   into its contiguous FFT array, x index fastest
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   int count = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
         density_fft_a0[count] = density_brick_a0[iz][iy][ix];
         density_fft_a1[count] = density_brick_a1[iz][iy][ix];
         density_fft_a2[count] = density_brick_a2[iz][iy][ix];
         density_fft_a3[count] = density_brick_a3[iz][iy][ix];
         density_fft_a4[count] = density_brick_a4[iz][iy][ix];
         density_fft_a5[count] = density_brick_a5[iz][iy][ix];
         density_fft_a6[count] = density_brick_a6[iz][iy][ix];
         count++;
       }
     }
   }
 
   // in-place remap of every packed array, sharing work1_6 as scratch space
 
   remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
   remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
   remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
   remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
   remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
   remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
   remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
 }
 
 /* ----------------------------------------------------------------------
    ghost-swap to accumulate full density in brick decomposition 
    remap density from 3d brick decomposition to FFT decomposition
    for dispersion with special case
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::brick2fft_none()
 {
   // for each of the nsplit_alloc grids, pack the inner portion of the
   //   3d brick into the matching contiguous FFT array, x index fastest
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   for (int igrid = 0; igrid < nsplit_alloc; igrid++) {
     int count = 0;
     for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
       for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
         for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
           density_fft_none[igrid][count] = density_brick_none[igrid][iz][iy][ix];
           count++;
         }
       }
     }
   }
 
   // remap each packed array in place once all grids have been copied
 
   for (int igrid = 0; igrid < nsplit_alloc; igrid++) {
     remap_6->perform(density_fft_none[igrid],density_fft_none[igrid],work1_6);
   }
 }
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::particle_map(double delx, double dely, double delz,
                              double sft, int** p2g, int nup, int nlow,
                              int nxlo, int nylo, int nzlo,
                              int nxhi, int nyhi, int nzhi)
 {
   int nx,ny,nz;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
     error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
 
   int flag = 0;
   for (int i = 0; i < nlocal; i++) {
     
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
     ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
     nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
 
     p2g[i][0] = nx;
     p2g[i][1] = ny;
     p2g[i][2] = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlow < nxlo || nx+nup > nxhi ||
 	ny+nlow < nylo || ny+nup > nyhi ||
 	nz+nlow < nzlo || nz+nup > nzhi)
       flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
 }
 
 
 /* ----------------------------------------------------------------------
    wrapper around particle_map()
    forwards all of its arguments, unchanged and in the same order,
    and does no work of its own
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::particle_map_c(double delx, double dely, double delz,
                                double sft, int** p2g, int nup, int nlow,
                                int nxlo, int nylo, int nzlo,
                                int nxhi, int nyhi, int nzhi)
 {
   particle_map(delx, dely, delz, sft, p2g, nup, nlow,
                nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_c()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
 	 ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     z0 = delvolinv * q[i];
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       y0 = z0*rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	x0 = y0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  density_brick[mz][my][mx] += x0*rho1d[0][l];
 	}
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = dispersion "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid --- geometric mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_g()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   int type;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     type = atom->type[i];
     z0 = delvolinv_6 * B[type];
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       y0 = z0*rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	x0 = y0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
 	}
       }
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = dispersion "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid --- arithmetic mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_a()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
 
   // clear 3d density array
 
   memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
 
   // loop over my particles, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   int type;
   double **x = atom->x;
   int nlocal = atom->nlocal;
   
   for (int i = 0; i < nlocal; i++) {
 
     //do the following for all 4 grids
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     type = atom->type[i];
     z0 = delvolinv_6;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       y0 = z0*rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	x0 = y0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           w = x0*rho1d_6[0][l];
 	  density_brick_a0[mz][my][mx] += w*B[7*type];
 	  density_brick_a1[mz][my][mx] += w*B[7*type+1];
 	  density_brick_a2[mz][my][mx] += w*B[7*type+2];
 	  density_brick_a3[mz][my][mx] += w*B[7*type+3];
 	  density_brick_a4[mz][my][mx] += w*B[7*type+4];
 	  density_brick_a5[mz][my][mx] += w*B[7*type+5];
 	  density_brick_a6[mz][my][mx] += w*B[7*type+6];
 	}
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = dispersion "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid --- case when mixing rules don't apply
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_none()
 {
   int k,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
 
   // clear 3d density array
   for (k = 0; k < nsplit_alloc; k++)
     memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	   ngrid_6*sizeof(FFT_SCALAR));
 
 
   // loop over my particles, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   int type;
   double **x = atom->x;
   int nlocal = atom->nlocal;
   
   for (int i = 0; i < nlocal; i++) {
 
     //do the following for all 4 grids
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     type = atom->type[i];
     z0 = delvolinv_6;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       y0 = z0*rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	x0 = y0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           w = x0*rho1d_6[0][l];
           for (k = 0; k < nsplit; k++)
 	    density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k];
 	}
       }
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ik differentiation
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    wk1, wk2      = work arrays holding 2 values per FFT grid point
    dfft          = input density, nft values
    ft1, ft2      = FFT used for the forward (flag 1) / backward (flag -1) step
    nx_p,ny_p,nz_p = mesh sizes, only used for the 1/(nx_p*ny_p*nz_p) scaling
    n*lo_ft/n*hi_ft = index ranges used to look up kx,ky,kz (and kx2,ky2,kz2)
    n*lo_i/n*hi_i = index ranges of the brick arrays that receive the results
    egy, vir      = energy and 6 virial components, accumulated (+=) when
                    eflag_global / vflag_global are set
    gfn           = Green's function, one value per FFT grid point
    vx_brick,vy_brick,vz_brick = output gradient bricks
    u_pa          = written only if eflag_atom is set
    v0_pa..v5_pa  = written by poisson_peratom() only if vflag_atom is set
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                            FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
                            int nx_p, int ny_p, int nz_p, int nft,
                            int nxlo_ft, int nylo_ft, int nzlo_ft,
                            int nxhi_ft, int nyhi_ft, int nzhi_ft,
                            int nxlo_i, int nylo_i, int nzlo_i,
                            int nxhi_i, int nyhi_i, int nzhi_i,
                            double& egy, double* gfn,
                            double* kx, double* ky, double* kz,
                            double* kx2, double* ky2, double* kz2,
                            FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
                            double* vir, double** vcoeff, double** vcoeff2,
                            FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                            FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 
 
 {
   int i,j,k,n;
   double eng;
 
   // transform charge/dispersion density (r -> k)
   // each density value becomes a pair (value, ZEROF) in wk1
 
   n = 0;
   for (i = 0; i < nft; i++) {
     wk1[n++] = dfft[i];
     wk1[n++] = ZEROF;
   }
 
   ft1->compute(wk1,wk1,1);
 
   // if requested, compute energy and virial contribution
   // the mesh-size product is formed in double precision: as an int product
   //   it overflows (undefined behavior) once nx_p*ny_p*nz_p exceeds INT_MAX
 
   double scaleinv = 1.0/(static_cast<double>(nx_p)*ny_p*nz_p);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nft; i++) {
         eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
         for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
         if (eflag_global) egy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nft; i++) {
         egy += 
           s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nft; i++) {
     wk1[n++] *= scaleinv * gfn[i];
     wk1[n++] *= scaleinv * gfn[i];
   }
 
   // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of the vx_brick,vy_brick,vz_brick arrays
 
   // x & y direction gradient
   // both are combined into a single backward transform: afterwards the
   //   first value of each pair goes to vx_brick, the second to vy_brick
 
   n = 0;
   for (k = nzlo_ft; k <= nzhi_ft; k++)
     for (j = nylo_ft; j <= nyhi_ft; j++)
       for (i = nxlo_ft; i <= nxhi_ft; i++) {
         wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
         wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
         n += 2;
       }
 
   ft2->compute(wk2,wk2,-1);
 
   n = 0;
   for (k = nzlo_i; k <= nzhi_i; k++)
     for (j = nylo_i; j <= nyhi_i; j++)
       for (i = nxlo_i; i <= nxhi_i; i++) {
         vx_brick[k][j][i] = wk2[n++];
         vy_brick[k][j][i] = wk2[n++];
       }
 
   if (!eflag_atom) {
     // z direction gradient only
     // only the first value of each pair is kept after the transform
 
     n = 0;
     for (k = nzlo_ft; k <= nzhi_ft; k++)
       for (j = nylo_ft; j <= nyhi_ft; j++)
         for (i = nxlo_ft; i <= nxhi_ft; i++) {
           wk2[n] = kz[k]*wk1[n+1];
           wk2[n+1] = -kz[k]*wk1[n];
           n += 2;
         }
 
     ft2->compute(wk2,wk2,-1);
 
     n = 0;
     for (k = nzlo_i; k <= nzhi_i; k++)
       for (j = nylo_i; j <= nyhi_i; j++)
         for (i = nxlo_i; i <= nxhi_i; i++) {
           vz_brick[k][j][i] = wk2[n];
           n += 2;
         }
 
   } else {
     // z direction gradient & per-atom energy
     // combined into one backward transform: afterwards the first value
     //   of each pair goes to vz_brick, the second to u_pa
 
     n = 0;
     for (k = nzlo_ft; k <= nzhi_ft; k++)
       for (j = nylo_ft; j <= nyhi_ft; j++)
         for (i = nxlo_ft; i <= nxhi_ft; i++) {
           wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
           wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
           n += 2;
         }
 
     ft2->compute(wk2,wk2,-1);
 
     n = 0;
     for (k = nzlo_i; k <= nzhi_i; k++)
       for (j = nylo_i; j <= nyhi_i; j++)
         for (i = nxlo_i; i <= nxhi_i; i++) {
           vz_brick[k][j][i] = wk2[n++];
           u_pa[k][j][i] = wk2[n++];
         }
   }
 
   // per-atom virial terms reuse V(k) in wk1, with wk2 as scratch space
 
   if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                   nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                   v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ad differentiation
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    wk1, wk2      = work arrays holding 2 values per FFT grid point
    dfft          = input density, nft values
    ft1, ft2      = FFT used for the forward (flag 1) / backward (flag -1) step
    nx_p,ny_p,nz_p = mesh sizes, only used for the 1/(nx_p*ny_p*nz_p) scaling
    n*lo_ft/n*hi_ft = index ranges of the FFT grid section copied wk1 -> wk2
    n*lo_i/n*hi_i = index ranges of the brick arrays that receive the results
    egy, vir      = energy and 6 virial components, accumulated (+=) when
                    eflag_global / vflag_global are set
    gfn           = Green's function, one value per FFT grid point
    u_pa          = output brick, always written
    v0_pa..v5_pa  = written by poisson_peratom() only if vflag_atom is set
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                            FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
                            int nx_p, int ny_p, int nz_p, int nft,
                            int nxlo_ft, int nylo_ft, int nzlo_ft,
                            int nxhi_ft, int nyhi_ft, int nzhi_ft,
                            int nxlo_i, int nylo_i, int nzlo_i,
                            int nxhi_i, int nyhi_i, int nzhi_i,
                            double& egy, double* gfn,
                            double* vir, double** vcoeff, double** vcoeff2,
                            FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                            FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 
 
 {
   int i,j,k,n;
   double eng;
 
   // transform charge/dispersion density (r -> k)
   // each density value becomes a pair (value, ZEROF) in wk1
 
   n = 0;
   for (i = 0; i < nft; i++) {
     wk1[n++] = dfft[i];
     wk1[n++] = ZEROF;
   }
 
   ft1->compute(wk1,wk1,1);
 
   // if requested, compute energy and virial contribution
   // the mesh-size product is formed in double precision: as an int product
   //   it overflows (undefined behavior) once nx_p*ny_p*nz_p exceeds INT_MAX
 
   double scaleinv = 1.0/(static_cast<double>(nx_p)*ny_p*nz_p);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nft; i++) {
         eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
         for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
         if (eflag_global) egy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nft; i++) {
         egy += 
           s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nft; i++) {
     wk1[n++] *= scaleinv * gfn[i];
     wk1[n++] *= scaleinv * gfn[i];
   }
 
   // copy V(k) to wk2 so that wk1 is still intact for poisson_peratom()
 
   n = 0;
   for (k = nzlo_ft; k <= nzhi_ft; k++)
     for (j = nylo_ft; j <= nyhi_ft; j++)
       for (i = nxlo_ft; i <= nxhi_ft; i++) {
         wk2[n] = wk1[n];
         wk2[n+1] = wk1[n+1];
         n += 2;
       }
 
   ft2->compute(wk2,wk2,-1);
 
   // keep the first value of each transformed pair, skip the second
 
   n = 0;
   for (k = nzlo_i; k <= nzhi_i; k++)
     for (j = nylo_i; j <= nyhi_i; j++)
       for (i = nxlo_i; i <= nxhi_i; i++) {
         u_pa[k][j][i] = wk2[n++];
         n++;
       }
 
   // per-atom virial terms reuse V(k) in wk1, with wk2 as scratch space
 
   if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                   nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                   v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
 
 }
 
 /* ----------------------------------------------------------------------
    Fourier Transform for per atom virial calculations
 ------------------------------------------------------------------------- */
 
 void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, 
                                  double** vcoeff, double** vcoeff2, int nft,
                                  int nxlo_i, int nylo_i, int nzlo_i,
                                  int nxhi_i, int nyhi_i, int nzhi_i,
                                  FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                                  FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 {
   // three passes, each producing two output bricks from one backward FFT:
   //   wk2 pair = wk1 pair combined with two coefficients (a,b) as
   //     (first*a - second*b, second*a + first*b)
   //   after the transform the first value of each pair goes to the
   //   even-numbered brick and the second to the odd-numbered brick
 
   int n,ix,iy,iz;
 
   // v0 & v1 term: coefficients vcoeff[][0] and vcoeff[][1]
 
   for (int ig = 0; ig < nft; ig++) {
     const int first = 2*ig;
     const int second = first+1;
     wk2[first] = wk1[first]*vcoeff[ig][0] - wk1[second]*vcoeff[ig][1];
     wk2[second] = wk1[second]*vcoeff[ig][0] +  wk1[first]*vcoeff[ig][1];
   }
 
   ft2->compute(wk2,wk2,-1);
 
   n = 0;
   for (iz = nzlo_i; iz <= nzhi_i; iz++) {
     for (iy = nylo_i; iy <= nyhi_i; iy++) {
       for (ix = nxlo_i; ix <= nxhi_i; ix++) {
         v0_pa[iz][iy][ix] = wk2[n];
         v1_pa[iz][iy][ix] = wk2[n+1];
         n += 2;
       }
     }
   }
 
   // v2 & v3 term: coefficients vcoeff[][2] and vcoeff2[][0]
 
   for (int ig = 0; ig < nft; ig++) {
     const int first = 2*ig;
     const int second = first+1;
     wk2[first] = wk1[first]*vcoeff[ig][2] - wk1[second]*vcoeff2[ig][0];
     wk2[second] = wk1[second]*vcoeff[ig][2] + wk1[first]*vcoeff2[ig][0];
   }
 
   ft2->compute(wk2,wk2,-1);
 
   n = 0;
   for (iz = nzlo_i; iz <= nzhi_i; iz++) {
     for (iy = nylo_i; iy <= nyhi_i; iy++) {
       for (ix = nxlo_i; ix <= nxhi_i; ix++) {
         v2_pa[iz][iy][ix] = wk2[n];
         v3_pa[iz][iy][ix] = wk2[n+1];
         n += 2;
       }
     }
   }
 
   // v4 & v5 term: coefficients vcoeff2[][1] and vcoeff2[][2]
 
   for (int ig = 0; ig < nft; ig++) {
     const int first = 2*ig;
     const int second = first+1;
     wk2[first] = wk1[first]*vcoeff2[ig][1] - wk1[second]*vcoeff2[ig][2];
     wk2[second] = wk1[second]*vcoeff2[ig][1] + wk1[first]*vcoeff2[ig][2];
   }
 
   ft2->compute(wk2,wk2,-1);
 
   n = 0;
   for (iz = nzlo_i; iz <= nzhi_i; iz++) {
     for (iy = nylo_i; iy <= nyhi_i; iy++) {
       for (ix = nxlo_i; ix <= nxhi_i; ix++) {
         v4_pa[iz][iy][ix] = wk2[n];
         v5_pa[iz][iy][ix] = wk2[n+1];
         n += 2;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
                               FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                               FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                               FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                               FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 
 {
   // Handles two densities on the shared "_6" mesh in one call.
   //   dfft_1, dfft_2        : input densities, nfft_6 values each
   //   v{x,y,z}brick_1 / _2  : receive the x,y,z gradient grids per density
   //   u_pa_1 / u_pa_2       : receive energy grids, written only if eflag_atom
   //   v0_pa_* ... v5_pa_*   : forwarded to poisson_2s_peratom() if vflag_atom
   // Side effects: overwrites work1_6 and work2_6; adds to energy_6 and
   // virial_6 when eflag_global / vflag_global are set.
 
   int i,j,k,n;
   double eng;
 
   // form the grid-point count in double precision: multiplying the three
   // mesh dimensions as integers (presumably int members) overflows, which is
   // undefined behavior, once the mesh has more than INT_MAX points
   double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
 
   // transform charge/dispersion density (r -> k)
   // only one transform required when energies and pressures do not
   //  need to be calculated: density 1 goes into the even slots and
   //  density 2 into the odd slots of the same array
   if (eflag_global + vflag_global == 0) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n++] = dfft_1[i];
       work1_6[n++] = dfft_2[i];
     }
   
     fft1_6->compute(work1_6,work1_6,1);
   }
   // two transforms are required when energies and pressures are
   //   calculated
   else {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n] = dfft_1[i];
       work2_6[n++] = ZEROF;
       work1_6[n] = ZEROF;
       work2_6[n++] = dfft_2[i];
     }
 
     fft1_6->compute(work1_6,work1_6,1);
     fft1_6->compute(work2_6,work2_6,1);
 
     double s2 = scaleinv*scaleinv;
 
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
         for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
         if (eflag_global)energy_6 += eng;
         n += 2;
       }
     } else {
       // only reached with eflag_global set (the outer test excluded both zero)
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         energy_6 += 
           2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
         n += 2;
       }
     }
     // unify the two transformed vectors for efficient calculations later
     for ( i = 0; i < 2*nfft_6; i++) {
       work1_6[i] += work2_6[i];
     }
   }
 
   // scale by 1/total-grid-pts and multiply by the Green's function
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work1_6[n++] *= scaleinv * greensfn_6[i];
     work1_6[n++] *= scaleinv * greensfn_6[i];
   }
 
   // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of the vx/vy/vz brick arrays:
   // even slots -> *_1 brick, odd slots -> *_2 brick
 
   // x direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
   
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vxbrick_1[k][j][i] = work2_6[n++];
         vxbrick_2[k][j][i] = work2_6[n++];
       }
 
   // y direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vybrick_1[k][j][i] = work2_6[n++];
         vybrick_2[k][j][i] = work2_6[n++];
       }
 
   // z direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vzbrick_1[k][j][i] = work2_6[n++];
         vzbrick_2[k][j][i] = work2_6[n++];
       }
 
   // per-atom energy: back-transform the scaled k-space data itself
     
   if (eflag_atom) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work2_6[n] = work1_6[n];
       work2_6[n+1] = work1_6[n+1];
       n += 2;
     }
     
     fft2_6->compute(work2_6,work2_6,-1); 
     
     n = 0;
     for (k = nzlo_in_6; k <= nzhi_in_6; k++)
       for (j = nylo_in_6; j <= nyhi_in_6; j++)
         for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
           u_pa_1[k][j][i] = work2_6[n++];
           u_pa_2[k][j][i] = work2_6[n++];
         }
   } 
 
   // per-atom virial grids are derived from work1_6, which is left intact above
   if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                      v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
 }
 
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
                               FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
                               FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
 {
   // Handles two densities on the shared "_6" mesh in one call.
   //   n1, n2               : indices selecting the B[] weight and the
   //                          u_pa[]/v*_pa[] grid for density 1 and density 2
   //   dfft_1, dfft_2       : input densities, nfft_6 values each
   //   v{x,y,z}brick_1 / _2 : receive the gradient grids, scaled by B[n1] / B[n2]
   //   u_pa                 : u_pa[n1], u_pa[n2] are written only if eflag_atom
   //   v0_pa ... v5_pa      : entries n1 and n2 are forwarded to
   //                          poisson_none_peratom() if vflag_atom
   // Side effects: overwrites work1_6 and work2_6; adds to energy_6 and
   // virial_6 when eflag_global / vflag_global are set.
 
   int i,j,k,n;
   double eng;
 
   // form the grid-point count in double precision: multiplying the three
   // mesh dimensions as integers (presumably int members) overflows, which is
   // undefined behavior, once the mesh has more than INT_MAX points
   double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
 
   // transform charge/dispersion density (r -> k)
   // only one transform required when energies and pressures do not
   //  need to be calculated: density 1 goes into the even slots and
   //  density 2 into the odd slots of the same array
   if (eflag_global + vflag_global == 0) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n++] = dfft_1[i];
       work1_6[n++] = dfft_2[i];
     }
   
     fft1_6->compute(work1_6,work1_6,1);
   }
 
 
   // two transforms are required when energies and pressures are
   //   calculated
   else {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n] = dfft_1[i];
       work2_6[n++] = ZEROF;
       work1_6[n] = ZEROF;
       work2_6[n++] = dfft_2[i];
     }
    
 
     fft1_6->compute(work1_6,work1_6,1);
     fft1_6->compute(work2_6,work2_6,1);
 
     double s2 = scaleinv*scaleinv;
 
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
         for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
         if (eflag_global)energy_6 += eng;
         n += 2;
       }
     } else {
       // only reached with eflag_global set (the outer test excluded both zero)
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         energy_6 += 
           s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
         n += 2;
       }
     }
     // unify the two transformed vectors for efficient calculations later
     for ( i = 0; i < 2*nfft_6; i++) {
       work1_6[i] += work2_6[i];
     }
   }
 
   // scale by 1/total-grid-pts and multiply by the Green's function
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work1_6[n++] *= scaleinv * greensfn_6[i];
     work1_6[n++] *= scaleinv * greensfn_6[i];
   }
 
   // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of the vx/vy/vz brick arrays:
   // even slots -> *_1 brick (times B[n1]), odd slots -> *_2 brick (times B[n2])
 
   // x direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
   
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
         vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
       }
 
   // y direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vybrick_1[k][j][i] = B[n1]*work2_6[n++];
         vybrick_2[k][j][i] = B[n2]*work2_6[n++];
       }
 
   // z direction gradient
 
   n = 0;
   for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
     for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
       for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
         work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
         vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
       }
 
   // per-atom energy: back-transform the scaled k-space data itself
     
   if (eflag_atom) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work2_6[n] = work1_6[n];
       work2_6[n+1] = work1_6[n+1];
       n += 2;
     }
     
     fft2_6->compute(work2_6,work2_6,-1); 
     
     n = 0;
     for (k = nzlo_in_6; k <= nzhi_in_6; k++)
       for (j = nylo_in_6; j <= nyhi_in_6; j++)
         for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
           u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
           u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
         }
   } 
 
   // per-atom virial grids are derived from work1_6, which is left intact above
   if (vflag_atom) poisson_none_peratom(n1,n2,
                                        v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
                                        v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
 }
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                               FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                               FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                               FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 
 {
   // Handles two densities on the shared "_6" mesh in one call.
   //   dfft_1, dfft_2      : input densities, nfft_6 values each
   //   u_pa_1 / u_pa_2     : always written with the back-transformed grids
   //                         (unlike the ik variant there is no eflag_atom test)
   //   v0_pa_* ... v5_pa_* : forwarded to poisson_2s_peratom() if vflag_atom
   // Side effects: overwrites work1_6 and work2_6; adds to energy_6 and
   // virial_6 when eflag_global / vflag_global are set.
 
   int i,j,k,n;
   double eng;
 
   // form the grid-point count in double precision: multiplying the three
   // mesh dimensions as integers (presumably int members) overflows, which is
   // undefined behavior, once the mesh has more than INT_MAX points
   double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
 
   // transform charge/dispersion density (r -> k)
   // only one transform required when energies and pressures do not
   //  need to be calculated: density 1 goes into the even slots and
   //  density 2 into the odd slots of the same array
   if (eflag_global + vflag_global == 0) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n++] = dfft_1[i];
       work1_6[n++] = dfft_2[i];
     }
   
     fft1_6->compute(work1_6,work1_6,1);
   }
   // two transforms are required when energies and pressures are
   //   calculated
   else {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n] = dfft_1[i];
       work2_6[n++] = ZEROF;
       work1_6[n] = ZEROF;
       work2_6[n++] = dfft_2[i];
     }
 
     fft1_6->compute(work1_6,work1_6,1);
     fft1_6->compute(work2_6,work2_6,1);
 
     double s2 = scaleinv*scaleinv;
 
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
         for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
         if (eflag_global)energy_6 += eng;
         n += 2;
       }
     } else {
       // only reached with eflag_global set (the outer test excluded both zero)
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         energy_6 += 
           2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
         n += 2;
       }
     }
     // unify the two transformed vectors for efficient calculations later
     for ( i = 0; i < 2*nfft_6; i++) {
       work1_6[i] += work2_6[i];
     }
   }
 
   // scale by 1/total-grid-pts and multiply by the Green's function
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work1_6[n++] *= scaleinv * greensfn_6[i];
     work1_6[n++] *= scaleinv * greensfn_6[i];
   }
 
   // back-transform a copy so that work1_6 stays available for the
   // per-atom virial pass below
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n];
     work2_6[n+1] = work1_6[n+1];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
 
   // even slots -> u_pa_1, odd slots -> u_pa_2
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         u_pa_1[k][j][i] = work2_6[n++];
         u_pa_2[k][j][i] = work2_6[n++];
       } 
 
   if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                      v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
 }
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                                FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
                                FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
                                FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
 {
   // Handles two densities on the shared "_6" mesh in one call.
   //   n1, n2          : indices selecting the B[] weight and the v*_pa[] grid
   //                     for density 1 and density 2
   //   dfft_1, dfft_2  : input densities, nfft_6 values each
   //   u_pa_1 / u_pa_2 : always written with the back-transformed grids,
   //                     scaled by B[n1] / B[n2]
   //   v0_pa ... v5_pa : entries n1 and n2 are forwarded to
   //                     poisson_none_peratom() if vflag_atom
   // Side effects: overwrites work1_6 and work2_6; adds to energy_6 and
   // virial_6 when eflag_global / vflag_global are set.
 
   int i,j,k,n;
   double eng;
 
   // form the grid-point count in double precision: multiplying the three
   // mesh dimensions as integers (presumably int members) overflows, which is
   // undefined behavior, once the mesh has more than INT_MAX points
   double scaleinv = 1.0/((double) nx_pppm_6*ny_pppm_6*nz_pppm_6);
 
   // transform charge/dispersion density (r -> k)
   // only one transform required when energies and pressures do not
   //  need to be calculated: density 1 goes into the even slots and
   //  density 2 into the odd slots of the same array
   if (eflag_global + vflag_global == 0) {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n++] = dfft_1[i];
       work1_6[n++] = dfft_2[i];
     }
   
     fft1_6->compute(work1_6,work1_6,1);
   }
   // two transforms are required when energies and pressures are
   //   calculated
   else {
     n = 0;
     for (i = 0; i < nfft_6; i++) {
       work1_6[n] = dfft_1[i];
       work2_6[n++] = ZEROF;
       work1_6[n] = ZEROF;
       work2_6[n++] = dfft_2[i];
     }
 
     fft1_6->compute(work1_6,work1_6,1);
     fft1_6->compute(work2_6,work2_6,1);
 
     double s2 = scaleinv*scaleinv;
 
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
         for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
         if (eflag_global)energy_6 += eng;
         n += 2;
       }
     } else {
       // only reached with eflag_global set (the outer test excluded both zero)
       n = 0;
       for (i = 0; i < nfft_6; i++) {
         energy_6 += 
           s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
         n += 2;
       }
     }
     // unify the two transformed vectors for efficient calculations later
     for ( i = 0; i < 2*nfft_6; i++) {
       work1_6[i] += work2_6[i];
     }
   }
 
   // scale by 1/total-grid-pts and multiply by the Green's function
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work1_6[n++] *= scaleinv * greensfn_6[i];
     work1_6[n++] *= scaleinv * greensfn_6[i];
   }
 
   // back-transform a copy so that work1_6 stays available for the
   // per-atom virial pass below
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n];
     work2_6[n+1] = work1_6[n+1];
     n += 2;
   }
   
   fft2_6->compute(work2_6,work2_6,-1); 
 
   // even slots -> u_pa_1 (times B[n1]), odd slots -> u_pa_2 (times B[n2])
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         u_pa_1[k][j][i] = B[n1]*work2_6[n++];
         u_pa_2[k][j][i] = B[n2]*work2_6[n++];
       } 
 
   if (vflag_atom) poisson_none_peratom(n1,n2,
                                        v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
                                        v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
 }
 
 /* ----------------------------------------------------------------------
    Fourier Transform for per atom virial calculations
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                                    FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                                    FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                                    FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 {
   // Six virial components, one backward transform each.
   // Component c weights the k-space data held in work1_6 by
   //   vg_6[.][c]    for c = 0,1,2
   //   vg2_6[.][c-3] for c = 3,4,5
   // and, after the transform, routes the even slots of work2_6 to the
   // vc_pa_1 grid and the odd slots to the vc_pa_2 grid.
 
   FFT_SCALAR ***grid_1[6] = {v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1};
   FFT_SCALAR ***grid_2[6] = {v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2};
 
   for (int comp = 0; comp < 6; comp++) {
     const int col = comp % 3;
 
     // weight both slots of every point by the same coefficient
     if (comp < 3) {
       for (int p = 0; p < nfft_6; p++) {
         const int e = 2*p;
         work2_6[e] = work1_6[e]*vg_6[p][col];
         work2_6[e+1] = work1_6[e+1]*vg_6[p][col];
       }
     } else {
       for (int p = 0; p < nfft_6; p++) {
         const int e = 2*p;
         work2_6[e] = work1_6[e]*vg2_6[p][col];
         work2_6[e+1] = work1_6[e+1]*vg2_6[p][col];
       }
     }
 
     fft2_6->compute(work2_6,work2_6,-1);
 
     // split the interleaved result between the two output grids
     FFT_SCALAR ***out_1 = grid_1[comp];
     FFT_SCALAR ***out_2 = grid_2[comp];
     int pos = 0;
     for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
       for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
         for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
           out_1[iz][iy][ix] = work2_6[pos];
           out_2[iz][iy][ix] = work2_6[pos+1];
           pos += 2;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    Fourier Transform for per atom virial calculations
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_none_peratom(int n1, int n2,                              
                                  FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                                  FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                                  FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                                  FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 {
   //Compute first virial term v0
   int n, i, j, k;
 
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg_6[i][0];
     work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
     n += 2;
   }
    
   fft2_6->compute(work2_6,work2_6,-1); 
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
 	 
   //Compute second virial term v1  
   
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg_6[i][1];
     work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
   
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
 	  
   //Compute third virial term v2
    
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg_6[i][2];
     work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
 
   //Compute fourth virial term v3
    
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg2_6[i][0];
     work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
 
   //Compute fifth virial term v4
    
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg2_6[i][1];
     work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
    
   //Compute last virial term v5
    
   n = 0;
   for (i = 0; i < nfft_6; i++) {
     work2_6[n] = work1_6[n]*vg2_6[i][2];
     work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
     n += 2;
   }
     
   fft2_6->compute(work2_6,work2_6,-1); 
     
   n = 0;
   for (k = nzlo_in_6; k <= nzhi_in_6; k++)
     for (j = nylo_in_6; j <= nyhi_in_6; j++)
       for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
         v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
         v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
       }
 }
  
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles 
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	y0 = z0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d[0][l];
 	  ekx -= x0*vdx_brick[mz][my][mx];
 	  eky -= x0*vdy_brick[mz][my][mx];
 	  ekz -= x0*vdz_brick[mz][my][mx];
 	}
       }
     }
 
     // convert E-field to force
 
     const double qfactor = force->qqrd2e * scale * q[i];
     f[i][0] += qfactor*ekx;
     f[i][1] += qfactor*eky;
     if (slabflag != 2) f[i][2] += qfactor*ekz;
   }
 }
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
    for ad scheme 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
 
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm/xprd;
   double hy_inv = ny_pppm/yprd;
   double hz_inv = nz_pppm/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
     compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
     // convert E-field to force and substract self forces
     const double qfactor = force->qqrd2e * scale;
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
     sf = sf_coeff[0]*sin(2*MY_PI*s1);
     sf += sf_coeff[1]*sin(4*MY_PI*s1);
     sf *= 2*q[i]*q[i];
     f[i][0] += qfactor*(ekx*q[i] - sf);
 
     sf = sf_coeff[2]*sin(2*MY_PI*s2);
     sf += sf_coeff[3]*sin(4*MY_PI*s2);
     sf *= 2*q[i]*q[i];
     f[i][1] += qfactor*(eky*q[i] - sf);
 
 
     sf = sf_coeff[4]*sin(2*MY_PI*s3);
     sf += sf_coeff[5]*sin(4*MY_PI*s3);
     sf *= 2*q[i]*q[i];
     if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get per-atom energy & virial on my particles 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	y0 = z0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d[0][l];
 	  if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];	
 	  if (vflag_atom) {
             v0 += x0*v0_brick[mz][my][mx];
             v1 += x0*v1_brick[mz][my][mx];
             v2 += x0*v2_brick[mz][my][mx];
             v3 += x0*v3_brick[mz][my][mx];
             v4 += x0*v4_brick[mz][my][mx];
             v5 += x0*v5_brick[mz][my][mx];
           }
 	}
       }
     }
 
     // convert E-field to force
 
     const double qfactor = 0.5*force->qqrd2e * scale * q[i];
 
     if (eflag_atom) eatom[i] += u_pa*qfactor;
     if (vflag_atom) {
       vatom[i][0] += v0*qfactor;
       vatom[i][1] += v1*qfactor;
       vatom[i][2] += v2*qfactor;
       vatom[i][3] += v3*qfactor;
       vatom[i][4] += v4*qfactor;
       vatom[i][5] += v5*qfactor;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
 	  ekx -= x0*vdx_brick_g[mz][my][mx];
 	  eky -= x0*vdy_brick_g[mz][my][mx];
 	  ekz -= x0*vdz_brick_g[mz][my][mx];
 	}
       }
     }
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type];
     f[i][0] += lj*ekx;
     f[i][1] += lj*eky;
     if (slabflag != 2) f[i][2] += lj*ekz;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm_6/xprd;
   double hy_inv = ny_pppm_6/yprd;
   double hz_inv = nz_pppm_6/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
  
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
 
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       for (m = nlower_6; m <= nupper_6; m++) {
         my = m+ny;
         for (l = nlower_6; l <= nupper_6; l++) {
           mx = l+nx;
           ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
           eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
           ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type];
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
 
     sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
     sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
     sf *= 2*lj*lj;
     f[i][0] += ekx*lj - sf;
 
     sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
     sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
     sf *= 2*lj*lj;
     f[i][1] += eky*lj - sf;
 
 
     sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
     sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
     sf *= 2*lj*lj;
     if (slabflag != 2) f[i][2] += ekz*lj - sf;
 
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule for per atom quantities
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
 	  if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];	
 	  if (vflag_atom) {
             v0 += x0*v0_brick_g[mz][my][mx];
             v1 += x0*v1_brick_g[mz][my][mx];
             v2 += x0*v2_brick_g[mz][my][mx];
             v3 += x0*v3_brick_g[mz][my][mx];
             v4 += x0*v4_brick_g[mz][my][mx];
             v5 += x0*v5_brick_g[mz][my][mx];
           }
 	}
       }
     }
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type]*0.5;
 
     if (eflag_atom) eatom[i] += u_pa*lj;
     if (vflag_atom) {
       vatom[i][0] += v0*lj;
       vatom[i][1] += v1*lj;
       vatom[i][2] += v2*lj;
       vatom[i][3] += v3*lj;
       vatom[i][4] += v4*lj;
       vatom[i][5] += v5*lj;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule and ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_ik()
 {
   // For every local atom, interpolates the seven gradient grid sets
   // vd{x,y,z}_brick_a0 .. vd{x,y,z}_brick_a6 over the order_6 stencil and
   // adds their weighted sum to atom->f.  Grid set k is weighted by
   // B[7*type+6-k].  The z force is skipped when slabflag == 2.
 
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
   FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
   FFT_SCALAR ekx6, eky6, ekz6;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     // rho1d_6 is read below as the per-dimension stencil weights
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     // reset the 7 x 3 field accumulators for this atom
     ekx0 = eky0 = ekz0 = ZEROF;
     ekx1 = eky1 = ekz1 = ZEROF;
     ekx2 = eky2 = ekz2 = ZEROF;
     ekx3 = eky3 = ekz3 = ZEROF;
     ekx4 = eky4 = ekz4 = ZEROF;
     ekx5 = eky5 = ekz5 = ZEROF;
     ekx6 = eky6 = ekz6 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  // x0 = product of the three 1d weights for this stencil point
 	  x0 = y0*rho1d_6[0][l];
 	  // every grid set is accumulated with a minus sign
 	  ekx0 -= x0*vdx_brick_a0[mz][my][mx];
 	  eky0 -= x0*vdy_brick_a0[mz][my][mx];
 	  ekz0 -= x0*vdz_brick_a0[mz][my][mx];
 	  ekx1 -= x0*vdx_brick_a1[mz][my][mx];
 	  eky1 -= x0*vdy_brick_a1[mz][my][mx];
 	  ekz1 -= x0*vdz_brick_a1[mz][my][mx];
           ekx2 -= x0*vdx_brick_a2[mz][my][mx];
 	  eky2 -= x0*vdy_brick_a2[mz][my][mx];
 	  ekz2 -= x0*vdz_brick_a2[mz][my][mx];
 	  ekx3 -= x0*vdx_brick_a3[mz][my][mx];
 	  eky3 -= x0*vdy_brick_a3[mz][my][mx];
 	  ekz3 -= x0*vdz_brick_a3[mz][my][mx];
 	  ekx4 -= x0*vdx_brick_a4[mz][my][mx];
 	  eky4 -= x0*vdy_brick_a4[mz][my][mx];
 	  ekz4 -= x0*vdz_brick_a4[mz][my][mx];
           ekx5 -= x0*vdx_brick_a5[mz][my][mx];
 	  eky5 -= x0*vdy_brick_a5[mz][my][mx];
 	  ekz5 -= x0*vdz_brick_a5[mz][my][mx];
           ekx6 -= x0*vdx_brick_a6[mz][my][mx];
 	  eky6 -= x0*vdy_brick_a6[mz][my][mx];
 	  ekz6 -= x0*vdz_brick_a6[mz][my][mx];
 	}
       }
     }
     // convert D-field to force
     // B is read in reverse order: lj0 (paired with grid set a0) is
     // B[7*type+6], lj6 (paired with a6) is B[7*type]
     type = atom->type[i];
     lj0 = B[7*type+6];
     lj1 = B[7*type+5];
     lj2 = B[7*type+4];
     lj3 = B[7*type+3];
     lj4 = B[7*type+2];
     lj5 = B[7*type+1];
     lj6 = B[7*type];
     f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
     f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
     // no z force is applied when slabflag == 2
     if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule for the ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_ad()
 {
   // For every local atom, interpolates the seven potential grids
   // u_brick_a0 .. u_brick_a6 with the drho1d_6 / rho1d_6 weights to get
   // seven field vectors, scales them by the inverse grid spacings, and adds
   // their B-weighted sum minus a sine-series self term (built from
   // sf_coeff_6) to atom->f.  The z force is skipped when slabflag == 2.
 
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
   FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
   FFT_SCALAR ekx6, eky6, ekz6;
 
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   // box lengths: prd for orthogonal boxes, prd_lamda otherwise
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // grid points per unit length in each dimension
   double hx_inv = nx_pppm_6/xprd;
   double hy_inv = ny_pppm_6/yprd;
   double hz_inv = nz_pppm_6/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     // rho1d_6 / drho1d_6 are read below as the per-dimension stencil weights
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
 
     // reset the 7 x 3 field accumulators for this atom
     ekx0 = eky0 = ekz0 = ZEROF;
     ekx1 = eky1 = ekz1 = ZEROF;
     ekx2 = eky2 = ekz2 = ZEROF;
     ekx3 = eky3 = ekz3 = ZEROF;
     ekx4 = eky4 = ekz4 = ZEROF;
     ekx5 = eky5 = ekz5 = ZEROF;
     ekx6 = eky6 = ekz6 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           // x0/y0/z0 here are stencil weights, not coordinates: the drho1d_6
           // factor replaces the rho1d_6 factor in the x, y, z dimension
           x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
           y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
           z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
 
           ekx0 += x0*u_brick_a0[mz][my][mx];
           eky0 += y0*u_brick_a0[mz][my][mx];
           ekz0 += z0*u_brick_a0[mz][my][mx];
 
           ekx1 += x0*u_brick_a1[mz][my][mx];
           eky1 += y0*u_brick_a1[mz][my][mx];
           ekz1 += z0*u_brick_a1[mz][my][mx];
 
           ekx2 += x0*u_brick_a2[mz][my][mx];
           eky2 += y0*u_brick_a2[mz][my][mx];
           ekz2 += z0*u_brick_a2[mz][my][mx];
 
           ekx3 += x0*u_brick_a3[mz][my][mx];
           eky3 += y0*u_brick_a3[mz][my][mx];
           ekz3 += z0*u_brick_a3[mz][my][mx];
 
           ekx4 += x0*u_brick_a4[mz][my][mx];
           eky4 += y0*u_brick_a4[mz][my][mx];
           ekz4 += z0*u_brick_a4[mz][my][mx];
 
           ekx5 += x0*u_brick_a5[mz][my][mx];
           eky5 += y0*u_brick_a5[mz][my][mx];
           ekz5 += z0*u_brick_a5[mz][my][mx];
 
           ekx6 += x0*u_brick_a6[mz][my][mx];
           eky6 += y0*u_brick_a6[mz][my][mx];
           ekz6 += z0*u_brick_a6[mz][my][mx];
 	}
       }
     }
 
     // scale every accumulated component by the inverse grid spacing
     ekx0 *= hx_inv;
     eky0 *= hy_inv;
     ekz0 *= hz_inv;
 
     ekx1 *= hx_inv;
     eky1 *= hy_inv;
     ekz1 *= hz_inv;
 
     ekx2 *= hx_inv;
     eky2 *= hy_inv;
     ekz2 *= hz_inv;
 
     ekx3 *= hx_inv;
     eky3 *= hy_inv;
     ekz3 *= hz_inv;
 
     ekx4 *= hx_inv;
     eky4 *= hy_inv;
     ekz4 *= hz_inv;
 
     ekx5 *= hx_inv;
     eky5 *= hy_inv;
     ekz5 *= hz_inv;
 
     ekx6 *= hx_inv;
     eky6 *= hy_inv;
     ekz6 *= hz_inv;
 
     // convert D-field to force
     // B is read in reverse order: lj0 (paired with grid a0) is B[7*type+6],
     // lj6 (paired with a6) is B[7*type]
     type = atom->type[i];
     lj0 = B[7*type+6];
     lj1 = B[7*type+5];
     lj2 = B[7*type+4];
     lj3 = B[7*type+3];
     lj4 = B[7*type+2];
     lj5 = B[7*type+1];
     lj6 = B[7*type];
 
     // atom position in units of the grid spacing
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
 
     // self term per dimension: two-term sine series times a coefficient
     // factor that pairs lj_k with lj_(6-k)
     sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
     sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
 
     sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
     sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
 
     sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
     sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     // no z force is applied when slabflag == 2
     if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule for per atom quantities
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_peratom()
 {
   // Per-atom dispersion tallies for the seven *_brick_a0 .. *_brick_a6 grid
   // sets.  For every local atom the energy grids (u_brick_aK) and virial
   // grids (v0..v5_brick_aK) are interpolated over the order_6 stencil, then
   // combined with weights 0.5*B[7*type+6-K] and added to eatom / vatom.
   // Naming of accumulators: vCK = virial component C from grid set K.
 
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
   FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
   FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
   FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
   FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
   FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
   FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     // rho1d_6 is read below as the per-dimension stencil weights
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     // reset all energy and virial accumulators for this atom
     u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
     u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
     u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
     u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
     u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
     u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
     u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  // x0 = product of the three 1d weights for this stencil point
 	  x0 = y0*rho1d_6[0][l];
           if (eflag_atom) {
             u_pa0 += x0*u_brick_a0[mz][my][mx];
             u_pa1 += x0*u_brick_a1[mz][my][mx];
             u_pa2 += x0*u_brick_a2[mz][my][mx];
             u_pa3 += x0*u_brick_a3[mz][my][mx];
             u_pa4 += x0*u_brick_a4[mz][my][mx];
             u_pa5 += x0*u_brick_a5[mz][my][mx];
             u_pa6 += x0*u_brick_a6[mz][my][mx];
 	  }
           if (vflag_atom) {
             v00 += x0*v0_brick_a0[mz][my][mx];
             v10 += x0*v1_brick_a0[mz][my][mx];
             v20 += x0*v2_brick_a0[mz][my][mx];
             v30 += x0*v3_brick_a0[mz][my][mx];
             v40 += x0*v4_brick_a0[mz][my][mx];
             v50 += x0*v5_brick_a0[mz][my][mx];
             v01 += x0*v0_brick_a1[mz][my][mx];
             v11 += x0*v1_brick_a1[mz][my][mx];
             v21 += x0*v2_brick_a1[mz][my][mx];
             v31 += x0*v3_brick_a1[mz][my][mx];
             v41 += x0*v4_brick_a1[mz][my][mx];
             v51 += x0*v5_brick_a1[mz][my][mx];
             v02 += x0*v0_brick_a2[mz][my][mx];
             v12 += x0*v1_brick_a2[mz][my][mx];
             v22 += x0*v2_brick_a2[mz][my][mx];
             v32 += x0*v3_brick_a2[mz][my][mx];
             v42 += x0*v4_brick_a2[mz][my][mx];
             v52 += x0*v5_brick_a2[mz][my][mx];
             v03 += x0*v0_brick_a3[mz][my][mx];
             v13 += x0*v1_brick_a3[mz][my][mx];
             v23 += x0*v2_brick_a3[mz][my][mx];
             v33 += x0*v3_brick_a3[mz][my][mx];
             v43 += x0*v4_brick_a3[mz][my][mx];
             v53 += x0*v5_brick_a3[mz][my][mx];
             v04 += x0*v0_brick_a4[mz][my][mx];
             v14 += x0*v1_brick_a4[mz][my][mx];
             v24 += x0*v2_brick_a4[mz][my][mx];
             v34 += x0*v3_brick_a4[mz][my][mx];
             v44 += x0*v4_brick_a4[mz][my][mx];
             v54 += x0*v5_brick_a4[mz][my][mx];
             v05 += x0*v0_brick_a5[mz][my][mx];
             v15 += x0*v1_brick_a5[mz][my][mx];
             v25 += x0*v2_brick_a5[mz][my][mx];
             v35 += x0*v3_brick_a5[mz][my][mx];
             v45 += x0*v4_brick_a5[mz][my][mx];
             v55 += x0*v5_brick_a5[mz][my][mx];
             v06 += x0*v0_brick_a6[mz][my][mx];
             v16 += x0*v1_brick_a6[mz][my][mx];
             v26 += x0*v2_brick_a6[mz][my][mx];
             v36 += x0*v3_brick_a6[mz][my][mx];
             v46 += x0*v4_brick_a6[mz][my][mx];
             v56 += x0*v5_brick_a6[mz][my][mx];
           }
 	}
       }
     }
     // convert D-field to force
     // B is read in reverse order (lj0 <- B[7*type+6], lj6 <- B[7*type]);
     // each coefficient is halved before it is applied
     type = atom->type[i];
     lj0 = B[7*type+6]*0.5;
     lj1 = B[7*type+5]*0.5;
     lj2 = B[7*type+4]*0.5;
     lj3 = B[7*type+3]*0.5;
     lj4 = B[7*type+2]*0.5;
     lj5 = B[7*type+1]*0.5;
     lj6 = B[7*type]*0.5;
 
  
     if (eflag_atom) 
       eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + 
         u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
     if (vflag_atom) {
       vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + 
         v04*lj4 + v05*lj5 + v06*lj6;
       vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + 
         v14*lj4 + v15*lj5 + v16*lj6;
       vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + 
         v24*lj4 + v25*lj5 + v26*lj6;
       vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + 
         v34*lj4 + v35*lj5 + v36*lj6;
       vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + 
         v44*lj4 + v45*lj5 + v46*lj6;
       vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + 
         v54*lj4 + v55*lj5 + v56*lj6;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for no mixing rule and ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_none_ik()
 {
   int i,k,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR *ekx, *eky, *ekz;
 
   ekx = new FFT_SCALAR[nsplit];
   eky = new FFT_SCALAR[nsplit];
   ekz = new FFT_SCALAR[nsplit];
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     for (k = 0; k < nsplit; k++)
       ekx[k] = eky[k] = ekz[k] = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
           for (k = 0; k < nsplit; k++) {
 	    ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
 	    eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
 	    ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
           }
 	}
       }
     }
     // convert D-field to force
     type = atom->type[i];
     for (k = 0; k < nsplit; k++) {
       lj = B[nsplit*type + k];
       f[i][0] += lj*ekx[k];
       f[i][1] +=lj*eky[k];
       if (slabflag != 2) f[i][2] +=lj*ekz[k];
     }
   }
 
   delete [] ekx;
   delete [] eky;
   delete [] ekz;
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for no mixing rule for the ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_none_ad()
 {
   int i,k,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR *ekx, *eky, *ekz;
 
   ekx = new FFT_SCALAR[nsplit];
   eky = new FFT_SCALAR[nsplit];
   ekz = new FFT_SCALAR[nsplit];
 
 
   double s1,s2,s3;
   double sf1,sf2,sf3;
   double sf = 0.0;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm_6/xprd;
   double hy_inv = ny_pppm_6/yprd;
   double hz_inv = nz_pppm_6/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
 
     for (k = 0; k < nsplit; k++)
       ekx[k] = eky[k] = ekz[k] = ZEROF;
 
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
           y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
           z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
           
           for (k = 0; k < nsplit; k++) {
             ekx[k] += x0*u_brick_none[k][mz][my][mx];
             eky[k] += y0*u_brick_none[k][mz][my][mx];
             ekz[k] += z0*u_brick_none[k][mz][my][mx];
           }
 	}
       }
     }
 
     for (k = 0; k < nsplit; k++) {
       ekx[k] *= hx_inv;
       eky[k] *= hy_inv;
       ekz[k] *= hz_inv;
     }
 
     // convert D-field to force
     type = atom->type[i];
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
 
     sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
     sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);
 
     sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
     sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);
 
     sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
     sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);
 
     for (k = 0; k < nsplit; k++) {
       lj = B[nsplit*type + k];
 
       sf = sf1*B[k]*2*lj*lj;
       f[i][0] += lj*ekx[k] - sf;
 
 
       sf = sf2*B[k]*2*lj*lj;
       f[i][1] += lj*eky[k] - sf;
 
       sf = sf3*B[k]*2*lj*lj;
       if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
     }
   }
 
   delete [] ekx;
   delete [] eky;
   delete [] ekz;
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for no mixing rule for per atom quantities
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_none_peratom()
 {
   int i,k,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;
   
   u_pa = new FFT_SCALAR[nsplit];
   v0 = new FFT_SCALAR[nsplit];
   v1 = new FFT_SCALAR[nsplit];
   v2 = new FFT_SCALAR[nsplit];
   v3 = new FFT_SCALAR[nsplit];
   v4 = new FFT_SCALAR[nsplit];
   v5 = new FFT_SCALAR[nsplit];
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     for (k = 0; k < nsplit; k++) 
       u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;
  
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
           if (eflag_atom) {
             for (k = 0; k < nsplit; k++)
               u_pa[k] += x0*u_brick_none[k][mz][my][mx];
 	  }
           if (vflag_atom) {
             for (k = 0; k < nsplit; k++) {
               v0[k] += x0*v0_brick_none[k][mz][my][mx];
               v1[k] += x0*v1_brick_none[k][mz][my][mx];
               v2[k] += x0*v2_brick_none[k][mz][my][mx];
               v3[k] += x0*v3_brick_none[k][mz][my][mx];
               v4[k] += x0*v4_brick_none[k][mz][my][mx];
               v5[k] += x0*v5_brick_none[k][mz][my][mx];
             }
           }
 	}
       }
     }
     // convert D-field to force
     type = atom->type[i];
     for (k = 0; k < nsplit; k++) {
       lj = B[nsplit*type + k]*0.5;
  
       if (eflag_atom) {
         eatom[i] += u_pa[k]*lj;
       }
       if (vflag_atom) {
         vatom[i][0] += v0[k]*lj;
         vatom[i][1] += v1[k]*lj;
         vatom[i][2] += v2[k]*lj;
         vatom[i][3] += v3[k]*lj;
         vatom[i][4] += v4[k]*lj;
         vatom[i][5] += v5[k]*lj;
       }
     }
   }
 
   delete [] u_pa;
   delete [] v0;
   delete [] v1;
   delete [] v2;
   delete [] v3;
   delete [] v4;
   delete [] v5;
 }
 
 /* ----------------------------------------------------------------------
    pack values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   switch (flag) {
 
   // Coulomb interactions
 
   case FORWARD_IK: {
     FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
     break;
   }
 
   case FORWARD_AD: {
     FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
     break;
   }
 
   case FORWARD_IK_PERATOM: {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM: {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
     break;
   }
 
   // Dispersion interactions, geometric mixing
 
   case FORWARD_IK_G: {
     FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
     break;
   }
 
   case FORWARD_AD_G: {
     FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
     break;
   }
 
   case FORWARD_IK_PERATOM_G: {
     FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_G: {
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
     break;
   }
 
   // Dispersion interactions, arithmetic mixing
 
   case FORWARD_IK_A: {
     FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc0[list[i]];
       buf[n++] = ysrc0[list[i]];
       buf[n++] = zsrc0[list[i]];
 
       buf[n++] = xsrc1[list[i]];
       buf[n++] = ysrc1[list[i]];
       buf[n++] = zsrc1[list[i]];
 
       buf[n++] = xsrc2[list[i]];
       buf[n++] = ysrc2[list[i]];
       buf[n++] = zsrc2[list[i]];
 
       buf[n++] = xsrc3[list[i]];
       buf[n++] = ysrc3[list[i]];
       buf[n++] = zsrc3[list[i]];
 
       buf[n++] = xsrc4[list[i]];
       buf[n++] = ysrc4[list[i]];
       buf[n++] = zsrc4[list[i]];
 
       buf[n++] = xsrc5[list[i]];
       buf[n++] = ysrc5[list[i]];
       buf[n++] = zsrc5[list[i]];
 
       buf[n++] = xsrc6[list[i]];
       buf[n++] = ysrc6[list[i]];
       buf[n++] = zsrc6[list[i]];
     }
     break;
   }
 
   case FORWARD_AD_A: {
     FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = src0[list[i]];
       buf[n++] = src1[list[i]];
       buf[n++] = src2[list[i]];
       buf[n++] = src3[list[i]];
       buf[n++] = src4[list[i]];
       buf[n++] = src5[list[i]];
       buf[n++] = src6[list[i]];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_A: {
     FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) {
         buf[n++] = esrc0[list[i]];
         buf[n++] = esrc1[list[i]];
         buf[n++] = esrc2[list[i]];
         buf[n++] = esrc3[list[i]];
         buf[n++] = esrc4[list[i]];
         buf[n++] = esrc5[list[i]];
         buf[n++] = esrc6[list[i]];
       }
       if (vflag_atom) {
         buf[n++] = v0src0[list[i]];
         buf[n++] = v1src0[list[i]];
         buf[n++] = v2src0[list[i]];
         buf[n++] = v3src0[list[i]];
         buf[n++] = v4src0[list[i]];
         buf[n++] = v5src0[list[i]];
 
         buf[n++] = v0src1[list[i]];
         buf[n++] = v1src1[list[i]];
         buf[n++] = v2src1[list[i]];
         buf[n++] = v3src1[list[i]];
         buf[n++] = v4src1[list[i]];
         buf[n++] = v5src1[list[i]];
 
         buf[n++] = v0src2[list[i]];
         buf[n++] = v1src2[list[i]];
         buf[n++] = v2src2[list[i]];
         buf[n++] = v3src2[list[i]];
         buf[n++] = v4src2[list[i]];
         buf[n++] = v5src2[list[i]];
 
         buf[n++] = v0src3[list[i]];
         buf[n++] = v1src3[list[i]];
         buf[n++] = v2src3[list[i]];
         buf[n++] = v3src3[list[i]];
         buf[n++] = v4src3[list[i]];
         buf[n++] = v5src3[list[i]];
 
         buf[n++] = v0src4[list[i]];
         buf[n++] = v1src4[list[i]];
         buf[n++] = v2src4[list[i]];
         buf[n++] = v3src4[list[i]];
         buf[n++] = v4src4[list[i]];
         buf[n++] = v5src4[list[i]];
 
         buf[n++] = v0src5[list[i]];
         buf[n++] = v1src5[list[i]];
         buf[n++] = v2src5[list[i]];
         buf[n++] = v3src5[list[i]];
         buf[n++] = v4src5[list[i]];
         buf[n++] = v5src5[list[i]];
 
         buf[n++] = v0src6[list[i]];
         buf[n++] = v1src6[list[i]];
         buf[n++] = v2src6[list[i]];
         buf[n++] = v3src6[list[i]];
         buf[n++] = v4src6[list[i]];
         buf[n++] = v5src6[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_A: {
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src0[list[i]];
       buf[n++] = v1src0[list[i]];
       buf[n++] = v2src0[list[i]];
       buf[n++] = v3src0[list[i]];
       buf[n++] = v4src0[list[i]];
       buf[n++] = v5src0[list[i]];
 
       buf[n++] = v0src1[list[i]];
       buf[n++] = v1src1[list[i]];
       buf[n++] = v2src1[list[i]];
       buf[n++] = v3src1[list[i]];
       buf[n++] = v4src1[list[i]];
       buf[n++] = v5src1[list[i]];
 
       buf[n++] = v0src2[list[i]];
       buf[n++] = v1src2[list[i]];
       buf[n++] = v2src2[list[i]];
       buf[n++] = v3src2[list[i]];
       buf[n++] = v4src2[list[i]];
       buf[n++] = v5src2[list[i]];
 
       buf[n++] = v0src3[list[i]];
       buf[n++] = v1src3[list[i]];
       buf[n++] = v2src3[list[i]];
       buf[n++] = v3src3[list[i]];
       buf[n++] = v4src3[list[i]];
       buf[n++] = v5src3[list[i]];
 
       buf[n++] = v0src4[list[i]];
       buf[n++] = v1src4[list[i]];
       buf[n++] = v2src4[list[i]];
       buf[n++] = v3src4[list[i]];
       buf[n++] = v4src4[list[i]];
       buf[n++] = v5src4[list[i]];
 
       buf[n++] = v0src5[list[i]];
       buf[n++] = v1src5[list[i]];
       buf[n++] = v2src5[list[i]];
       buf[n++] = v3src5[list[i]];
       buf[n++] = v4src5[list[i]];
       buf[n++] = v5src5[list[i]];
 
       buf[n++] = v0src6[list[i]];
       buf[n++] = v1src6[list[i]];
       buf[n++] = v2src6[list[i]];
       buf[n++] = v3src6[list[i]];
       buf[n++] = v4src6[list[i]];
       buf[n++] = v5src6[list[i]];
     }
     break;
   }
 
   // Dispersion interactions, no mixing
 
   case FORWARD_IK_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         buf[n++] = xsrc[list[i]];
         buf[n++] = ysrc[list[i]];
         buf[n++] = zsrc[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++)
         buf[n++] = src[list[i]];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         if (eflag_atom) buf[n++] = esrc[list[i]];
         if (vflag_atom) {
           buf[n++] = v0src[list[i]];
           buf[n++] = v1src[list[i]];
           buf[n++] = v2src[list[i]];
           buf[n++] = v3src[list[i]];
           buf[n++] = v4src[list[i]];
           buf[n++] = v5src[list[i]];
         }
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
     break;
   }
 
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   switch (flag) {
 
   // Coulomb interactions
 
   case FORWARD_IK: {
     FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD: {
     FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[n++];
     break;
   }
 
   case FORWARD_IK_PERATOM: {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM: {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
     break;
   }
 
   // Dispersion interactions, geometric mixing
 
   case FORWARD_IK_G: {
     FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD_G: {
     FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[n++];
     break;
   }
 
   case FORWARD_IK_PERATOM_G: {
     FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_G: {
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
     break;
   }
 
   // Dispersion interactions, arithmetic mixing
 
   case FORWARD_IK_A: {
     FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       xdest0[list[i]] = buf[n++];
       ydest0[list[i]] = buf[n++];
       zdest0[list[i]] = buf[n++];
 
       xdest1[list[i]] = buf[n++];
       ydest1[list[i]] = buf[n++];
       zdest1[list[i]] = buf[n++];
 
       xdest2[list[i]] = buf[n++];
       ydest2[list[i]] = buf[n++];
       zdest2[list[i]] = buf[n++];
 
       xdest3[list[i]] = buf[n++];
       ydest3[list[i]] = buf[n++];
       zdest3[list[i]] = buf[n++];
 
       xdest4[list[i]] = buf[n++];
       ydest4[list[i]] = buf[n++];
       zdest4[list[i]] = buf[n++];
 
       xdest5[list[i]] = buf[n++];
       ydest5[list[i]] = buf[n++];
       zdest5[list[i]] = buf[n++];
 
       xdest6[list[i]] = buf[n++];
       ydest6[list[i]] = buf[n++];
       zdest6[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD_A: {
     FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       dest0[list[i]] = buf[n++];
       dest1[list[i]] = buf[n++];
       dest2[list[i]] = buf[n++];
       dest3[list[i]] = buf[n++];
       dest4[list[i]] = buf[n++];
       dest5[list[i]] = buf[n++];
       dest6[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_A: {
     FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) {
         esrc0[list[i]] = buf[n++];
         esrc1[list[i]] = buf[n++];
         esrc2[list[i]] = buf[n++];
         esrc3[list[i]] = buf[n++];
         esrc4[list[i]] = buf[n++];
         esrc5[list[i]] = buf[n++];
         esrc6[list[i]] = buf[n++];
       }
       if (vflag_atom) {
         v0src0[list[i]] = buf[n++];
         v1src0[list[i]] = buf[n++];
         v2src0[list[i]] = buf[n++];
         v3src0[list[i]] = buf[n++];
         v4src0[list[i]] = buf[n++];
         v5src0[list[i]] = buf[n++];
 
         v0src1[list[i]] = buf[n++];
         v1src1[list[i]] = buf[n++];
         v2src1[list[i]] = buf[n++];
         v3src1[list[i]] = buf[n++];
         v4src1[list[i]] = buf[n++];
         v5src1[list[i]] = buf[n++];
 
         v0src2[list[i]] = buf[n++];
         v1src2[list[i]] = buf[n++];
         v2src2[list[i]] = buf[n++];
         v3src2[list[i]] = buf[n++];
         v4src2[list[i]] = buf[n++];
         v5src2[list[i]] = buf[n++];
 
         v0src3[list[i]] = buf[n++];
         v1src3[list[i]] = buf[n++];
         v2src3[list[i]] = buf[n++];
         v3src3[list[i]] = buf[n++];
         v4src3[list[i]] = buf[n++];
         v5src3[list[i]] = buf[n++];
 
         v0src4[list[i]] = buf[n++];
         v1src4[list[i]] = buf[n++];
         v2src4[list[i]] = buf[n++];
         v3src4[list[i]] = buf[n++];
         v4src4[list[i]] = buf[n++];
         v5src4[list[i]] = buf[n++];
 
         v0src5[list[i]] = buf[n++];
         v1src5[list[i]] = buf[n++];
         v2src5[list[i]] = buf[n++];
         v3src5[list[i]] = buf[n++];
         v4src5[list[i]] = buf[n++];
         v5src5[list[i]] = buf[n++];
 
         v0src6[list[i]] = buf[n++];
         v1src6[list[i]] = buf[n++];
         v2src6[list[i]] = buf[n++];
         v3src6[list[i]] = buf[n++];
         v4src6[list[i]] = buf[n++];
         v5src6[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_A: {
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       v0src0[list[i]] = buf[n++];
       v1src0[list[i]] = buf[n++];
       v2src0[list[i]] = buf[n++];
       v3src0[list[i]] = buf[n++];
       v4src0[list[i]] = buf[n++];
       v5src0[list[i]] = buf[n++];
 
       v0src1[list[i]] = buf[n++];
       v1src1[list[i]] = buf[n++];
       v2src1[list[i]] = buf[n++];
       v3src1[list[i]] = buf[n++];
       v4src1[list[i]] = buf[n++];
       v5src1[list[i]] = buf[n++];
 
       v0src2[list[i]] = buf[n++];
       v1src2[list[i]] = buf[n++];
       v2src2[list[i]] = buf[n++];
       v3src2[list[i]] = buf[n++];
       v4src2[list[i]] = buf[n++];
       v5src2[list[i]] = buf[n++];
 
       v0src3[list[i]] = buf[n++];
       v1src3[list[i]] = buf[n++];
       v2src3[list[i]] = buf[n++];
       v3src3[list[i]] = buf[n++];
       v4src3[list[i]] = buf[n++];
       v5src3[list[i]] = buf[n++];
 
       v0src4[list[i]] = buf[n++];
       v1src4[list[i]] = buf[n++];
       v2src4[list[i]] = buf[n++];
       v3src4[list[i]] = buf[n++];
       v4src4[list[i]] = buf[n++];
       v5src4[list[i]] = buf[n++];
 
       v0src5[list[i]] = buf[n++];
       v1src5[list[i]] = buf[n++];
       v2src5[list[i]] = buf[n++];
       v3src5[list[i]] = buf[n++];
       v4src5[list[i]] = buf[n++];
       v5src5[list[i]] = buf[n++];
 
       v0src6[list[i]] = buf[n++];
       v1src6[list[i]] = buf[n++];
       v2src6[list[i]] = buf[n++];
       v3src6[list[i]] = buf[n++];
       v4src6[list[i]] = buf[n++];
       v5src6[list[i]] = buf[n++];
     }
     break;
   }
 
   // Dispersion interactions, no mixing
 
   case FORWARD_IK_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         xdest[list[i]] = buf[n++];
         ydest[list[i]] = buf[n++];
         zdest[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++)
         dest[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         if (eflag_atom) esrc[list[i]] = buf[n++];
         if (vflag_atom) {
           v0src[list[i]] = buf[n++];
           v1src[list[i]] = buf[n++];
           v2src[list[i]] = buf[n++];
           v3src[list[i]] = buf[n++];
           v4src[list[i]] = buf[n++];
           v5src[list[i]] = buf[n++];
         }
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_NONE: {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   //Coulomb interactions
 
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
 
   //Dispersion interactions, geometric mixing
 
   } else if (flag == REVERSE_RHO_G) {
     FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
 
   //Dispersion interactions, arithmetic mixing
 
   } else if (flag == REVERSE_RHO_A) {
     FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = src0[list[i]];
       buf[n++] = src1[list[i]];
       buf[n++] = src2[list[i]];
       buf[n++] = src3[list[i]];
       buf[n++] = src4[list[i]];
       buf[n++] = src5[list[i]];
       buf[n++] = src6[list[i]];
     }
 
   //Dispersion interactions, no mixing
 
   } else if (flag == REVERSE_RHO_NONE) {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++) {
         buf[n++] = src[list[i]];
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   //Coulomb interactions
 
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
 
   //Dispersion interactions, geometric mixing
 
   } else if (flag == REVERSE_RHO_G) {
     FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
 
   //Dispersion interactions, arithmetic mixing
 
   } else if (flag == REVERSE_RHO_A) {
     FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       dest0[list[i]] += buf[n++];
       dest1[list[i]] += buf[n++];
       dest2[list[i]] += buf[n++];
       dest3[list[i]] += buf[n++];
       dest4[list[i]] += buf[n++];
       dest5[list[i]] += buf[n++];
       dest6[list[i]] += buf[n++];
     }
 
   //Dispersion interactions, no mixing
 
   } else if (flag == REVERSE_RHO_NONE) {
     for (int k = 0; k < nsplit_alloc; k++) {
       FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
       for (int i = 0; i < nlist; i++)
         dest[list[i]] += buf[n++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pick a PX by PY layout of nprocs procs for an NX by NY grid
    every factorization nprocs = px*py is examined; the one kept has the
    smallest boxx+boxy, where boxx,boxy are the per-proc grid extents
    rounded up; ties go to the larger boxx*boxy
    the chosen layout is returned through px,py
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
 {
   // best candidate found so far
 
   int minsurf = 2 * (nx + ny);
   int minboxx = 0;
   int minboxy = 0;
 
   for (int ipx = 1; ipx <= nprocs; ipx++) {
 
     // only exact factorizations of nprocs are candidates
 
     if (nprocs % ipx) continue;
     const int ipy = nprocs/ipx;
 
     // extent of the largest sub-domain: one extra point if grid/procs
     // leaves a remainder
 
     const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
     const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
     const int surf = boxx + boxy;
 
     // reject unless surf is smaller, or equal with a larger box area
 
     if (surf > minsurf) continue;
     if (surf == minsurf && boxx*boxy <= minboxx*minboxy) continue;
 
     minsurf = surf;
     minboxx = boxx;
     minboxy = boxy;
     *px = ipx;
     *py = ipy;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
 			      const FFT_SCALAR &dz, int ord, 
                              FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-ord)/2; k <= ord/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = ord-1; l >= 0; l--) {
       r1 = rho_c[l][k] + r1*dx;
       r2 = rho_c[l][k] + r2*dy;
       r3 = rho_c[l][k] + r3*dz;
     }
     r1d[0][k] = r1;
     r1d[1][k] = r2;
     r1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into drho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                           const FFT_SCALAR &dz, int ord, 
                               FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-ord)/2; k <= ord/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = ord-2; l >= 0; l--) {
       r1 = drho_c[l][k] + r1*dx;
       r2 = drho_c[l][k] + r2*dy;
       r3 = drho_c[l][k] + r3*dz;
     }
     dr1d[0][k] = r1;
     dr1d[1][k] = r2;
     dr1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    generate coefficients for the weight function of order n
 
               (n-1)
   Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
            k=-(n-1)
   For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
       k is odd integers if n is even and even integers if n is odd
               ---
              | n-1
              | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
   wn(k,x) = <  l=0
              |
              |  0                       otherwise
               ---
    a coefficients are packed into the array rho_coeff to eliminate zeros
    rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
 
    arguments:
      coeff  = output, receives a(l,k) for l = 0..ord-1
      dcoeff = output, receives l*a(l,k) stored at row l-1, l = 1..ord-1
      ord    = order n of the weight function
    in both outputs the column index starts at (1-ord)/2 and advances by
    one for every second k
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, 
                                  int ord)
 {
   int j,k,l,m;
   FFT_SCALAR s;
 
   // scratch table a[l][k], indexed below with l = 0..ord-1, k = -ord..ord
   // NOTE(review): create2d_offset() is assumed to allocate exactly that
   // index range - confirm against the Memory class
 
   FFT_SCALAR **a;
   memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
 
   // clear the whole table; columns never written below must read as zero
 
   for (k = -ord; k <= ord; k++) 
     for (l = 0; l < ord; l++)
       a[l][k] = 0.0;
         
   // seed: single column k = 0 holding the constant 1
 
   a[0][0] = 1.0;
 
   // pass j fills rows 0..j of columns k = -j,-j+2,...,j
   // it only reads rows 0..j-1 of the neighbouring columns k-1 and k+1,
   // which hold what the previous pass wrote (or the initial zeros)
 
   for (j = 1; j < ord; j++) {
     for (k = -j; k <= j; k += 2) {
       s = 0.0;
       for (l = 0; l < j; l++) {
 	a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
 
         // s accumulates the row-0 entry of column k; the cast binds to l
         // alone, so the exponent is ((float) l) + 1
         // powf/pow is chosen to match the precision of FFT_SCALAR
 
 #ifdef FFT_SINGLE
 	s += powf(0.5,(float) l+1) *
 	  (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
 #else
 	s += pow(0.5,(double) l+1) * 
 	  (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
 #endif
       }
       a[0][k] = s;
     }
   }
 
   // pack every second column k = -(ord-1)...(ord-1) into consecutive
   // columns m of the outputs, starting at m = (1-ord)/2
   // dcoeff row l-1 gets l*a[l][k], i.e. the coefficients of the first
   // derivative of the polynomial whose coefficients are a[0..ord-1][k]
 
   m = (1-ord)/2;
   for (k = -(ord-1); k < ord; k += 2) {
     for (l = 0; l < ord; l++)
       coeff[l][m] = a[l][k];
     for (l = 1; l < ord; l++)
       dcoeff[l-1][m] = l*a[l][k];
     m++;
   }
 
   memory->destroy2d_offset(a,-ord);
 }
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
    periodically repeating slabs.  Yields good approximation to 2D Ewald if
    adequate empty space is left between repeating slabs (J. Chem. Phys.
    111, 3155).  Slabs defined here to be parallel to the xy plane. Also
    extended to non-neutral systems (J. Chem. Phys. 131, 094107).
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::slabcorr(int eflag)
 {
   // compute local contribution to global dipole moment
 
   double *q = atom->q;
   double **x = atom->x;
   double zprd = domain->zprd;
   int nlocal = atom->nlocal;
 
   double dipole = 0.0;
   for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
 
   // sum local contributions to get global dipole moment
 
   double dipole_all;
   MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
 
   // need to make non-neutral systems and/or
   //  per-atom energy translationally invariant
 
   double dipole_r2 = 0.0;
   if (eflag_atom || fabs(qsum) > SMALL) {
     for (int i = 0; i < nlocal; i++)
       dipole_r2 += q[i]*x[i][2]*x[i][2];
 
     // sum local contributions
 
     double tmp;
     MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     dipole_r2 = tmp;
   }
 
   // compute corrections
 
   const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
     qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
   const double qscale = force->qqrd2e * scale;
 
   if (eflag_global) energy_1 += qscale * e_slabcorr;
 
   // per-atom energy
 
   if (eflag_atom) {
     double efact = qscale * MY_2PI/volume;
     for (int i = 0; i < nlocal; i++)
       eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
         qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
   }
 
   // add on force corrections
 
   double ffact = qscale * (-4.0*MY_PI/volume);
   double **f = atom->f;
 
   for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 1d FFTs required for N timesteps
    time1d = Coulomb time + dispersion time * mixing factor
    returns 2 if differentiation_flag is set, else 4
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::timing_1d(int n, double &time1d)
 {
   // factor applied to the measured dispersion time
 
   const int mixing = function[3] ? nsplit_alloc/2 : (function[2] ? 4 : 1);
 
   // non-zero if any dispersion variant is active
 
   const int ndisp = function[1] + function[2] + function[3];
 
   // each step = 1 forward pass + 1 backward pass,
   //   plus 2 more backward passes unless differentiation_flag == 1
 
   const int nbackward = (differentiation_flag != 1) ? 3 : 1;
 
   if (function[0])
     for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
   if (ndisp)
     for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
 
   // Coulomb FFTs
 
   MPI_Barrier(world);
   double tstart = MPI_Wtime();
 
   if (function[0]) {
     for (int step = 0; step < n; step++) {
       fft1->timing1d(work1,nfft_both,1);
       for (int b = 0; b < nbackward; b++)
         fft2->timing1d(work1,nfft_both,-1);
     }
   }
 
   MPI_Barrier(world);
   time1d = MPI_Wtime() - tstart;
 
   // dispersion FFTs
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (ndisp) {
     for (int step = 0; step < n; step++) {
       fft1_6->timing1d(work1_6,nfft_both_6,1);
       for (int b = 0; b < nbackward; b++)
         fft2_6->timing1d(work1_6,nfft_both_6,-1);
     }
   }
 
   MPI_Barrier(world);
   time1d += (MPI_Wtime() - tstart)*mixing;
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 3d FFTs required for N timesteps
    time3d = Coulomb time + dispersion time * mixing factor
    returns 2 if differentiation_flag is set, else 4
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::timing_3d(int n, double &time3d)
 {
   // factor applied to the measured dispersion time
 
   const int mixing = function[3] ? nsplit_alloc/2 : (function[2] ? 4 : 1);
 
   // non-zero if any dispersion variant is active
 
   const int ndisp = function[1] + function[2] + function[3];
 
   // each step = 1 forward FFT + 1 backward FFT,
   //   plus 2 more backward FFTs unless differentiation_flag == 1
 
   const int nbackward = (differentiation_flag != 1) ? 3 : 1;
 
   if (function[0])
     for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
   if (ndisp)
     for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
 
   // Coulomb FFTs, performed in place on work1
 
   MPI_Barrier(world);
   double tstart = MPI_Wtime();
 
   if (function[0]) {
     for (int step = 0; step < n; step++) {
       fft1->compute(work1,work1,1);
       for (int b = 0; b < nbackward; b++)
         fft2->compute(work1,work1,-1);
     }
   }
 
   MPI_Barrier(world);
   time3d = MPI_Wtime() - tstart;
 
   // dispersion FFTs, performed in place on work1_6
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (ndisp) {
     for (int step = 0; step < n; step++) {
       fft1_6->compute(work1_6,work1_6,1);
       for (int b = 0; b < nbackward; b++)
         fft2_6->compute(work1_6,work1_6,-1);
     }
   }
 
   MPI_Barrier(world);
   time3d += (MPI_Wtime() - tstart) * mixing;
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local arrays, in bytes
    sums nmax*3 doubles plus, for the Coulomb part (function[0]) and the
    dispersion part (function[1..3]), the grid bricks, vg, greensfn,
    FFT work arrays and the usage reported by the cg / cg_6 objects
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::memory_usage()
 {
   // bricks per grid: diff depends on differentiation,
   //   per depends on per-atom calculations
 
   const int diff = differentiation_flag ? 1 : 3;
   int per = differentiation_flag ? 6 : 7;
   if (!evflag_atom) per = 0;
 
   // number of dispersion grids
 
   const int mixing = function[3] ? nsplit_alloc : (function[2] ? 7 : 1);
 
   double bytes = nmax*3 * sizeof(double);
 
   // Coulomb grid
 
   if (function[0]) {
     const int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
       (nzhi_out-nzlo_out+1);
 
     // density_brick + vd_brick + per-atom bricks
     bytes += (1 + diff +  per) * nbrick * sizeof(FFT_SCALAR);
     // vg
     bytes += 6 * nfft_both * sizeof(double);
     // greensfn
     bytes += nfft_both * sizeof(double);
     // density_FFT, work1, work2
     bytes += nfft_both * 3 * sizeof(FFT_SCALAR);
     bytes += cg->memory_usage();
   }
 
   // dispersion grid(s)
 
   if (function[1] + function[2] + function[3]) {
     const int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) *
       (nzhi_out_6-nzlo_out_6+1);
 
     // density_brick + vd_brick + per-atom bricks, once per mixing grid
     bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing;
     // vg
     bytes += 6 * nfft_both_6 * sizeof(double);
     // greensfn
     bytes += nfft_both_6 * sizeof(double);
     // density_FFT (one per mixing grid), work1, work2
     bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR);
     bytes += cg_6->memory_usage();
   }
 
   return bytes;
 }
diff --git a/src/USER-MISC/pair_meam_spline.h b/src/USER-MISC/pair_meam_spline.h
index d13968067..d16a321cb 100644
--- a/src/USER-MISC/pair_meam_spline.h
+++ b/src/USER-MISC/pair_meam_spline.h
@@ -1,281 +1,281 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    see LLNL copyright notice at bottom of file
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(meam/spline,PairMEAMSpline)
 
 #else
 
 #ifndef LMP_PAIR_MEAM_SPLINE_H
 #define LMP_PAIR_MEAM_SPLINE_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 /// Set this to 1 if you intend to use MEAM potentials with non-uniform spline knots.
 /// Set this to 0 if you intend to use only MEAM potentials with spline knots on a uniform grid.
 ///
 /// With SUPPORT_NON_GRID_SPLINES == 0, the code runs about 50% faster.
 
 #define SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES 0
 
 class PairMEAMSpline : public Pair
 {
 public:
         PairMEAMSpline(class LAMMPS *);
         virtual ~PairMEAMSpline();
         virtual void compute(int, int);
         void settings(int, char **);
         void coeff(int, char **);
         void init_style();
         void init_list(int, class NeighList *);
         double init_one(int, int);
 
         int pack_forward_comm(int, int *, double *, int, int *);
         void unpack_forward_comm(int, int, double *);
         int pack_reverse_comm(int, int, double *);
         void unpack_reverse_comm(int, int *, double *);
         double memory_usage();
 
 protected:
   char **elements;              // names of unique elements
   int *map;                     // mapping from atom types to elements
   int nelements;                // # of unique elements
 
         /// Cubic spline through N knots with linear extrapolation outside the
         /// knot interval.  Owns five heap arrays of N doubles each (X, Xs, Y,
         /// Y2, Ydelta); they are (re)allocated by init() and released by the
         /// destructor.
         class SplineFunction {
         public:
 
                 /// Default constructor.  Leaves the spline empty (no knots, null arrays).
                 SplineFunction() : X(NULL), Xs(NULL), Y(NULL), Y2(NULL), Ydelta(NULL), N(0) {}
 
                 /// Destructor.  Frees all five knot arrays.
                 ~SplineFunction() {
                         delete[] X;
                         delete[] Xs;
                         delete[] Y;
                         delete[] Y2;
                         delete[] Ydelta;
                 }
 
                 /// Initialization of spline function.
                 /// Stores the knot count and the two end-point derivatives, discards any
                 /// previously allocated arrays and allocates fresh, uninitialized ones.
-                void init(int _N, double _deriv0, double _derivN) {
-                        N = _N;
+                void init(int _n, double _deriv0, double _derivN) {
+                        N = _n;
                         deriv0 = _deriv0;
                         derivN = _derivN;
                         delete[] X;
                         delete[] Xs;
                         delete[] Y;
                         delete[] Y2;
                         delete[] Ydelta;
                         X = new double[N];
                         Xs = new double[N];
                         Y = new double[N];
                         Y2 = new double[N];
                         Ydelta = new double[N];
                 }
 
                 /// Adds a knot to the spline: position x and value y at index n.
                 /// No bounds check is performed on n.
                 void setKnot(int n, double x, double y) { X[n] = x; Y[n] = y; }
 
                 /// Returns the number of knots.
                 int numKnots() const { return N; }
 
                 /// Parses the spline knots from a text file.
                 void parse(FILE* fp, Error* error);
 
                 /// Calculates the second derivatives of the cubic spline.
                 void prepareSpline(Error* error);
 
                 /// Evaluates the spline function at position x.
                 /// x is first shifted by xmin; at or below the shifted origin and at
                 /// or beyond xmax_shifted the result is a straight line with slope
                 /// deriv0 / derivN through the first / last knot value.
                 inline double eval(double x) const
                 {
                         x -= xmin;
                         if(x <= 0.0) {  // Left extrapolation.
                                 return Y[0] + deriv0 * x;
                         }
                         else if(x >= xmax_shifted) {  // Right extrapolation.
                                 return Y[N-1] + derivN * (x - xmax_shifted);
                         }
                         else {
 #if SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
                                 // Do interval search (bisection on the shifted knot positions).
                                 int klo = 0;
                                 int khi = N-1;
                                 while(khi - klo > 1) {
                                         int k = (khi + klo) / 2;
                                         if(Xs[k] > x) khi = k;
                                         else klo = k;
                                 }
                                 double h = Xs[khi] - Xs[klo];
                                 // Do spline interpolation.
                                 double a = (Xs[khi] - x)/h;
                                 double b = 1.0 - a; // = (x - Xs[klo])/h
                                 return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi])*(h*h)/6.0;
 #else
                                 // For a spline with grid points, we can directly calculate the interval X is in.
                                 // NOTE(review): this form has no h*h/6 factor, so Y2 is presumably
                                 // pre-scaled by prepareSpline() for grid splines - confirm there.
                                 int klo = (int)(x / h);
                                 int khi = klo + 1;
                                 double a = Xs[khi] - x;
                                 double b = h - a;
                                 return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]);
 #endif
                         }
                 }
 
                 /// Evaluates the spline function and its first derivative at position x.
                 /// Same value as eval(x); additionally stores the slope in deriv
                 /// (deriv0 / derivN in the two extrapolation regions).
                 inline double eval(double x, double& deriv) const
                 {
                         x -= xmin;
                         if(x <= 0.0) {  // Left extrapolation.
                                 deriv = deriv0;
                                 return Y[0] + deriv0 * x;
                         }
                         else if(x >= xmax_shifted) {  // Right extrapolation.
                                 deriv = derivN;
                                 return Y[N-1] + derivN * (x - xmax_shifted);
                         }
                         else {
 #if SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
                                 // Do interval search (bisection on the shifted knot positions).
                                 int klo = 0;
                                 int khi = N-1;
                                 while(khi - klo > 1) {
                                         int k = (khi + klo) / 2;
                                         if(Xs[k] > x) khi = k;
                                         else klo = k;
                                 }
                                 double h = Xs[khi] - Xs[klo];
                                 // Do spline interpolation.
                                 double a = (Xs[khi] - x)/h;
                                 double b = 1.0 - a; // = (x - Xs[klo])/h
                                 deriv = (Y[khi] - Y[klo]) / h + ((3.0*b*b - 1.0) * Y2[khi] - (3.0*a*a - 1.0) * Y2[klo]) * h / 6.0;
                                 return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi]) * (h*h) / 6.0;
 #else
                                 // For a spline with grid points, we can directly calculate the interval X is in.
                                 int klo = (int)(x / h);
                                 int khi = klo + 1;
                                 double a = Xs[khi] - x;
                                 double b = h - a;
                                 deriv = Ydelta[klo] + ((3.0*b*b - hsq) * Y2[khi] - (3.0*a*a - hsq) * Y2[klo]);
                                 return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]);
 #endif
                         }
                 }
 
                 /// Returns the number of bytes used by this function object:
                 /// the object itself plus the five N-element arrays
                 /// (X, Xs, Y, Y2, Ydelta) allocated by init().
                 double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 5; }
 
                 /// Returns the cutoff radius of this function (position of the last knot).
                 double cutoff() const { return X[N-1]; }
 
                 /// Writes a Gnuplot script that plots the spline function.
                 void writeGnuplot(const char* filename, const char* title = NULL) const;
 
                 /// Broadcasts the spline function parameters to all processors.
                 void communicate(MPI_Comm& world, int me);
 
         private:
                 double* X;              // Positions of spline knots
                 double* Xs;             // Shifted positions of spline knots
                 double* Y;              // Function values at spline knots
                 double* Y2;             // Second derivatives at spline knots
                 double* Ydelta;         // If this is a grid spline, Ydelta[i] = (Y[i+1]-Y[i])/h
                 int N;                  // Number of spline knots
                 double deriv0;          // First derivative at knot 0
                 double derivN;          // First derivative at knot (N-1)
                 double xmin;            // The beginning of the interval on which the spline function is defined.
                 double xmax;            // The end of the interval on which the spline function is defined.
                 int isGridSpline;       // Indicates that all spline knots are on a regular grid.
                 double h;               // The distance between knots if this is a grid spline with equidistant knots.
                 double hsq;             // The squared distance between knots if this is a grid spline with equidistant knots.
                 double xmax_shifted;    // The end of the spline interval after it has been shifted to begin at X=0.
         };
 
         /// Scratch record (one entry of the temporary twoBodyInfo array) used by the potential routine.
         struct MEAM2Body {
                 int tag;
                 double r;
                 // NOTE(review): presumably value and derivative of the spline f -- confirm in the .cpp file.
                 double f;
                 double fprime;
                 // Three-component vector.
                 double del[3];
         };
 
         // Spline functions that make up the potential (argument of each function given on the right).
         SplineFunction phi;                        // Phi(r_ij)
         SplineFunction rho;                        // Rho(r_ij)
         SplineFunction f;                        // f(r_ij)
         SplineFunction U;                        // U(rho)
         SplineFunction g;                        // g(cos_theta)
         double zero_atom_energy;        // Shift embedding energy by this value to make it zero for a single atom in vacuum.
 
         double cutoff;              // The cutoff radius
 
         // Scratch storage.
         double* Uprime_values;                // Used for temporary storage of U'(rho) values
         int nmax;                                        // Size of temporary array (presumably Uprime_values -- confirm in the .cpp file).
         int maxNeighbors;                        // The last maximum number of neighbors a single atom had.
         MEAM2Body* twoBodyInfo;                // Temporary array of MEAM2Body records.
 
         // Declared only; both are defined outside this header.
         void read_file(const char* filename);
         void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ----------------------------------------------------------------------
  * Spline-based Modified Embedded Atom method (MEAM) potential routine.
  *
  * Copyright (2011) Lawrence Livermore National Security, LLC.
  * Produced at the Lawrence Livermore National Laboratory.
  * Written by Alexander Stukowski (<alex@stukowski.com>).
  * LLNL-CODE-525797 All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License (as published by the Free
  * Software Foundation) version 2, dated June 1991.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY
  * or FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the
  * GNU General Public License for more details.
  *
  * Our Preamble Notice
  * A. This notice is required to be provided under our contract with the
  * U.S. Department of Energy (DOE). This work was produced at the
  * Lawrence Livermore National Laboratory under Contract No.
  * DE-AC52-07NA27344 with the DOE.
  *
  * B. Neither the United States Government nor Lawrence Livermore National
  * Security, LLC nor any of their employees, makes any warranty, express or
  * implied, or assumes any liability or responsibility for the accuracy,
  * completeness, or usefulness of any information, apparatus, product, or
  * process disclosed, or represents that its use would not infringe
  * privately-owned rights.
  *
  * C. Also, reference herein to any specific commercial products, process,
  * or services by trade name, trademark, manufacturer or otherwise does not
  * necessarily constitute or imply its endorsement, recommendation, or
  * favoring by the United States Government or Lawrence Livermore National
  * Security, LLC. The views and opinions of authors expressed herein do not
  * necessarily state or reflect those of the United States Government or
  * Lawrence Livermore National Security, LLC, and shall not be used for
  * advertising or product endorsement purposes.
  *
  * See file 'pair_spline_meam.cpp' for history of changes.
 ------------------------------------------------------------------------- */
diff --git a/src/USER-MISC/pair_meam_sw_spline.h b/src/USER-MISC/pair_meam_sw_spline.h
index 23dd1724f..383104df4 100644
--- a/src/USER-MISC/pair_meam_sw_spline.h
+++ b/src/USER-MISC/pair_meam_sw_spline.h
@@ -1,510 +1,510 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    see LLNL copyright notice at bottom of file
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(meam/sw/spline,PairMEAMSWSpline)
 
 #else
 
 #ifndef LMP_PAIR_MEAM_SW_SPLINE_H
 #define LMP_PAIR_MEAM_SW_SPLINE_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 /// Set this to 1 if you intend to use MEAM potentials with non-uniform spline knots.
 /// Set this to 0 if you intend to use only MEAM potentials with spline knots on a uniform grid.
 ///
 /// SplineFunction::eval() picks its interval lookup at compile time from this value:
 /// non-zero selects a bisection search over the knots, zero computes the interval
 /// index directly from the grid spacing h.
 ///
 /// With SPLINE_MEAMSW_SUPPORT_NON_GRID_SPLINES == 0, the code runs about 50% faster.
 
 #define SPLINE_MEAMSW_SUPPORT_NON_GRID_SPLINES 0
 
 class PairMEAMSWSpline : public Pair
 {
 public:
         // Construction and destruction; the bodies are defined outside this header.
         PairMEAMSWSpline(class LAMMPS *);
         virtual ~PairMEAMSWSpline();
         // Pair-style entry points, declared only.
         // NOTE(review): presumably these implement the interface of the Pair base class from pair.h -- confirm there.
         virtual void compute(int, int);
         void settings(int, char **);
         void coeff(int, char **);
         void init_style();
         void init_list(int, class NeighList *);
         double init_one(int, int);
 
         // Forward/reverse communication pack and unpack routines and memory accounting, declared only.
         int pack_forward_comm(int, int *, double *, int, int *);
         void unpack_forward_comm(int, int, double *);
         int pack_reverse_comm(int, int, double *);
         void unpack_reverse_comm(int, int *, double *);
         double memory_usage();
 
 protected:
   char **elements;              // names of unique elements
   int *map;                     // mapping from atom types to elements
   int nelements;                // # of unique elements
 
         class SplineFunction {
         public:
 
                 /// Default constructor: creates an empty spline with no knots and no storage.
                 /// The scalar members are zero-initialized as well, so that eval() and
                 /// the other accessors never read indeterminate values on an object
                 /// that has not been set up yet. The initializers follow the member
                 /// declaration order.
                 SplineFunction()
                         : X(NULL), Xs(NULL), Y(NULL), Y2(NULL), Ydelta(NULL), N(0),
                           deriv0(0.0), derivN(0.0), xmin(0.0), xmax(0.0),
                           isGridSpline(0), h(0.0), hsq(0.0), xmax_shifted(0.0) {}
 
                 /// Destructor: releases the five knot arrays owned by this object.
                 ~SplineFunction() {
                         double* const owned[] = { X, Xs, Y, Y2, Ydelta };
                         for(int i = 0; i < 5; i++)
                                 delete[] owned[i];
                 }
 
                 /// Initialization of spline function.
                 /// Stores the knot count and the two boundary derivatives, frees any
                 /// previously allocated knot arrays and allocates five new arrays of N
                 /// doubles each. The new arrays are not filled here; positions and
                 /// values are written later, e.g. through setKnot().
-                void init(int _N, double _deriv0, double _derivN) {
-                        N = _N;
+                void init(int _n, double _deriv0, double _derivN) {
+                        N = _n;
                         deriv0 = _deriv0;
                         derivN = _derivN;
                         // delete[] on a NULL pointer is a no-op, so this is safe on a default-constructed object too.
                         delete[] X;
                         delete[] Xs;
                         delete[] Y;
                         delete[] Y2;
                         delete[] Ydelta;
                         // Plain new double[N] leaves the element values uninitialized.
                         X = new double[N];
                         Xs = new double[N];
                         Y = new double[N];
                         Y2 = new double[N];
                         Ydelta = new double[N];
                 }
 
                 /// Stores position x and function value y for knot number n.
                 /// No bounds check is performed on n.
                 void setKnot(int n, double x, double y)
                 {
                         X[n] = x;
                         Y[n] = y;
                 }
 
                 /// Returns the current knot count N.
                 int numKnots() const
                 {
                         return N;
                 }
 
                 /// Parses the spline knots from a text file.
                 /// Declared only; the definition lives outside this header.
                 void parse(FILE* fp, Error* error);
 
                 /// Calculates the second derivatives of the cubic spline.
                 /// Declared only; the definition lives outside this header.
                 void prepareSpline(Error* error);
 
                 /// Evaluates the spline function at position x.
                 inline double eval(double x) const
                 {
                         x -= xmin;
                         if(x <= 0.0) {  // Left extrapolation.
                                 return Y[0] + deriv0 * x;
                         }
                         else if(x >= xmax_shifted) {  // Right extrapolation.
                                 return Y[N-1] + derivN * (x - xmax_shifted);
                         }
                         else {
 #if SPLINE_MEAMSW_SUPPORT_NON_GRID_SPLINES
                                 // Do interval search.
                                 int klo = 0;
                                 int khi = N-1;
                                 while(khi - klo > 1) {
                                         int k = (khi + klo) / 2;
                                         if(Xs[k] > x) khi = k;
                                         else klo = k;
                                 }
                                 double h = Xs[khi] - Xs[klo];
                                 // Do spline interpolation.
                                 double a = (Xs[khi] - x)/h;
                                 double b = 1.0 - a; // = (x - X[klo])/h
                                 return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi])*(h*h)/6.0;
 #else
                                 // For a spline with grid points, we can directly calculate the interval X is in.
                                 //
                                 int klo = (int)(x / h);
                                 if ( klo > N - 2 ) klo = N - 2;
                                 int khi = klo + 1;
                                 double a = Xs[khi] - x;
                                 double b = h - a;
                                 return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]);
 #endif
                         }
                 }
 
                 /// Evaluates the spline function and its first derivative at position x.
                 inline double eval(double x, double& deriv) const
                 {
                         x -= xmin;
                         if(x <= 0.0) {  // Left extrapolation.
                                 deriv = deriv0;
                                 return Y[0] + deriv0 * x;
                         }
                         else if(x >= xmax_shifted) {  // Right extrapolation.
                                 deriv = derivN;
                                 return Y[N-1] + derivN * (x - xmax_shifted);
                         }
                         else {
 #if SPLINE_MEAMSW_SUPPORT_NON_GRID_SPLINES
                                 // Do interval search.
                                 int klo = 0;
                                 int khi = N-1;
                                 while(khi - klo > 1) {
                                         int k = (khi + klo) / 2;
                                         if(Xs[k] > x) khi = k;
                                         else klo = k;
                                 }
                                 double h = Xs[khi] - Xs[klo];
                                 // Do spline interpolation.
                                 double a = (Xs[khi] - x)/h;
                                 double b = 1.0 - a; // = (x - X[klo])/h
                                 deriv = (Y[khi] - Y[klo]) / h + ((3.0*b*b - 1.0) * Y2[khi] - (3.0*a*a - 1.0) * Y2[klo]) * h / 6.0;
                                 return a * Y[klo] + b * Y[khi] + ((a*a*a - a) * Y2[klo] + (b*b*b - b) * Y2[khi]) * (h*h) / 6.0;
 #else
                                 // For a spline with grid points, we can directly calculate the interval X is in.
                                 int klo = (int)(x / h);
                                 if ( klo > N - 2 ) klo = N - 2;
                                 int khi = klo + 1;
                                 double a = Xs[khi] - x;
                                 double b = h - a;
                                 deriv = Ydelta[klo] + ((3.0*b*b - hsq) * Y2[khi] - (3.0*a*a - hsq) * Y2[klo]);
                                 return Y[khi] - a * Ydelta[klo] + ((a*a - hsq) * a * Y2[klo] + (b*b - hsq) * b * Y2[khi]);
 #endif
                         }
                 }
 
                 /// Returns the number of bytes used by this function object: the object
                 /// itself plus the five knot arrays (X, Xs, Y, Y2, Ydelta), each of which
                 /// init() allocates with N doubles.
                 double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 5; }
 
                 /// Returns the cutoff radius of this function, i.e. the position stored for the last knot.
                 double cutoff() const
                 {
                         const int lastKnot = N - 1;
                         return X[lastKnot];
                 }
 
                 /// Writes a Gnuplot script that plots the spline function.
                 /// The title argument is optional and defaults to NULL.
                 /// Declared only; the definition lives outside this header.
                 void writeGnuplot(const char* filename, const char* title = NULL) const;
 
                 /// Broadcasts the spline function parameters to all processors.
                 /// Declared only; the definition lives outside this header.
                 void communicate(MPI_Comm& world, int me);
 
         private:
                 // Knot data. init() allocates each of the five arrays with N entries; the destructor frees them.
                 double* X;                                // Positions of spline knots
                 double* Xs;                                // Shifted positions of spline knots; eval() compares them against (x - xmin)
                 double* Y;                                // Function values at spline knots
                 double* Y2;                                // Second derivatives at spline knots
                 double* Ydelta;                        // If this is a grid spline, Ydelta[i] = (Y[i+1]-Y[i])/h
                 int N;                                        // Number of spline knots
                 double deriv0;                        // First derivative at knot 0; slope of the left extrapolation in eval()
                 double derivN;                        // First derivative at knot (N-1); slope of the right extrapolation in eval()
                 double xmin;                        // The beginning of the interval on which the spline function is defined; eval() subtracts it from its argument.
                 double xmax;                        // The end of the interval on which the spline function is defined.
                 int isGridSpline;                // Indicates that all spline knots are on a regular grid.
                 double h;                                // The distance between knots if this is a grid spline with equidistant knots; eval() uses x / h as interval index.
                 double hsq;                                // The squared distance between knots if this is a grid spline with equidistant knots.
                 double xmax_shifted;        // The end of the spline interval after it has been shifted to begin at X=0; eval() extrapolates at and beyond it.
         };
 
         /// Scratch record (one entry of the temporary twoBodyInfo array) used by the potential routine.
         struct MEAM2Body {
                 int tag;
                 double r;
                 // NOTE(review): presumably value and derivative of the spline f -- confirm in the .cpp file.
                 double f;
                 double fprime;
                 // NOTE(review): presumably value and derivative of the spline F -- confirm in the .cpp file.
                 double F;
                 double Fprime;
                 // Three-component vector.
                 double del[3];
         };
 
         // Spline functions that make up the potential (argument of each function given on the right).
         SplineFunction phi;                        // Phi(r_ij)
         SplineFunction rho;                        // Rho(r_ij)
         SplineFunction f;                        // f(r_ij)
         SplineFunction U;                        // U(rho)
         SplineFunction g;                        // g(cos_theta)
         SplineFunction F;                        // F(r_ij)
         SplineFunction G;                        // G(cos_theta)
         double zero_atom_energy;        // Shift embedding energy by this value to make it zero for a single atom in vacuum.
 
         double cutoff;              // The cutoff radius
 
         // Scratch storage.
         double* Uprime_values;                // Used for temporary storage of U'(rho) values
         int nmax;                                        // Size of temporary array (presumably Uprime_values -- confirm in the .cpp file).
         int maxNeighbors;                        // The last maximum number of neighbors a single atom had.
         MEAM2Body* twoBodyInfo;                // Temporary array of MEAM2Body records.
 
         // Declared only; both are defined outside this header.
         void read_file(const char* filename);
         void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ----------------------------------------------------------------------
  * Spline-based Modified Embedded Atom Method plus 
  * Stillinger-Weber (MEAM+SW) potential routine.
  *
  * Copyright (2012) Lawrence Livermore National Security, LLC.
  * Produced at the Lawrence Livermore National Laboratory.
  * Written by Robert E. Rudd (<robert.rudd@llnl.gov>).
  * Based on the spline MEAM routine written by Alexander Stukowski 
  * (<alex@stukowski.com>).
  * LLNL-CODE-588032 All rights reserved.
  *
  * The spline-based MEAM+SW format was first devised and used to develop
  * potentials for bcc transition metals by Jeremy Nicklas, Michael Fellinger,
  * and Hyoungki Park at The Ohio State University.
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License (as published by the Free
  * Software Foundation) version 2, dated June 1991.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY
  * or FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the
  * GNU General Public License for more details.
  *
  * Our Preamble Notice
  * A. This notice is required to be provided under our contract with the
  * U.S. Department of Energy (DOE). This work was produced at the
  * Lawrence Livermore National Laboratory under Contract No.
  * DE-AC52-07NA27344 with the DOE.
  *
  * B. Neither the United States Government nor Lawrence Livermore National
  * Security, LLC nor any of their employees, makes any warranty, express or
  * implied, or assumes any liability or responsibility for the accuracy,
  * completeness, or usefulness of any information, apparatus, product, or
  * process disclosed, or represents that its use would not infringe
  * privately-owned rights.
  *
  * C. Also, reference herein to any specific commercial products, process,
  * or services by trade name, trademark, manufacturer or otherwise does not
  * necessarily constitute or imply its endorsement, recommendation, or
  * favoring by the United States Government or Lawrence Livermore National
  * Security, LLC. The views and opinions of authors expressed herein do not
  * necessarily state or reflect those of the United States Government or
  * Lawrence Livermore National Security, LLC, and shall not be used for
  * advertising or product endorsement purposes.
  *
  * The precise terms and conditions for copying, distribution and modification
  * follow.
  *
  * GNU Terms and Conditions for Copying, Distribution, and Modification
  *
  * 0.  This License applies to any program or other work which contains a
  * notice placed by the copyright holder saying it may be distributed under
  * the terms of this General Public License.  The "Program," below, refers to
  * any such program or work, and a "work based on the Program" means either
  * the Program or any derivative work under copyright law: that is to say, a
  * work containing the Program or a portion of it, either verbatim or with
  * modifications and/or translated into another language.  (Hereinafter,
  * translation is included without limitation in the term "modification".)
  * Each licensee is addressed as "you."
  *
  * Activities other than copying, distribution and modification are not
  * covered by this License; they are outside its scope.  The act of running
  * the Program is not restricted, and the output from the Program is covered
  * only if its contents constitute a work based on the Program (independent of
  * having been made by running the Program).  Whether that is true depends on
  * what the Program does.  
  *
  * 1.  You may copy and distribute verbatim copies of the Program's source
  * code as you receive it, in any medium, provided that you conspicuously and
  * appropriately publish on each copy an appropriate copyright notice and
  * disclaimer of warranty; keep intact all the notices that refer to this
  * License and to the absence of any warranty; and give any other recipients
  * of the Program a copy of this License along with the Program.
  *
  * You may charge a fee for the physical act of transferring a copy, and you
  * may at your option offer warranty protection in exchange for a fee.
  *
  * 2.  You may modify your copy or copies of the Program or any portion of it,
  * thus forming a work based on the Program, and copy and distribute such
  * modifications or work under the terms of Section 1 above, provided that you
  * also meet all of these conditions:
  *
  *  a)  You must cause the modified files to carry prominent notices stating
  *  that you changed the files and the date of any change.
  *
  *  b)  You must cause any work that you distribute or publish, that in whole
  *  or in part contains or is derived from the Program or any part thereof, to
  *  be licensed as a whole at no charge to all third parties under the terms
  *  of this License.
  *
  *  c)  If the modified program normally reads commands interactively when
  *  run, you must cause it, when started running for such interactive use in
  *  the most ordinary way, to print or display an announcement including an
  *  appropriate copyright notice and a notice that there is no warranty (or
  *  else, saying that you provide a warranty) and that users may redistribute
  *  the program under these conditions, and telling the user how to view a
  *  copy of this License.  (Exception: if the Program itself is interactive
  *  but does not normally print such an announcement, your work based on the
  *  Program is not required to print an announcement.)
  *
  * These requirements apply to the modified work as a whole.  If
  * identifiable sections of that work are not derived from the Program, and
  * can be reasonably considered independent and separate works in
  * themselves, then this License, and its terms, do not apply to those
  * sections when you distribute them as separate work.  But when you
  * distribute the same section as part of a whole which is a work based on
  * the Program, the distribution of the whole must be on the terms of this
  * License, whose permissions for other licensees extend to the entire
  * whole, and thus to each and every part regardless of who wrote it.
  *
  * Thus, it is not the intent of this section to claim rights or contest
  * your rights to work written entirely by you; rather, the intent is to
  * exercise the right to control the distribution of derivative or
  * collective works based on the Program.
  *
  * In addition, mere aggregation of another work not based on the Program
  * with the Program (or with a work based on the Program) on a volume of a
  * storage or distribution medium does not bring the other work under the
  * scope of this License.
  *
  * 3.  You may copy and distribute the Program (or a work based on it, under
  * Section 2) in object code or executable form under the terms of Sections
  * 1 and 2 above provided that you also do one of the following:
  *
  *  a)  Accompany it with the complete corresponding machine-readable source
  *  code, which must be distributed under the terms of Sections 1 and 2 above
  *  on a medium customarily used for software interchange; or,
  *
  *  b)  Accompany it with a written offer, valid for at least three years,
  *  to give any third party, for a charge no more than your cost of
  *  physically performing source distribution, a complete machine-readable
  *  copy of the corresponding source code, to be distributed under the terms
  *  of Sections 1 and 2 above on a medium customarily used for software
  *  interchange; or,
  *
  *  c)  Accompany it with the information you received as to the offer to
  *  distribute corresponding source code.  (This alternative is allowed only
  *  for noncommercial distribution and only if you received the program in
  *  object code or executable form with such an offer, in accord with
  *  Subsection b above.)
  *
  * The source code for a work means the preferred form of the work for making
  * modifications to it.  For an executable work, complete source code means
  * all the source code for all modules it contains, plus any associated
  * interface definition files, plus the scripts used to control compilation
  * and installation of the executable.  However, as a special exception, the
  * source code distributed need not include anything that is normally
  * distributed (in either source or binary form) with the major components
  * (compiler, kernel, and so on) of the operating system on which the
  * executable runs, unless that component itself accompanies the executable.
  *
  * If distribution of executable or object code is made by offering access to
  * copy from a designated place, then offering equivalent access to copy the
  * source code from the same place counts as distribution of the source code,
  * even though third parties are not compelled to copy the source along with
  * the object code.
  *
  * 4.  You may not copy, modify, sublicense, or distribute the Program except
  * as expressly provided under this License.  Any attempt otherwise to copy,
  * modify, sublicense or distribute the Program is void, and will
  * automatically terminate your rights under this License.  However, parties
  * who have received copies, or rights, from you under this License will not
  * have their licenses terminated so long as such parties remain in full
  * compliance.
  *
  * 5.  You are not required to accept this License, since you have not signed
  * it.  However, nothing else grants you permission to modify or distribute
  * the Program or its derivative works.  These actions are prohibited by law
  * if you do not accept this License.  Therefore, by modifying or distributing
  * the Program (or any work based on the Program), you indicate your
  * acceptance of this License to do so, and all its terms and conditions for
  * copying, distributing or modifying the Program or works based on it.
  *
  * 6.  Each time you redistribute the Program (or any work based on the
  * Program), the recipient automatically receives a license from the original
  * licensor to copy, distribute or modify the Program subject to these terms
  * and conditions.  You may not impose any further restrictions on the
  * recipients' exercise of the rights granted herein.  You are not responsible
  * for enforcing compliance by third parties to this License.
  *
  * 7.  If, as a consequence of a court judgment or allegation of patent
  * infringement or for any other reason (not limited to patent 
  * issues), conditions are imposed on you (whether by court 
  * order, agreement or otherwise) that contradict the conditions 
  * of this License, they do not excuse you from the conditions 
  * of this License.  If you cannot distribute so as to satisfy
  * simultaneously your obligations under this License and any other pertinent
  * obligations, then as a consequence you may not distribute the Program at
  * all.  For example, if a patent license would not permit royalty-free
  * redistribution of the Program by all those who receive copies directly or
  * indirectly through you, then the only way you could satisfy both it and
  * this License would be to refrain entirely from distribution of the Program.
  *
  * If any portion of this section is held invalid or unenforceable under any
  * particular circumstance, the balance of the section is intended to apply
  * and the section as a whole is intended to apply in other circumstances.
  *
  * It is not the purpose of this section to induce you to infringe any patents
  * or other property right claims or to contest validity of any such claims;
  * this section has the sole purpose of protecting the integrity of the free
  * software distribution system, which is implemented by public license
  * practices.  Many people have made generous contributions to the wide range
  * of software distributed through that system in reliance on consistent
  * application of that system; it is up to the author/donor to decide if he or
  * she is willing to distribute software through any other system and a
  * licensee cannot impose that choice.
  *
  * This section is intended to make thoroughly clear what is believed to be a
  * consequence of the rest of this License.
  *
  * 8.  If the distribution and/or use of the Program is restricted in certain
  * countries either by patents or by copyrighted interfaces, the original
  * copyright holder who places the Program under this License may add an
  * explicit geographical distribution limitation excluding those countries, so
  * that distribution is permitted only in or among countries not thus
  * excluded.  In such case, this License incorporates the limitation as if
  * written in the body of this License.
  *
  * 9.  The Free Software Foundation may publish revised and/or new versions of
  * the General Public License from time to time.  Such new versions will be
  * similar in spirit to the present version, but may differ in detail to
  * address new problems or concerns.
  *
  * Each version is given a distinguishing version number.  If the Program
  * specifies a version number of this License which applies to it and "any
  * later version," you have the option of following the terms and conditions
  * either of that version or of any later version published by the Free Software
  * Foundation.  If the Program does not specify a version number of this
  * License, you may choose any version ever published by the Free Software
  * Foundation.
  *
  * 10.  If you wish to incorporate parts of the Program into other free
  * programs whose distribution conditions are different, write to the author
  * to ask for permission.  For software which is copyrighted by the Free
  * Software Foundation, write to the Free Software Foundation; we sometimes
  * make exceptions for this.  Our decision to grant permission will be guided
  * by the two goals of preserving the free status of all derivatives of our
  * free software and of promoting the sharing and reuse of software generally.
  *
  * NO WARRANTY
  *
  * 11.  BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
  * FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
  * OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
  * PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
  * OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
  * TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
  * PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
  * REPAIR OR CORRECTION.
  *
  * 12.  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
  * WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
  * REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
  * INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
  * OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
  * TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
  * YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
  * PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGES.
  *
  * END OF TERMS AND CONDITIONS 
 ------------------------------------------------------------------------- */
diff --git a/src/compute.cpp b/src/compute.cpp
index 328573d44..54dc57809 100644
--- a/src/compute.cpp
+++ b/src/compute.cpp
@@ -1,232 +1,233 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "stdlib.h"
 #include "string.h"
 #include "ctype.h"
 #include "compute.h"
 #include "atom.h"
 #include "domain.h"
 #include "comm.h"
 #include "group.h"
 #include "modify.h"
 #include "fix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 #include "force.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 4
 #define BIG MAXTAGINT
 
 // allocate space for static class instance variable and initialize it
 
 int Compute::instance_total = 0;
 
 /* ----------------------------------------------------------------------
    base-class constructor, args = compute-ID group-ID style ...
    arg[0] = compute ID, arg[1] = group ID, arg[2] = style name
    errors out if fewer than 3 args are given, if the ID contains a char
      that is not alphanumeric or underscore, or if the group is unknown
    copies ID and style, then sets defaults for all child-class flags
 ------------------------------------------------------------------------- */
 
 Compute::Compute(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
 {
   instance_me = instance_total++;
 
   if (narg < 3) error->all(FLERR,"Illegal compute command");
 
   // compute ID, group, and style
   // ID must be all alphanumeric chars or underscores
 
   int n = strlen(arg[0]) + 1;
   id = new char[n];
   strcpy(id,arg[0]);
 
   // cast to unsigned char before calling isalnum():
   // passing a negative char value (non-ASCII byte) is undefined behavior
 
   for (int i = 0; i < n-1; i++)
     if (!isalnum((unsigned char) id[i]) && id[i] != '_')
       error->all(FLERR,
                  "Compute ID must be alphanumeric or underscore characters");
 
   igroup = group->find(arg[1]);
   if (igroup == -1) error->all(FLERR,"Could not find compute group ID");
   groupbit = group->bitmask[igroup];
 
   n = strlen(arg[2]) + 1;
   style = new char[n];
   strcpy(style,arg[2]);
 
   // set child class defaults
 
   scalar_flag = vector_flag = array_flag = 0;
   peratom_flag = local_flag = 0;
   size_vector_variable = size_array_rows_variable = 0;
 
   tempflag = pressflag = peflag = 0;
   pressatomflag = peatomflag = 0;
   create_attribute = 0;
   tempbias = 0;
 
   timeflag = 0;
   comm_forward = comm_reverse = 0;
   dynamic = 0;
   dynamic_group_allow = 1;
   cudable = 0;
 
   invoked_scalar = invoked_vector = invoked_array = -1;
   invoked_peratom = invoked_local = -1;
   invoked_flag = 0;
 
   // set modify defaults
   // fix_dof starts at 0 and is recomputed by adjust_dof_fix()
 
   extra_dof = domain->dimension;
   dynamic_user = 0;
+  fix_dof = 0;
 
   // setup list of timesteps
 
   ntime = maxtime = 0;
   tlist = NULL;
 
   // data masks
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
 
   // force init to zero in case these are used as logicals
 
   vector = vector_atom = vector_local = NULL;
   array = array_atom = array_local = NULL;
 }
 
 /* ----------------------------------------------------------------------
    release the timestep list and the ID and style strings
 ------------------------------------------------------------------------- */
 
 Compute::~Compute()
 {
   memory->destroy(tlist);
   delete [] style;
   delete [] id;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Compute::modify_params(int narg, char **arg)
 {
   if (narg == 0) error->all(FLERR,"Illegal compute_modify command");
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"extra") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal compute_modify command");
       extra_dof = force->inumeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"dynamic") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal compute_modify command");
       if (strcmp(arg[iarg+1],"no") == 0) dynamic_user = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) dynamic_user = 1;
       else error->all(FLERR,"Illegal compute_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"thermo") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal compute_modify command");
       if (strcmp(arg[iarg+1],"no") == 0) thermoflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) thermoflag = 1;
       else error->all(FLERR,"Illegal compute_modify command");
       iarg += 2;
     } else error->all(FLERR,"Illegal compute_modify command");
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate adjustment in DOF due to fixes
 ------------------------------------------------------------------------- */
 
 void Compute::adjust_dof_fix()
 {
   Fix **fix = modify->fix;
   int nfix = modify->nfix;
 
   fix_dof = 0;
   for (int i = 0; i < nfix; i++)
     if (fix[i]->dof_flag) 
       fix_dof += fix[i]->dof(igroup);
 }
 
 /* ----------------------------------------------------------------------
    restore extra_dof to the value the constructor assigns,
    i.e. the dimensionality of the simulation domain
 ------------------------------------------------------------------------- */
 
 void Compute::reset_extra_dof()
 {
   extra_dof = domain->dimension;
 }
 
 /* ----------------------------------------------------------------------
    base-class version always raises an error
    the ID argument is ignored here
 ------------------------------------------------------------------------- */
 
 void Compute::reset_extra_compute_fix(const char *)
 {
   error->all(FLERR,
              "Compute does not allow an extra compute or fix to be reset");
 }
 
 /* ----------------------------------------------------------------------
    insert ntimestep into tlist, the list of timesteps to invoke compute on
    tlist is kept in decreasing order, so the scan starts at the top
      (highest index = smallest stored step)
    nothing is done if ntimestep is already stored
 ------------------------------------------------------------------------- */
 
 void Compute::addstep(bigint ntimestep)
 {
   // slot = index at which ntimestep will be stored
   // walk down from the top while stored values are smaller than ntimestep
 
   int slot = ntime;
   while (slot > 0) {
     const bigint stored = tlist[slot-1];
     if (stored == ntimestep) return;
     if (ntimestep < stored) break;
     slot--;
   }
 
   // grow the list by DELTA entries when it is full
 
   if (ntime == maxtime) {
     maxtime += DELTA;
     memory->grow(tlist,maxtime,"compute:tlist");
   }
 
   // shift entries at and above slot up by one, then store the new step
 
   for (int j = ntime; j > slot; j--) tlist[j] = tlist[j-1];
   tlist[slot] = ntimestep;
   ntime++;
 }
 
 /* ----------------------------------------------------------------------
    return 1 if ntimestep is in the list of calling timesteps, else 0
    tlist is in decreasing order, so the top entry is the smallest
    top entries smaller than ntimestep are dropped from the list as a
      side effect of the search
 ------------------------------------------------------------------------- */
 
 int Compute::matchstep(bigint ntimestep)
 {
   while (ntime > 0) {
     const bigint top = tlist[ntime-1];
     if (ntimestep < top) return 0;
     if (ntimestep == top) return 1;
 
     // top entry is older than ntimestep, discard it and keep looking
 
     ntime--;
   }
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    empty the list of timesteps the compute will be called on
    only the entry count is reset, the tlist allocation is kept
 ------------------------------------------------------------------------- */
 
 void Compute::clearstep()
 {
   ntime = 0;
 }
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 5a0103a8a..e8f3290be 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -1,917 +1,919 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Carolyn Phillips (U Mich), reservoir energy tally
                          Aidan Thompson (SNL) GJF formulation
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "fix_langevin.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "compute.h"
 #include "domain.h"
 #include "region.h"
 #include "respa.h"
 #include "comm.h"
 #include "input.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "memory.h"
 #include "error.h"
 #include "group.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 enum{NOBIAS,BIAS};
 enum{CONSTANT,EQUAL,ATOM};
 
 #define SINERTIA 0.4          // moment of inertia prefactor for sphere
 #define EINERTIA 0.2          // moment of inertia prefactor for ellipsoid
 
 /* ----------------------------------------------------------------------
    parse the fix langevin command
    arg[3] = Tstart, or v_name to take the target temperature from a variable
    arg[4] = Tstop, arg[5] = damping period (> 0), arg[6] = RNG seed (> 0)
    optional keywords: angmom, gjf, omega, scale, tally, zero
    allocates the RNG and the per-type prefactor arrays
    if gjf is enabled, also allocates franprev and registers an atom callback
 ------------------------------------------------------------------------- */
 
 FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (narg < 7) error->all(FLERR,"Illegal fix langevin command");
 
   dynamic_group_allow = 1;
   scalar_flag = 1;
   global_freq = 1;
   extscalar = 1;
   nevery = 1;
 
   // a leading "v_" marks a variable name, stored without the prefix
   // tstyle is then decided in init(), otherwise it is CONSTANT
 
   tstr = NULL;
   if (strstr(arg[3],"v_") == arg[3]) {
     int n = strlen(&arg[3][2]) + 1;
     tstr = new char[n];
     strcpy(tstr,&arg[3][2]);
   } else {
     t_start = force->numeric(FLERR,arg[3]);
     t_target = t_start;
     tstyle = CONSTANT;
   }
 
   t_stop = force->numeric(FLERR,arg[4]);
   t_period = force->numeric(FLERR,arg[5]);
   seed = force->inumeric(FLERR,arg[6]);
 
   if (t_period <= 0.0) error->all(FLERR,"Fix langevin period must be > 0.0");
   if (seed <= 0) error->all(FLERR,"Illegal fix langevin command");
 
   // initialize Marsaglia RNG with processor-unique seed
 
   random = new RanMars(lmp,seed + comm->me);
 
   // allocate per-type arrays for force prefactors
   // indexed by atom type 1..ntypes, hence the +1
 
   gfactor1 = new double[atom->ntypes+1];
   gfactor2 = new double[atom->ntypes+1];
   ratio = new double[atom->ntypes+1];
 
   // optional args
 
   for (int i = 1; i <= atom->ntypes; i++) ratio[i] = 1.0;
   ascale = 0.0;
   gjfflag = 0;
   oflag = 0;
   tallyflag = 0;
   zeroflag = 0;
 
   int iarg = 7;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"angmom") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) ascale = 0.0;
       else ascale = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"gjf") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) gjfflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) gjfflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"omega") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) oflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) oflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scale") == 0) {
       // scale takes two values: atom type and its damping ratio
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix langevin command");
       int itype = force->inumeric(FLERR,arg[iarg+1]);
       double scale = force->numeric(FLERR,arg[iarg+2]);
       if (itype <= 0 || itype > atom->ntypes)
         error->all(FLERR,"Illegal fix langevin command");
       ratio[itype] = scale;
       iarg += 3;
     } else if (strcmp(arg[iarg],"tally") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) tallyflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) tallyflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"zero") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) zeroflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) zeroflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else error->all(FLERR,"Illegal fix langevin command");
   }
 
   // set temperature = NULL, user can override via fix_modify if wants bias
 
   id_temp = NULL;
   temperature = NULL;
 
   // flangevin is unallocated until first call to setup()
   // compute_scalar checks for this and returns 0.0 if flangevin is NULL
 
   energy = 0.0;
   flangevin = NULL;
   franprev = NULL;
   tforce = NULL;
   maxatom1 = maxatom2 = 0;
 
   // Setup atom-based array for franprev
   // register with Atom class
   // No need to set peratom_flag
   // as this data is for internal use only
 
   if (gjfflag) {
     nvalues = 3;
     grow_arrays(atom->nmax);
     atom->add_callback(0);
 
   // initialize franprev to zero
 
     int nlocal = atom->nlocal;
     for (int i = 0; i < nlocal; i++) {
       franprev[i][0] = 0.0;
       franprev[i][1] = 0.0;
       franprev[i][2] = 0.0;
     }
   }
 
   // the tallied flangevin values are stored before the 'zero yes'
   // correction is subtracted from the forces, so warn once on proc 0
 
+  if (tallyflag && zeroflag && comm->me == 0)
+    error->warning(FLERR,"Energy tally does not account for 'zero yes'");
 }
 
 /* ----------------------------------------------------------------------
    free the RNG, strings, per-type arrays and per-atom arrays
    if gjf was enabled, also free franprev and unregister the atom callback
 ------------------------------------------------------------------------- */
 
 FixLangevin::~FixLangevin()
 {
   // per-atom storage
 
   memory->destroy(tforce);
   memory->destroy(flangevin);
 
   // strings and per-type prefactors
 
   delete [] id_temp;
   delete [] tstr;
   delete [] ratio;
   delete [] gfactor2;
   delete [] gfactor1;
 
   delete random;
 
   if (gjfflag) {
     memory->destroy(franprev);
     atom->delete_callback(id,0);
   }
 }
 
 /* ----------------------------------------------------------------------
    return the bitmask of the four FixConst flags this fix sets
 ------------------------------------------------------------------------- */
 
 int FixLangevin::setmask()
 {
   return POST_FORCE | POST_FORCE_RESPA | END_OF_STEP | THERMO_ENERGY;
 }
 
 /* ----------------------------------------------------------------------
    validate settings against the current atom style and variables,
    then precompute per-type force prefactors and run-dependent flags
 ------------------------------------------------------------------------- */
 
 void FixLangevin::init()
 {
   const int use_angmom = (ascale != 0.0);
 
   if (oflag && !atom->sphere_flag)
     error->all(FLERR,"Fix langevin omega requires atom style sphere");
   if (use_angmom && !atom->ellipsoid_flag)
     error->all(FLERR,"Fix langevin angmom requires atom style ellipsoid");
 
   // resolve the temperature variable and record its style
 
   if (tstr) {
     tvar = input->variable->find(tstr);
     if (tvar < 0)
       error->all(FLERR,"Variable name for fix langevin does not exist");
 
     if (input->variable->equalstyle(tvar)) tstyle = EQUAL;
     else if (input->variable->atomstyle(tvar)) tstyle = ATOM;
     else error->all(FLERR,"Variable for fix langevin is invalid style");
   }
 
   // omega thermostat: every atom in the group needs a non-zero radius
 
   if (oflag) {
     const int nlocal = atom->nlocal;
     const int *mask = atom->mask;
     const double *radius = atom->radius;
 
     for (int i = 0; i < nlocal; i++) {
       if ((mask[i] & groupbit) && radius[i] == 0.0)
         error->one(FLERR,"Fix langevin omega requires extended particles");
     }
   }
 
   // angmom thermostat: every atom in the group needs an ellipsoid entry
 
   if (use_angmom) {
     avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
     if (!avec)
       error->all(FLERR,"Fix langevin angmom requires atom style ellipsoid");
 
     const int nlocal = atom->nlocal;
     const int *mask = atom->mask;
     const int *ellipsoid = atom->ellipsoid;
 
     for (int i = 0; i < nlocal; i++) {
       if ((mask[i] & groupbit) && ellipsoid[i] < 0)
         error->one(FLERR,"Fix langevin angmom requires extended particles");
     }
   }
 
   // per-type drag (gfactor1) and random (gfactor2) prefactors
   // only used when masses are per-type, i.e. no per-atom rmass array
 
   if (!atom->rmass) {
     const int ntypes = atom->ntypes;
     for (int itype = 1; itype <= ntypes; itype++) {
       gfactor1[itype] = -atom->mass[itype] / t_period / force->ftm2v;
       gfactor2[itype] = sqrt(atom->mass[itype]) *
         sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
         force->ftm2v;
       gfactor1[itype] *= 1.0/ratio[itype];
       gfactor2[itype] *= 1.0/sqrt(ratio[itype]);
     }
   }
 
   // bias removal is only used if a temperature compute with bias is set
 
   tbiasflag = (temperature && temperature->tempbias) ? BIAS : NOBIAS;
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   // GJF scaling factor depends on the current timestep
 
   if (gjfflag) gjffac = 1.0/(1.0+update->dt/2.0/t_period);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::setup(int vflag)
 {
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
     ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
     post_force_respa(vflag,nlevels_respa-1,0);
     ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
   }
 }
 
 /* ----------------------------------------------------------------------
    dispatch to the force routine matching the six run-time options
    option order: TSTYLEATOM, GJF, TALLY, BIAS, RMASS, ZERO
    with TEMPLATED_FIX_LANGEVIN each of the 2^6 combinations calls its own
      template instance so the options are not tested in the inner loop
    otherwise the options are passed as ordinary arguments
 ------------------------------------------------------------------------- */
 
 void FixLangevin::post_force(int vflag)
 {
   double *rmass = atom->rmass;
 
 #ifdef TEMPLATED_FIX_LANGEVIN
 
   // pack the options into a 6-bit key, most significant bit first
 
   int key = 0;
   if (tstyle == ATOM)    key |= 32;
   if (gjfflag)           key |= 16;
   if (tallyflag)         key |= 8;
   if (tbiasflag == BIAS) key |= 4;
   if (rmass)             key |= 2;
   if (zeroflag)          key |= 1;
 
   switch (key) {
   case  0: post_force_templated<0,0,0,0,0,0>(); break;
   case  1: post_force_templated<0,0,0,0,0,1>(); break;
   case  2: post_force_templated<0,0,0,0,1,0>(); break;
   case  3: post_force_templated<0,0,0,0,1,1>(); break;
   case  4: post_force_templated<0,0,0,1,0,0>(); break;
   case  5: post_force_templated<0,0,0,1,0,1>(); break;
   case  6: post_force_templated<0,0,0,1,1,0>(); break;
   case  7: post_force_templated<0,0,0,1,1,1>(); break;
   case  8: post_force_templated<0,0,1,0,0,0>(); break;
   case  9: post_force_templated<0,0,1,0,0,1>(); break;
   case 10: post_force_templated<0,0,1,0,1,0>(); break;
   case 11: post_force_templated<0,0,1,0,1,1>(); break;
   case 12: post_force_templated<0,0,1,1,0,0>(); break;
   case 13: post_force_templated<0,0,1,1,0,1>(); break;
   case 14: post_force_templated<0,0,1,1,1,0>(); break;
   case 15: post_force_templated<0,0,1,1,1,1>(); break;
   case 16: post_force_templated<0,1,0,0,0,0>(); break;
   case 17: post_force_templated<0,1,0,0,0,1>(); break;
   case 18: post_force_templated<0,1,0,0,1,0>(); break;
   case 19: post_force_templated<0,1,0,0,1,1>(); break;
   case 20: post_force_templated<0,1,0,1,0,0>(); break;
   case 21: post_force_templated<0,1,0,1,0,1>(); break;
   case 22: post_force_templated<0,1,0,1,1,0>(); break;
   case 23: post_force_templated<0,1,0,1,1,1>(); break;
   case 24: post_force_templated<0,1,1,0,0,0>(); break;
   case 25: post_force_templated<0,1,1,0,0,1>(); break;
   case 26: post_force_templated<0,1,1,0,1,0>(); break;
   case 27: post_force_templated<0,1,1,0,1,1>(); break;
   case 28: post_force_templated<0,1,1,1,0,0>(); break;
   case 29: post_force_templated<0,1,1,1,0,1>(); break;
   case 30: post_force_templated<0,1,1,1,1,0>(); break;
   case 31: post_force_templated<0,1,1,1,1,1>(); break;
   case 32: post_force_templated<1,0,0,0,0,0>(); break;
   case 33: post_force_templated<1,0,0,0,0,1>(); break;
   case 34: post_force_templated<1,0,0,0,1,0>(); break;
   case 35: post_force_templated<1,0,0,0,1,1>(); break;
   case 36: post_force_templated<1,0,0,1,0,0>(); break;
   case 37: post_force_templated<1,0,0,1,0,1>(); break;
   case 38: post_force_templated<1,0,0,1,1,0>(); break;
   case 39: post_force_templated<1,0,0,1,1,1>(); break;
   case 40: post_force_templated<1,0,1,0,0,0>(); break;
   case 41: post_force_templated<1,0,1,0,0,1>(); break;
   case 42: post_force_templated<1,0,1,0,1,0>(); break;
   case 43: post_force_templated<1,0,1,0,1,1>(); break;
   case 44: post_force_templated<1,0,1,1,0,0>(); break;
   case 45: post_force_templated<1,0,1,1,0,1>(); break;
   case 46: post_force_templated<1,0,1,1,1,0>(); break;
   case 47: post_force_templated<1,0,1,1,1,1>(); break;
   case 48: post_force_templated<1,1,0,0,0,0>(); break;
   case 49: post_force_templated<1,1,0,0,0,1>(); break;
   case 50: post_force_templated<1,1,0,0,1,0>(); break;
   case 51: post_force_templated<1,1,0,0,1,1>(); break;
   case 52: post_force_templated<1,1,0,1,0,0>(); break;
   case 53: post_force_templated<1,1,0,1,0,1>(); break;
   case 54: post_force_templated<1,1,0,1,1,0>(); break;
   case 55: post_force_templated<1,1,0,1,1,1>(); break;
   case 56: post_force_templated<1,1,1,0,0,0>(); break;
   case 57: post_force_templated<1,1,1,0,0,1>(); break;
   case 58: post_force_templated<1,1,1,0,1,0>(); break;
   case 59: post_force_templated<1,1,1,0,1,1>(); break;
   case 60: post_force_templated<1,1,1,1,0,0>(); break;
   case 61: post_force_templated<1,1,1,1,0,1>(); break;
   case 62: post_force_templated<1,1,1,1,1,0>(); break;
   case 63: post_force_templated<1,1,1,1,1,1>(); break;
   }
 
 #else
   post_force_untemplated(int(tstyle==ATOM), gjfflag, tallyflag,
                          int(tbiasflag==BIAS), int(rmass!=NULL), zeroflag);
 #endif
 }
 
 /* ----------------------------------------------------------------------
    rRESPA hook: only the outermost level applies the thermostat forces
 ------------------------------------------------------------------------- */
 
 void FixLangevin::post_force_respa(int vflag, int ilevel, int iloop)
 {
   if (ilevel != nlevels_respa-1) return;
   post_force(vflag);
 }
 
 /* ----------------------------------------------------------------------
    add Langevin drag and random forces to all atoms in the fix group
    the six options are template parameters or plain arguments,
      depending on TEMPLATED_FIX_LANGEVIN:
    Tp_TSTYLEATOM = take sqrt(target T) per atom from tforce
    Tp_GJF        = Gronbech-Jensen/Farago form: average the random force
                    with the previous one and scale all forces by gjffac
    Tp_TALLY      = store drag + random force per atom in flangevin
    Tp_BIAS       = remove velocity bias via the temperature compute
                    before computing drag, restore it afterwards
    Tp_RMASS      = use per-atom masses, else per-type prefactors
    Tp_ZERO       = subtract the group-averaged random force from each atom
 ------------------------------------------------------------------------- */
 
 #ifdef TEMPLATED_FIX_LANGEVIN
 template < int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
            int Tp_BIAS, int Tp_RMASS, int Tp_ZERO >
 void FixLangevin::post_force_templated()
 #else
 void FixLangevin::post_force_untemplated
   (int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
    int Tp_BIAS, int Tp_RMASS, int Tp_ZERO)
 #endif
 {
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   double gamma1,gamma2;
   double fdrag[3],fran[3],fsum[3],fsumall[3];
   bigint count;
 
   // refresh t_target/tsqrt (and tforce for per-atom targets)
 
   compute_target();
 
   // Tp_ZERO needs the group size to average the random force
 
   if (Tp_ZERO) {
     for (int d = 0; d < 3; d++) fsum[d] = 0.0;
     count = group->count(igroup);
     if (count == 0)
       error->all(FLERR,"Cannot zero Langevin force of 0 atoms");
   }
 
   // Tp_TALLY needs flangevin large enough for all local atoms
 
   if (Tp_TALLY && atom->nlocal > maxatom1) {
     memory->destroy(flangevin);
     maxatom1 = atom->nmax;
     memory->create(flangevin,maxatom1,3,"langevin:flangevin");
   }
 
   // some computes need the temperature evaluated on the current atoms
   // before their bias can be removed
 
   if (Tp_BIAS) temperature->compute_scalar();
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     if (Tp_TSTYLEATOM) tsqrt = sqrt(tforce[i]);
 
     // gamma1 = drag prefactor, gamma2 = random force prefactor
 
     if (Tp_RMASS) {
       gamma1 = -rmass[i] / t_period / ftm2v;
       gamma2 = sqrt(rmass[i]) * sqrt(24.0*boltz/t_period/dt/mvv2e) / ftm2v;
       gamma1 *= 1.0/ratio[type[i]];
       gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
     } else {
       gamma1 = gfactor1[type[i]];
       gamma2 = gfactor2[type[i]] * tsqrt;
     }
 
     // three uniform draws per atom, in x,y,z order
 
     for (int d = 0; d < 3; d++)
       fran[d] = gamma2*(random->uniform()-0.5);
 
     // drag uses the bias-free velocity when Tp_BIAS is set
     // a zero bias-free component marks a masked dof: no random force
 
     if (Tp_BIAS) temperature->remove_bias(i,v[i]);
     for (int d = 0; d < 3; d++) {
       fdrag[d] = gamma1*v[i][d];
       if (Tp_BIAS && v[i][d] == 0.0) fran[d] = 0.0;
     }
     if (Tp_BIAS) temperature->restore_bias(i,v[i]);
 
     if (Tp_GJF) {
       for (int d = 0; d < 3; d++) {
 
         // average with the previous random force, remember the new one
 
         const double fnew = fran[d];
         fran[d] = 0.5*(fnew+franprev[i][d]);
         franprev[i][d] = fnew;
 
         // scale drag, random and existing force
 
         fdrag[d] *= gjffac;
         fran[d] *= gjffac;
         f[i][d] *= gjffac;
       }
     }
 
     for (int d = 0; d < 3; d++) {
       f[i][d] += fdrag[d] + fran[d];
       if (Tp_TALLY) flangevin[i][d] = fdrag[d] + fran[d];
       if (Tp_ZERO) fsum[d] += fran[d];
     }
   }
 
   // remove the mean random force so the net random force on the group is 0
 
   if (Tp_ZERO) {
     MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world);
     for (int d = 0; d < 3; d++) fsumall[d] /= count;
 
     for (int i = 0; i < nlocal; i++) {
       if (!(mask[i] & groupbit)) continue;
       for (int d = 0; d < 3; d++) f[i][d] -= fsumall[d];
     }
   }
 
   // thermostat omega and angmom
 
   if (oflag) omega_thermostat();
   if (ascale) angmom_thermostat();
 }
 
 /* ----------------------------------------------------------------------
    set the current target temperature
    CONSTANT: ramp t_target linearly from t_start to t_stop, set tsqrt
    EQUAL:    t_target and tsqrt from the equal-style variable
    ATOM:     fill tforce[] from the atom-style variable (tsqrt not set here)
    a negative temperature from a variable is an error
 ------------------------------------------------------------------------- */
 
 void FixLangevin::compute_target()
 {
   if (tstyle == CONSTANT) {
 
     // delta = fraction of the run completed so far
 
     double delta = update->ntimestep - update->beginstep;
     if (delta != 0.0) delta /= update->endstep - update->beginstep;
 
     t_target = t_start + delta * (t_stop-t_start);
     tsqrt = sqrt(t_target);
     return;
   }
 
   // variable temperature: evaluate variable, wrapped with clear/add
 
   modify->clearstep_compute();
 
   if (tstyle == EQUAL) {
     t_target = input->variable->compute_equal(tvar);
     if (t_target < 0.0)
       error->one(FLERR,"Fix langevin variable returned negative temperature");
     tsqrt = sqrt(t_target);
 
   } else {
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
     // grow tforce if the local atom count exceeds its capacity
 
     if (nlocal > maxatom2) {
       maxatom2 = atom->nmax;
       memory->destroy(tforce);
       memory->create(tforce,maxatom2,"langevin:tforce");
     }
 
     input->variable->compute_atom(tvar,igroup,tforce,1,0);
 
     for (int i = 0; i < nlocal; i++) {
       if ((mask[i] & groupbit) && tforce[i] < 0.0)
         error->one(FLERR,
                    "Fix langevin variable returned negative temperature");
     }
   }
 
   modify->addstep_compute(update->ntimestep + 1);
 }
 
 /* ----------------------------------------------------------------------
    thermostat rotational dof of finite-size spheres via omega
    adds drag and random torque to each group atom with radius > 0
 ------------------------------------------------------------------------- */
 
 void FixLangevin::omega_thermostat()
 {
   double **torque = atom->torque;
   double **omega = atom->omega;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   // gamma1/gamma2 are rescaled by 10/3 and sqrt(10/3) for spheres
   // this does not affect rotational thermostatting
   // and gives correct rotational diffusivity behavior
 
   const double tendivthree = 10.0/3.0;
 
   double gamma1,gamma2;
   double tran[3];
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit) || !(radius[i] > 0.0)) continue;
 
     // moment of inertia of this sphere
 
     const double inertiaone = SINERTIA*radius[i]*radius[i]*rmass[i];
 
     if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
 
     gamma1 = -tendivthree*inertiaone / t_period / ftm2v;
     gamma2 = sqrt(inertiaone) * sqrt(80.0*boltz/t_period/dt/mvv2e) / ftm2v;
     gamma1 *= 1.0/ratio[type[i]];
     gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
 
     // draw the three random torque components first, then apply
 
     for (int d = 0; d < 3; d++)
       tran[d] = gamma2*(random->uniform()-0.5);
     for (int d = 0; d < 3; d++)
       torque[i][d] += gamma1*omega[i][d] + tran[d];
   }
 }
 
 /* ----------------------------------------------------------------------
    thermostat rotational dof of ellipsoids via angmom
    adds drag and random torque to each atom in the fix group
 ------------------------------------------------------------------------- */
 
 void FixLangevin::angmom_thermostat()
 {
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   double **torque = atom->torque;
   double **angmom = atom->angmom;
   double *rmass = atom->rmass;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   // gamma1/gamma2 are rescaled by ascale for aspherical particles
   // this does not affect rotational thermostatting
   // gives correct rotational diffusivity behavior if (nearly) spherical
   // any value will be incorrect for rotational diffusivity if aspherical
 
   double gamma1,gamma2;
   double inertia[3],omega[3],tran[3];
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     // principal moments from the ellipsoid shape, then body omega
 
     double *shape = bonus[ellipsoid[i]].shape;
     inertia[0] = EINERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
     inertia[1] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
     inertia[2] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
 
     double *quat = bonus[ellipsoid[i]].quat;
     MathExtra::mq_to_omega(angmom[i],quat,inertia,omega);
 
     if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
 
     gamma1 = -ascale / t_period / ftm2v;
     gamma2 = sqrt(ascale*24.0*boltz/t_period/dt/mvv2e) / ftm2v;
     gamma1 *= 1.0/ratio[type[i]];
     gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
 
     // draw the three random torque components first, then apply
 
     for (int d = 0; d < 3; d++)
       tran[d] = sqrt(inertia[d])*gamma2*(random->uniform()-0.5);
     for (int d = 0; d < 3; d++)
       torque[i][d] += inertia[d]*gamma1*omega[d] + tran[d];
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy transfer to thermal reservoir
    energy_onestep = sum over group atoms of flangevin . v
    energy accumulates energy_onestep * dt; no-op unless tally is enabled
 ------------------------------------------------------------------------- */
 
 void FixLangevin::end_of_step()
 {
   if (!tallyflag) return;
 
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   energy_onestep = 0.0;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     const double *fl = flangevin[i];
     const double *vi = v[i];
     energy_onestep += fl[0]*vi[0] + fl[1]*vi[1] +
       fl[2]*vi[2];
   }
 
   energy += energy_onestep*update->dt;
 }
 
 /* ----------------------------------------------------------------------
    set start, stop and current target temperature all to t_new
 ------------------------------------------------------------------------- */
 
 void FixLangevin::reset_target(double t_new)
 {
   t_stop = t_new;
   t_start = t_new;
   t_target = t_new;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::reset_dt()
 {
   // the per-type prefactor is only recomputed when per-type masses exist
   if (!atom->mass) return;
 
   // gfactor2 depends on the timestep, so rebuild it for every atom type
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     gfactor2[itype] = sqrt(atom->mass[itype]) *
       sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
       force->ftm2v;
 
     // apply the per-type scaling ratio
     gfactor2[itype] *= 1.0/sqrt(ratio[itype]);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixLangevin::modify_param(int narg, char **arg)
 {
   // "temp" is the only keyword handled here; 0 means "not recognized"
   if (strcmp(arg[0],"temp") != 0) return 0;
 
   if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
 
   // replace the stored compute ID with a private copy of arg[1]
   delete [] id_temp;
   const int nchars = strlen(arg[1]) + 1;
   id_temp = new char[nchars];
   strcpy(id_temp,arg[1]);
 
   // look the compute up and make sure it is usable as a temperature
   const int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
     error->all(FLERR,"Could not find fix_modify temperature ID");
   temperature = modify->compute[icompute];
 
   if (temperature->tempflag == 0)
     error->all(FLERR,
                "Fix_modify temperature ID does not compute temperature");
 
   // a group mismatch is only worth a warning, printed once by proc 0
   if (temperature->igroup != igroup && comm->me == 0)
     error->warning(FLERR,"Group for fix_modify temp != fix group");
 
   // two arguments consumed: the keyword and the compute ID
   return 2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixLangevin::compute_scalar()
 {
   // no tally requested, or the per-atom Langevin force array does not exist
   if (tallyflag == 0) return 0.0;
   if (flangevin == NULL) return 0.0;
 
   // capture the very first energy transfer to thermal reservoir:
   // when the current step is update->beginstep, rebuild the one-step
   // value here and seed the running total with half of it
 
   if (update->ntimestep == update->beginstep) {
     double **vel = atom->v;
     const int *groupmask = atom->mask;
     const int natoms = atom->nlocal;
 
     energy_onestep = 0.0;
     for (int iatom = 0; iatom < natoms; iatom++) {
       if (!(groupmask[iatom] & groupbit)) continue;
       energy_onestep += flangevin[iatom][0]*vel[iatom][0] +
         flangevin[iatom][1]*vel[iatom][1] +
         flangevin[iatom][2]*vel[iatom][2];
     }
     energy = 0.5*energy_onestep*update->dt;
   }
 
   // convert midstep energy back to previous fullstep energy
 
   double energy_me = energy - 0.5*energy_onestep*update->dt;
 
   // sum the per-processor values; the sign is flipped on return
 
   double energy_all;
   MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
   return -energy_all;
 }
 
 /* ----------------------------------------------------------------------
    extract thermostat properties
 ------------------------------------------------------------------------- */
 
 void *FixLangevin::extract(const char *str, int &dim)
 {
   // dim is always reported as 0
   dim = 0;
 
   // "t_target" is the only property name recognized; anything else -> NULL
   if (strcmp(str,"t_target") != 0) return NULL;
   return &t_target;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of tally array
 ------------------------------------------------------------------------- */
 
 double FixLangevin::memory_usage()
 {
   // promote nmax to double before multiplying: the original expression
   // evaluated atom->nmax*3 in int arithmetic, which can overflow for
   // very large per-processor atom counts
   const double nmax = (double) atom->nmax;
   const double bytes_per_vec3 = nmax*3 * sizeof(double);
 
   double bytes = 0.0;
 
   // one nmax x 3 array is counted when gjfflag is set
   // NOTE(review): presumably this is franprev - confirm against constructor
   if (gjfflag) bytes += bytes_per_vec3;
 
   // one nmax x 3 array is counted when energy tallying is enabled
   if (tallyflag) bytes += bytes_per_vec3;
 
   // per-atom target temperature array, only if it has been allocated
   if (tforce) bytes += nmax * sizeof(double);
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate atom-based array for franprev 
 ------------------------------------------------------------------------- */
 
 void FixLangevin::grow_arrays(int nmax)
 {
   // franprev stores three values per atom
   const int ncolumns = 3;
   memory->grow(franprev,nmax,ncolumns,"fix_langevin:franprev");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based array
 ------------------------------------------------------------------------- */
 
 void FixLangevin::copy_arrays(int i, int j, int delflag)
 {
   // duplicate the nvalues stored entries of atom i into slot j;
   // delflag is not used by this fix
   int col = 0;
   while (col < nvalues) {
     franprev[j][col] = franprev[i][col];
     col++;
   }
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixLangevin::pack_exchange(int i, double *buf)
 {
   // append the nvalues stored entries of atom i to the buffer
   int col = 0;
   while (col < nvalues) {
     buf[col] = franprev[i][col];
     col++;
   }
 
   // report how many doubles were written
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixLangevin::unpack_exchange(int nlocal, double *buf)
 {
   // store the received entries in slot nlocal of franprev
   int col = 0;
   while (col < nvalues) {
     franprev[nlocal][col] = buf[col];
     col++;
   }
 
   // report how many doubles were read
   return nvalues;
 }
diff --git a/src/fix_langevin.h b/src/fix_langevin.h
index ecfca919a..b73600010 100644
--- a/src/fix_langevin.h
+++ b/src/fix_langevin.h
@@ -1,153 +1,159 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(langevin,FixLangevin)
 
 #else
 
 #ifndef LMP_FIX_LANGEVIN_H
 #define LMP_FIX_LANGEVIN_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Langevin thermostat fix (registered above under the style name "langevin")
 
 class FixLangevin : public Fix {
  public:
   FixLangevin(class LAMMPS *, int, char **);
   virtual ~FixLangevin();
   int setmask();
   void init();
   void setup(int);
   virtual void post_force(int);
   void post_force_respa(int, int, int);
   // accumulates energy_onestep and energy when tallyflag is set
   virtual void end_of_step();
   // sets t_target, t_start and t_stop to the given temperature
   void reset_target(double);
   // recomputes the per-type gfactor2 prefactors for the current timestep
   void reset_dt();
   // handles the fix_modify "temp" keyword (assigns id_temp / temperature)
   int modify_param(int, char **);
   // returns the negated, MPI-summed energy tally (0.0 if tallying is off)
   virtual double compute_scalar();
   double memory_usage();
   // exposes t_target under the name "t_target"
   virtual void *extract(const char *, int &);
   // per-atom storage management for franprev
   void grow_arrays(int);
   void copy_arrays(int, int, int);
   int pack_exchange(int, double *);
   int unpack_exchange(int, double *);
 
  protected:
   // option flags; tallyflag enables the energy tally
   // NOTE(review): meaning of the other flags is set in the constructor - confirm there
   int gjfflag,oflag,tallyflag,zeroflag,tbiasflag;
   // scale factor entering the rotational damping/noise coefficients
   double ascale;
   // t_period divides the damping coefficients; t_target is the current target
   double t_start,t_stop,t_period,t_target;
   // per-type arrays; gfactor2 is rebuilt in reset_dt(), ratio rescales per type
   double *gfactor1,*gfactor2,*ratio;
   // running energy tally and the contribution of the most recent step
   double energy,energy_onestep;
   double tsqrt;
   int tstyle,tvar;
   double gjffac;
   char *tstr;
 
   class AtomVecEllipsoid *avec;
 
   int maxatom1,maxatom2;
   // per-atom 3-vector dotted with the velocity in the energy tally
   double **flangevin;
   // per-atom values; sqrt(tforce[i]) is used as tsqrt when tstyle is ATOM
   double *tforce;
   // per-atom array with nvalues entries per atom, migrated with the atoms
   double **franprev;
   int nvalues;
 
   // ID of, and pointer to, the temperature compute set via fix_modify temp
   char *id_temp;
   class Compute *temperature;
 
   int nlevels_respa;
   // random number generator used for the stochastic force/torque terms
   class RanMars *random;
   int seed;
 
   // comment next line to turn off templating
 #define TEMPLATED_FIX_LANGEVIN
 #ifdef TEMPLATED_FIX_LANGEVIN
   template < int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, 
 	     int Tp_BIAS, int Tp_RMASS, int Tp_ZERO > 
   void post_force_templated();
 #else
   void post_force_untemplated(int, int, int, 
 			      int, int, int);
 #endif
   void omega_thermostat();
   void angmom_thermostat();
   void compute_target();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix langevin period must be > 0.0
 
 The time window for temperature relaxation must be > 0
 
+W: Energy tally does not account for 'zero yes'
+
+The energy removed by using the 'zero yes' flag is not accounted
+for in the energy tally and thus energy conservation cannot be
+monitored in this case.
+
 E: Fix langevin omega requires atom style sphere
 
 Self-explanatory.
 
 E: Fix langevin angmom requires atom style ellipsoid
 
 Self-explanatory.
 
 E: Variable name for fix langevin does not exist
 
 Self-explanatory.
 
 E: Variable for fix langevin is invalid style
 
 It must be an equal-style variable.
 
 E: Fix langevin omega requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Fix langevin angmom requires extended particles
 
 This fix option cannot be used with point particles.
 
 E: Cannot zero Langevin force of 0 atoms
 
 The group has zero atoms, so you cannot request its force
 be zeroed.
 
 E: Fix langevin variable returned negative temperature
 
 Self-explanatory.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Group for fix_modify temp != fix group
 
 The fix_modify command is specifying a temperature computation that
 computes a temperature on a different group of atoms than the fix
 itself operates on.  This is probably not what you want to do.
 
 */
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index 88aa0e184..e7ff80162 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -1,1018 +1,1021 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "stdlib.h"
 #include "math.h"
 #include "fix_move.h"
 #include "atom.h"
 #include "group.h"
 #include "update.h"
 #include "modify.h"
 #include "force.h"
 #include "domain.h"
 #include "lattice.h"
 #include "comm.h"
 #include "respa.h"
 #include "input.h"
 #include "variable.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 // motion styles, chosen by the style keyword (arg[3]) in the constructor
 enum{LINEAR,WIGGLE,ROTATE,VARIABLE};
 // style of each variable used with the VARIABLE motion style, set in init()
 enum{EQUAL,ATOM};
 
 /* ---------------------------------------------------------------------- */
 
 FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   // arg[3] is the motion style keyword; each style checks its own
   // argument count below
 
   if (narg < 4) error->all(FLERR,"Illegal fix move command");
 
   restart_global = 1;
   restart_peratom = 1;
   peratom_flag = 1;
   size_peratom_cols = 3;
   peratom_freq = 1;
   time_integrate = 1;
   create_attribute = 1;
+  displaceflag = 0;
+  velocityflag = 0;
+  maxatom = 0;
 
   // parse args
   // iarg is left pointing at the first optional keyword after the
   // style-specific arguments
 
   int iarg;
   xvarstr = yvarstr = zvarstr = NULL;
   vxvarstr = vyvarstr = vzvarstr = NULL;
 
   if (strcmp(arg[3],"linear") == 0) {
 
     // linear: three velocity components, "NULL" leaves a component free
 
     if (narg < 7) error->all(FLERR,"Illegal fix move command");
     iarg = 7;
     mstyle = LINEAR;
     if (strcmp(arg[4],"NULL") == 0) vxflag = 0;
     else {
       vxflag = 1;
       vx = force->numeric(FLERR,arg[4]);
     }
     if (strcmp(arg[5],"NULL") == 0) vyflag = 0;
     else {
       vyflag = 1;
       vy = force->numeric(FLERR,arg[5]);
     }
     if (strcmp(arg[6],"NULL") == 0) vzflag = 0;
     else {
       vzflag = 1;
       vz = force->numeric(FLERR,arg[6]);
     }
 
   } else if (strcmp(arg[3],"wiggle") == 0) {
 
     // wiggle: three amplitudes ("NULL" leaves a component free) + period
 
     if (narg < 8) error->all(FLERR,"Illegal fix move command");
     iarg = 8;
     mstyle = WIGGLE;
     if (strcmp(arg[4],"NULL") == 0) axflag = 0;
     else {
       axflag = 1;
       ax = force->numeric(FLERR,arg[4]);
     }
     if (strcmp(arg[5],"NULL") == 0) ayflag = 0;
     else {
       ayflag = 1;
       ay = force->numeric(FLERR,arg[5]);
     }
     if (strcmp(arg[6],"NULL") == 0) azflag = 0;
     else {
       azflag = 1;
       az = force->numeric(FLERR,arg[6]);
     }
     period = force->numeric(FLERR,arg[7]);
 
     // omega_rotate = 2*pi/period is computed below, so a zero period
     // would divide by zero and yield non-finite positions
 
     if (period == 0.0) error->all(FLERR,"Illegal fix move command");
 
   } else if (strcmp(arg[3],"rotate") == 0) {
 
     // rotate: point on the axis, axis direction, period
 
     if (narg < 11) error->all(FLERR,"Illegal fix move command");
     iarg = 11;
     mstyle = ROTATE;
     point[0] = force->numeric(FLERR,arg[4]);
     point[1] = force->numeric(FLERR,arg[5]);
     point[2] = force->numeric(FLERR,arg[6]);
     axis[0] = force->numeric(FLERR,arg[7]);
     axis[1] = force->numeric(FLERR,arg[8]);
     axis[2] = force->numeric(FLERR,arg[9]);
     period = force->numeric(FLERR,arg[10]);
 
     // same reasoning as for wiggle: reject a zero period up front
 
     if (period == 0.0) error->all(FLERR,"Illegal fix move command");
 
   } else if (strcmp(arg[3],"variable") == 0) {
 
     // variable: six arguments, each either "NULL" or "v_name";
     // the name after the "v_" prefix is copied and resolved later in init()
 
     if (narg < 10) error->all(FLERR,"Illegal fix move command");
     iarg = 10;
     mstyle = VARIABLE;
     if (strcmp(arg[4],"NULL") == 0) xvarstr = NULL;
     else if (strstr(arg[4],"v_") == arg[4]) {
       int n = strlen(&arg[4][2]) + 1;
       xvarstr = new char[n];
       strcpy(xvarstr,&arg[4][2]);
     } else error->all(FLERR,"Illegal fix move command");
     if (strcmp(arg[5],"NULL") == 0) yvarstr = NULL;
     else if (strstr(arg[5],"v_") == arg[5]) {
       int n = strlen(&arg[5][2]) + 1;
       yvarstr = new char[n];
       strcpy(yvarstr,&arg[5][2]);
     } else error->all(FLERR,"Illegal fix move command");
     if (strcmp(arg[6],"NULL") == 0) zvarstr = NULL;
     else if (strstr(arg[6],"v_") == arg[6]) {
       int n = strlen(&arg[6][2]) + 1;
       zvarstr = new char[n];
       strcpy(zvarstr,&arg[6][2]);
     } else error->all(FLERR,"Illegal fix move command");
     if (strcmp(arg[7],"NULL") == 0) vxvarstr = NULL;
     else if (strstr(arg[7],"v_") == arg[7]) {
       int n = strlen(&arg[7][2]) + 1;
       vxvarstr = new char[n];
       strcpy(vxvarstr,&arg[7][2]);
     } else error->all(FLERR,"Illegal fix move command");
     if (strcmp(arg[8],"NULL") == 0) vyvarstr = NULL;
     else if (strstr(arg[8],"v_") == arg[8]) {
       int n = strlen(&arg[8][2]) + 1;
       vyvarstr = new char[n];
       strcpy(vyvarstr,&arg[8][2]);
     } else error->all(FLERR,"Illegal fix move command");
     if (strcmp(arg[9],"NULL") == 0) vzvarstr = NULL;
     else if (strstr(arg[9],"v_") == arg[9]) {
       int n = strlen(&arg[9][2]) + 1;
       vzvarstr = new char[n];
       strcpy(vzvarstr,&arg[9][2]);
     } else error->all(FLERR,"Illegal fix move command");
 
   } else error->all(FLERR,"Illegal fix move command");
 
   // optional args
   // "units box" disables lattice scaling, "units lattice" (default) enables it
 
   int scaleflag = 1;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"units") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix move command");
       if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0;
       else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
       else error->all(FLERR,"Illegal fix move command");
       iarg += 2;
     } else error->all(FLERR,"Illegal fix move command");
   }
 
   // error checks and warnings
 
   if (domain->dimension == 2) {
     if (mstyle == LINEAR && vzflag && vz != 0.0)
       error->all(FLERR,"Fix move cannot set linear z motion for 2d problem");
     if (mstyle == WIGGLE && azflag && az != 0.0)
       error->all(FLERR,"Fix move cannot set wiggle z motion for 2d problem");
     if (mstyle == ROTATE && (axis[0] != 0.0 || axis[1] != 0.0))
       error->all(FLERR,
                  "Fix move cannot rotate aroung non z-axis for 2d problem");
     if (mstyle == VARIABLE && (zvarstr || vzvarstr))
       error->all(FLERR,
                  "Fix move cannot define z or vz variable for 2d problem");
   }
 
   if (atom->angmom_flag && comm->me == 0)
     error->warning(FLERR,"Fix move does not update angular momentum");
   if (atom->ellipsoid_flag && comm->me == 0)
     error->warning(FLERR,"Fix move does not update quaternions");
 
   // setup scaling and apply scaling factors to velocity & amplitude
   // only reached with lattice units, so the lattice spacings are used
   // directly (the former "else scale = 1.0" branch could never execute)
 
   if ((mstyle == LINEAR || mstyle == WIGGLE || mstyle == ROTATE) &&
       scaleflag) {
     const double xscale = domain->lattice->xlattice;
     const double yscale = domain->lattice->ylattice;
     const double zscale = domain->lattice->zlattice;
 
     if (mstyle == LINEAR) {
       if (vxflag) vx *= xscale;
       if (vyflag) vy *= yscale;
       if (vzflag) vz *= zscale;
     } else if (mstyle == WIGGLE) {
       if (axflag) ax *= xscale;
       if (ayflag) ay *= yscale;
       if (azflag) az *= zscale;
     } else if (mstyle == ROTATE) {
       point[0] *= xscale;
       point[1] *= yscale;
       point[2] *= zscale;
     }
   }
 
   // set omega_rotate from period (period was checked to be non-zero above)
 
   if (mstyle == WIGGLE || mstyle == ROTATE) omega_rotate = 2.0*MY_PI / period;
 
   // runit = unit vector along rotation axis
 
   if (mstyle == ROTATE) {
     double len = sqrt(axis[0]*axis[0] + axis[1]*axis[1] + axis[2]*axis[2]);
     if (len == 0.0)
       error->all(FLERR,"Zero length rotation vector with fix move");
     runit[0] = axis[0]/len;
     runit[1] = axis[1]/len;
     runit[2] = axis[2]/len;
   }
 
   // set omega_flag if particles store omega
 
   omega_flag = atom->omega_flag;
 
   // perform initial allocation of atom-based array
   // register with Atom class
 
   xoriginal = NULL;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   atom->add_callback(1);
 
   // per-atom work arrays are allocated later, in init(), when needed
 
   displace = velocity = NULL;
 
   // xoriginal = initial unwrapped positions of atoms
   // atoms outside the fix group get a zero placeholder
 
   double **x = atom->x;
   imageint *image = atom->image;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) domain->unmap(x[i],image[i],xoriginal[i]);
     else xoriginal[i][0] = xoriginal[i][1] = xoriginal[i][2] = 0.0;
   }
 
   // all prescribed motion is measured from the timestep of fix creation
 
   time_origin = update->ntimestep;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixMove::~FixMove()
 {
   // detach this fix from the Atom class callback lists
   // (the same two callback types that the constructor registered)
 
   atom->delete_callback(id,0);
   atom->delete_callback(id,1);
 
   // free the variable-name strings copied by the constructor
 
   delete [] vzvarstr;
   delete [] vyvarstr;
   delete [] vxvarstr;
   delete [] zvarstr;
   delete [] yvarstr;
   delete [] xvarstr;
 
   // free the per-atom arrays owned by this fix
 
   memory->destroy(velocity);
   memory->destroy(displace);
   memory->destroy(xoriginal);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixMove::setmask()
 {
   // this fix takes part in both halves of the integration step,
   // for the regular and the rRESPA integrators
   return INITIAL_INTEGRATE | INITIAL_INTEGRATE_RESPA |
     FINAL_INTEGRATE | FINAL_INTEGRATE_RESPA;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixMove::init()
 {
   dt = update->dt;
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
 
   // set indices and style of all variables
 
   displaceflag = velocityflag = 0;
 
   if (mstyle == VARIABLE) {
     if (xvarstr) {
       xvar = input->variable->find(xvarstr);
       if (xvar < 0) error->all(FLERR,
                                "Variable name for fix move does not exist");
       if (input->variable->equalstyle(xvar)) xvarstyle = EQUAL;
       else if (input->variable->atomstyle(xvar)) xvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
     if (yvarstr) {
       yvar = input->variable->find(yvarstr);
       if (yvar < 0) error->all(FLERR,
                                "Variable name for fix move does not exist");
       if (input->variable->equalstyle(yvar)) yvarstyle = EQUAL;
       else if (input->variable->atomstyle(yvar)) yvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
     if (zvarstr) {
       zvar = input->variable->find(zvarstr);
       if (zvar < 0) error->all(FLERR,
                                "Variable name for fix move does not exist");
       if (input->variable->equalstyle(zvar)) zvarstyle = EQUAL;
       else if (input->variable->atomstyle(zvar)) zvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
     if (vxvarstr) {
       vxvar = input->variable->find(vxvarstr);
       if (vxvar < 0) error->all(FLERR,
                                 "Variable name for fix move does not exist");
       if (input->variable->equalstyle(vxvar)) vxvarstyle = EQUAL;
       else if (input->variable->atomstyle(vxvar)) vxvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
     if (vyvarstr) {
       vyvar = input->variable->find(vyvarstr);
       if (vyvar < 0) error->all(FLERR,
                                 "Variable name for fix move does not exist");
       if (input->variable->equalstyle(vyvar)) vyvarstyle = EQUAL;
       else if (input->variable->atomstyle(vyvar)) vyvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
     if (vzvarstr) {
       vzvar = input->variable->find(vzvarstr);
       if (vzvar < 0) error->all(FLERR,
                                 "Variable name for fix move does not exist");
       if (input->variable->equalstyle(vzvar)) vzvarstyle = EQUAL;
       else if (input->variable->atomstyle(vzvar)) vzvarstyle = ATOM;
       else error->all(FLERR,"Variable for fix move is invalid style");
     }
 
     if (xvarstr && xvarstyle == ATOM) displaceflag = 1;
     if (yvarstr && yvarstyle == ATOM) displaceflag = 1;
     if (zvarstr && zvarstyle == ATOM) displaceflag = 1;
     if (vxvarstr && vxvarstyle == ATOM) velocityflag = 1;
     if (vyvarstr && vyvarstyle == ATOM) velocityflag = 1;
     if (vzvarstr && vzvarstyle == ATOM) velocityflag = 1;
   }
 
   maxatom = atom->nmax;
   memory->destroy(displace);
   memory->destroy(velocity);
   if (displaceflag) memory->create(displace,maxatom,3,"move:displace");
   else displace = NULL;
   if (velocityflag) memory->create(velocity,maxatom,3,"move:velocity");
   else velocity = NULL;
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
 
 /* ----------------------------------------------------------------------
    set x,v of particles
 ------------------------------------------------------------------------- */
 
 void FixMove::initial_integrate(int vflag)
 {
   double dtfm;
   double xold[3],a[3],b[3],c[3],d[3],disp[3];
   double ddotr,dx,dy,dz;
 
   double delta = (update->ntimestep - time_origin) * dt;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // for linear: X = X0 + V*dt
 
   if (mstyle == LINEAR) {
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         xold[0] = x[i][0];
         xold[1] = x[i][1];
         xold[2] = x[i][2];
 
         if (vxflag) {
           v[i][0] = vx;
           x[i][0] = xoriginal[i][0] + vx*delta;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][0] += dtfm * f[i][0];
           x[i][0] += dtv * v[i][0];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][0] += dtfm * f[i][0];
           x[i][0] += dtv * v[i][0];
         }
 
         if (vyflag) {
           v[i][1] = vy;
           x[i][1] = xoriginal[i][1] + vy*delta;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][1] += dtfm * f[i][1];
           x[i][1] += dtv * v[i][1];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][1] += dtfm * f[i][1];
           x[i][1] += dtv * v[i][1];
         }
 
         if (vzflag) {
           v[i][2] = vz;
           x[i][2] = xoriginal[i][2] + vz*delta;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][2] += dtfm * f[i][2];
           x[i][2] += dtv * v[i][2];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][2] += dtfm * f[i][2];
           x[i][2] += dtv * v[i][2];
         }
 
         domain->remap_near(x[i],xold);
       }
     }
 
   // for wiggle: X = X0 + A sin(w*dt)
 
   } else if (mstyle == WIGGLE) {
     double arg = omega_rotate * delta;
     double sine = sin(arg);
     double cosine = cos(arg);
 
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         xold[0] = x[i][0];
         xold[1] = x[i][1];
         xold[2] = x[i][2];
 
         if (axflag) {
           v[i][0] = ax*omega_rotate*cosine;
           x[i][0] = xoriginal[i][0] + ax*sine;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][0] += dtfm * f[i][0];
           x[i][0] += dtv * v[i][0];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][0] += dtfm * f[i][0];
           x[i][0] += dtv * v[i][0];
         }
 
         if (ayflag) {
           v[i][1] = ay*omega_rotate*cosine;
           x[i][1] = xoriginal[i][1] + ay*sine;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][1] += dtfm * f[i][1];
           x[i][1] += dtv * v[i][1];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][1] += dtfm * f[i][1];
           x[i][1] += dtv * v[i][1];
         }
 
         if (azflag) {
           v[i][2] = az*omega_rotate*cosine;
           x[i][2] = xoriginal[i][2] + az*sine;
         } else if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][2] += dtfm * f[i][2];
           x[i][2] += dtv * v[i][2];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][2] += dtfm * f[i][2];
           x[i][2] += dtv * v[i][2];
         }
 
         domain->remap_near(x[i],xold);
       }
     }
 
   // for rotate by right-hand rule around omega:
   // P = point = vector = point of rotation
   // R = vector = axis of rotation
   // w = omega of rotation (from period)
   // X0 = xoriginal = initial coord of atom
   // R0 = runit = unit vector for R
   // D = X0 - P = vector from P to X0
   // C = (D dot R0) R0 = projection of atom coord onto R line
   // A = D - C = vector from R line to X0
   // B = R0 cross A = vector perp to A in plane of rotation
   // A,B define plane of circular rotation around R line
   // X = P + C + A cos(w*dt) + B sin(w*dt)
   // V = w R0 cross (A cos(w*dt) + B sin(w*dt))
 
   } else if (mstyle == ROTATE) {
     double arg = omega_rotate * delta;
     double sine = sin(arg);
     double cosine = cos(arg);
 
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         xold[0] = x[i][0];
         xold[1] = x[i][1];
         xold[2] = x[i][2];
 
         d[0] = xoriginal[i][0] - point[0];
         d[1] = xoriginal[i][1] - point[1];
         d[2] = xoriginal[i][2] - point[2];
         ddotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2];
         c[0] = ddotr*runit[0];
         c[1] = ddotr*runit[1];
         c[2] = ddotr*runit[2];
         a[0] = d[0] - c[0];
         a[1] = d[1] - c[1];
         a[2] = d[2] - c[2];
         b[0] = runit[1]*a[2] - runit[2]*a[1];
         b[1] = runit[2]*a[0] - runit[0]*a[2];
         b[2] = runit[0]*a[1] - runit[1]*a[0];
         disp[0] = a[0]*cosine  + b[0]*sine;
         disp[1] = a[1]*cosine  + b[1]*sine;
         disp[2] = a[2]*cosine  + b[2]*sine;
 
         x[i][0] = point[0] + c[0] + disp[0];
         x[i][1] = point[1] + c[1] + disp[1];
         x[i][2] = point[2] + c[2] + disp[2];
         v[i][0] = omega_rotate * (runit[1]*disp[2] - runit[2]*disp[1]);
         v[i][1] = omega_rotate * (runit[2]*disp[0] - runit[0]*disp[2]);
         v[i][2] = omega_rotate * (runit[0]*disp[1] - runit[1]*disp[0]);
         if (omega_flag) {
           omega[i][0] = omega_rotate*runit[0];
           omega[i][1] = omega_rotate*runit[1];
           omega[i][2] = omega_rotate*runit[2];
         }
 
         domain->remap_near(x[i],xold);
       }
     }
 
   // for variable: compute x,v from variables
 
   } else if (mstyle == VARIABLE) {
 
     // reallocate displace and velocity arrays as necessary
 
     if ((displaceflag || velocityflag) && nlocal > maxatom) {
       maxatom = atom->nmax;
       if (displaceflag) {
         memory->destroy(displace);
         memory->create(displace,maxatom,3,"move:displace");
       }
       if (velocityflag) {
         memory->destroy(velocity);
         memory->create(velocity,maxatom,3,"move:velocity");
       }
     }
 
     // pre-compute variable values, wrap with clear/add
 
     modify->clearstep_compute();
 
     if (xvarstr) {
       if (xvarstyle == EQUAL) dx = input->variable->compute_equal(xvar);
       else input->variable->compute_atom(xvar,igroup,&displace[0][0],3,0);
     }
     if (yvarstr) {
       if (yvarstyle == EQUAL) dy = input->variable->compute_equal(yvar);
       else input->variable->compute_atom(yvar,igroup,&displace[0][1],3,0);
     }
     if (zvarstr) {
       if (zvarstyle == EQUAL) dz = input->variable->compute_equal(zvar);
       else input->variable->compute_atom(zvar,igroup,&displace[0][2],3,0);
     }
     if (vxvarstr) {
       if (vxvarstyle == EQUAL) vx = input->variable->compute_equal(vxvar);
       else input->variable->compute_atom(vxvar,igroup,&velocity[0][0],3,0);
     }
     if (vyvarstr) {
       if (vyvarstyle == EQUAL) vy = input->variable->compute_equal(vyvar);
       else input->variable->compute_atom(vyvar,igroup,&velocity[0][1],3,0);
     }
     if (vzvarstr) {
       if (vzvarstyle == EQUAL) vz = input->variable->compute_equal(vzvar);
       else input->variable->compute_atom(vzvar,igroup,&velocity[0][2],3,0);
     }
 
     modify->addstep_compute(update->ntimestep + 1);
 
     // update x,v
 
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         xold[0] = x[i][0];
         xold[1] = x[i][1];
         xold[2] = x[i][2];
 
         if (xvarstr && vxvarstr) {
           if (vxvarstyle == EQUAL) v[i][0] = vx;
           else v[i][0] = velocity[i][0];
           if (xvarstyle == EQUAL) x[i][0] = xoriginal[i][0] + dx;
           else x[i][0] = xoriginal[i][0] + displace[i][0];
         } else if (xvarstr) {
           if (xvarstyle == EQUAL) x[i][0] = xoriginal[i][0] + dx;
           else x[i][0] = xoriginal[i][0] + displace[i][0];
         } else if (vxvarstr) {
           if (vxvarstyle == EQUAL) v[i][0] = vx;
           else v[i][0] = velocity[i][0];
           if (rmass) {
             dtfm = dtf / rmass[i];
             x[i][0] += dtv * v[i][0];
           } else {
             dtfm = dtf / mass[type[i]];
             x[i][0] += dtv * v[i][0];
           }
         } else {
           if (rmass) {
             dtfm = dtf / rmass[i];
             v[i][0] += dtfm * f[i][0];
             x[i][0] += dtv * v[i][0];
           } else {
             dtfm = dtf / mass[type[i]];
             v[i][0] += dtfm * f[i][0];
             x[i][0] += dtv * v[i][0];
           }
         }
 
         if (yvarstr && vyvarstr) {
           if (vyvarstyle == EQUAL) v[i][1] = vy;
           else v[i][1] = velocity[i][1];
           if (yvarstyle == EQUAL) x[i][1] = xoriginal[i][1] + dy;
           else x[i][1] = xoriginal[i][1] + displace[i][1];
         } else if (yvarstr) {
           if (yvarstyle == EQUAL) x[i][1] = xoriginal[i][1] + dy;
           else x[i][1] = xoriginal[i][1] + displace[i][1];
         } else if (vyvarstr) {
           if (vyvarstyle == EQUAL) v[i][1] = vy;
           else v[i][1] = velocity[i][1];
           if (rmass) {
             dtfm = dtf / rmass[i];
             x[i][1] += dtv * v[i][1];
           } else {
             dtfm = dtf / mass[type[i]];
             x[i][1] += dtv * v[i][1];
           }
         } else {
           if (rmass) {
             dtfm = dtf / rmass[i];
             v[i][1] += dtfm * f[i][1];
             x[i][1] += dtv * v[i][1];
           } else {
             dtfm = dtf / mass[type[i]];
             v[i][1] += dtfm * f[i][1];
             x[i][1] += dtv * v[i][1];
           }
         }
 
         if (zvarstr && vzvarstr) {
           if (vzvarstyle == EQUAL) v[i][2] = vz;
           else v[i][2] = velocity[i][2];
           if (zvarstyle == EQUAL) x[i][2] = xoriginal[i][2] + dz;
           else x[i][2] = xoriginal[i][2] + displace[i][2];
         } else if (zvarstr) {
           if (zvarstyle == EQUAL) x[i][2] = xoriginal[i][2] + dz;
           else x[i][2] = xoriginal[i][2] + displace[i][2];
         } else if (vzvarstr) {
           if (vzvarstyle == EQUAL) v[i][2] = vz;
           else v[i][2] = velocity[i][2];
           if (rmass) {
             dtfm = dtf / rmass[i];
             x[i][2] += dtv * v[i][2];
           } else {
             dtfm = dtf / mass[type[i]];
             x[i][2] += dtv * v[i][2];
           }
         } else {
           if (rmass) {
             dtfm = dtf / rmass[i];
             v[i][2] += dtfm * f[i][2];
             x[i][2] += dtv * v[i][2];
           } else {
             dtfm = dtf / mass[type[i]];
             v[i][2] += dtfm * f[i][2];
             x[i][2] += dtv * v[i][2];
           }
         }
 
         domain->remap_near(x[i],xold);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    final NVE of particles with NULL components
 ------------------------------------------------------------------------- */
 
 void FixMove::final_integrate()
 {
   double dtfm;
 
   int xflag = 1;
   if (mstyle == LINEAR && vxflag) xflag = 0;
   else if (mstyle == WIGGLE && axflag) xflag = 0;
   else if (mstyle == ROTATE) xflag = 0;
   else if (mstyle == VARIABLE && (xvarstr || vxvarstr)) xflag = 0;
 
   int yflag = 1;
   if (mstyle == LINEAR && vyflag) yflag = 0;
   else if (mstyle == WIGGLE && ayflag) yflag = 0;
   else if (mstyle == ROTATE) yflag = 0;
   else if (mstyle == VARIABLE && (yvarstr || vyvarstr)) yflag = 0;
 
   int zflag = 1;
   if (mstyle == LINEAR && vzflag) zflag = 0;
   else if (mstyle == WIGGLE && azflag) zflag = 0;
   else if (mstyle == ROTATE) zflag = 0;
   else if (mstyle == VARIABLE && (zvarstr || vzvarstr)) zflag = 0;
 
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       if (xflag) {
         if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][0] += dtfm * f[i][0];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][0] += dtfm * f[i][0];
         }
       }
 
       if (yflag) {
         if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][1] += dtfm * f[i][1];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][1] += dtfm * f[i][1];
         }
       }
 
       if (zflag) {
         if (rmass) {
           dtfm = dtf / rmass[i];
           v[i][2] += dtfm * f[i][2];
         } else {
           dtfm = dtf / mass[type[i]];
           v[i][2] += dtfm * f[i][2];
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixMove::initial_integrate_respa(int vflag, int ilevel, int iloop)
 {
   // only the outermost level updates v and x
   // every other level is a no-op
 
   if (ilevel != nlevels_respa-1) return;
   initial_integrate(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixMove::final_integrate_respa(int ilevel, int iloop)
 {
   // as in initial_integrate_respa(), act on the outermost level only
 
   if (ilevel != nlevels_respa-1) return;
   final_integrate();
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 double FixMove::memory_usage()
 {
   // one nmax x 3 array of doubles is always counted; one more each is
   // added when displaceflag and velocityflag are set
   // convert nmax to double before multiplying so the product cannot
   // overflow integer arithmetic for very large per-proc atom counts
 
   const double one_array = (double) atom->nmax * 3 * sizeof(double);
 
   double bytes = one_array;
   if (displaceflag) bytes += one_array;
   if (velocityflag) bytes += one_array;
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
 ------------------------------------------------------------------------- */
 
 void FixMove::write_restart(FILE *fp)
 {
   // the saved state is a single value: time_origin stored as a double
 
   const int n = 1;
   double list[1];
   list[0] = time_origin;
 
   // only proc 0 writes: byte count first, then the values
 
   if (comm->me != 0) return;
 
   int size = n * sizeof(double);
   fwrite(&size,sizeof(int),1,fp);
   fwrite(list,sizeof(double),n,fp);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixMove::restart(char *buf)
 {
   // buf is interpreted as an array of doubles; its first entry is the
   // time_origin value stored by write_restart()
   // NOTE(review): the value is narrowed through int; if time_origin is a
   // wider integer type, large timestep values would be truncated - confirm
   // against the class declaration
 
   double *list = (double *) buf;
   time_origin = static_cast<int> (list[0]);
 }
 
 /* ----------------------------------------------------------------------
    allocate atom-based array
 ------------------------------------------------------------------------- */
 
 void FixMove::grow_arrays(int nmax)
 {
   // resize xoriginal to nmax x 3
   memory->grow(xoriginal,nmax,3,"move:xoriginal");
   // re-point array_atom, since grow() may have changed the xoriginal pointer
   array_atom = xoriginal;
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based array
 ------------------------------------------------------------------------- */
 
 void FixMove::copy_arrays(int i, int j, int delflag)
 {
   // duplicate the stored original position of atom i into slot j
   // delflag is not used here
 
   for (int k = 0; k < 3; k++) xoriginal[j][k] = xoriginal[i][k];
 }
 
 /* ----------------------------------------------------------------------
    initialize one atom's array values, called when atom is created
 ------------------------------------------------------------------------- */
 
 void FixMove::set_arrays(int i)
 {
   double **x = atom->x;
   imageint *image = atom->image;
   int *mask = atom->mask;
 
   // particle not in group
 
   if (!(mask[i] & groupbit)) {
     xoriginal[i][0] = xoriginal[i][1] = xoriginal[i][2] = 0.0;
     return;
   }
 
   // current time still equal fix creation time
 
   if (update->ntimestep == time_origin) {
     domain->unmap(x[i],image[i],xoriginal[i]);
     return;
   }
 
   // backup particle to time_origin
 
   if (mstyle == VARIABLE)
     error->all(FLERR,"Cannot add atoms to fix move variable");
 
   domain->unmap(x[i],image[i],xoriginal[i]);
   double delta = (update->ntimestep - time_origin) * update->dt;
 
   if (mstyle == LINEAR) {
     if (vxflag) xoriginal[i][0] -= vx * delta;
     if (vyflag) xoriginal[i][1] -= vy * delta;
     if (vzflag) xoriginal[i][2] -= vz * delta;
   } else if (mstyle == WIGGLE) {
     double arg = omega_rotate * delta;
     double sine = sin(arg);
     if (axflag) xoriginal[i][0] -= ax*sine;
     if (ayflag) xoriginal[i][1] -= ay*sine;
     if (azflag) xoriginal[i][2] -= az*sine;
   } else if (mstyle == ROTATE) {
     double a[3],b[3],c[3],d[3],disp[3],ddotr;
     double arg = - omega_rotate * delta;
     double sine = sin(arg);
     double cosine = cos(arg);
     d[0] = x[i][0] - point[0];
     d[1] = x[i][1] - point[1];
     d[2] = x[i][2] - point[2];
     ddotr = d[0]*runit[0] + d[1]*runit[1] + d[2]*runit[2];
     c[0] = ddotr*runit[0];
     c[1] = ddotr*runit[1];
     c[2] = ddotr*runit[2];
 
     a[0] = d[0] - c[0];
     a[1] = d[1] - c[1];
     a[2] = d[2] - c[2];
     b[0] = runit[1]*a[2] - runit[2]*a[1];
     b[1] = runit[2]*a[0] - runit[0]*a[2];
     b[2] = runit[0]*a[1] - runit[1]*a[0];
     disp[0] = a[0]*cosine  + b[0]*sine;
     disp[1] = a[1]*cosine  + b[1]*sine;
     disp[2] = a[2]*cosine  + b[2]*sine;
 
     xoriginal[i][0] = point[0] + c[0] + disp[0];
     xoriginal[i][1] = point[1] + c[1] + disp[1];
     xoriginal[i][2] = point[2] + c[2] + disp[2];
   }
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixMove::pack_exchange(int i, double *buf)
 {
   // three values per atom: the stored original position
 
   const int nvalues = 3;
   for (int k = 0; k < nvalues; k++) buf[k] = xoriginal[i][k];
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixMove::unpack_exchange(int nlocal, double *buf)
 {
   // mirror of pack_exchange(): read three values into slot nlocal
 
   const int nvalues = 3;
   for (int k = 0; k < nvalues; k++) xoriginal[nlocal][k] = buf[k];
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for restart file
 ------------------------------------------------------------------------- */
 
 int FixMove::pack_restart(int i, double *buf)
 {
   // slot 0 holds the number of values written (itself included),
   // followed by the three original coordinates
 
   const int nvalues = 4;
   buf[0] = nvalues;
   for (int k = 0; k < 3; k++) buf[k+1] = xoriginal[i][k];
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values from atom->extra array to restart the fix
 ------------------------------------------------------------------------- */
 
 void FixMove::unpack_restart(int nlocal, int nth)
 {
   double *values = atom->extra[nlocal];
 
   // hop over the first nth sets of extra values
   // each set begins with its own length
 
   int offset = 0;
   for (int iset = 0; iset < nth; iset++)
     offset += static_cast<int> (values[offset]);
 
   // step past the length entry of this fix's own set
 
   offset++;
 
   for (int k = 0; k < 3; k++) xoriginal[nlocal][k] = values[offset+k];
 }
 
 /* ----------------------------------------------------------------------
    maxsize of any atom's restart data
 ------------------------------------------------------------------------- */
 
 int FixMove::maxsize_restart()
 {
   // matches the number of values written by pack_restart()
   return 4;
 }
 
 /* ----------------------------------------------------------------------
    size of atom nlocal's restart data
 ------------------------------------------------------------------------- */
 
 int FixMove::size_restart(int nlocal)
 {
   // same fixed size for every atom; nlocal is not used
   return 4;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixMove::reset_dt()
 {
   // this fix rejects any timestep change by raising an error
   error->all(FLERR,"Resetting timestep size is not allowed with fix move");
 }
diff --git a/src/fix_nve_limit.cpp b/src/fix_nve_limit.cpp
index 2104d622a..42ea5a676 100644
--- a/src/fix_nve_limit.cpp
+++ b/src/fix_nve_limit.cpp
@@ -1,239 +1,240 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_nve_limit.h"
 #include "atom.h"
 #include "force.h"
 #include "update.h"
 #include "respa.h"
 #include "modify.h"
 #include "comm.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 /* ---------------------------------------------------------------------- */
 
 FixNVELimit::FixNVELimit(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   // exactly four arguments are accepted; the fourth is the limit value
 
   if (narg != 4) error->all(FLERR,"Illegal fix nve/limit command");
   xlimit = force->numeric(FLERR,arg[3]);
 
   // fix properties: time integrator that provides a global scalar
 
   time_integrate = 1;
   scalar_flag = 1;
   global_freq = 1;
   extscalar = 1;
 
   // counter of velocity rescalings starts at zero
 
   ncount = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNVELimit::setmask()
 {
   // this fix acts in both integration stages, with and without respa
 
   return INITIAL_INTEGRATE | FINAL_INTEGRATE |
     INITIAL_INTEGRATE_RESPA | FINAL_INTEGRATE_RESPA;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNVELimit::init()
 {
   // cache timestep-dependent factors
   // vlimitsq = square of (xlimit / dtv); velocities with a larger squared
   // magnitude are rescaled by the integrate methods
   // the rescaling counter is reset here
 
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   vlimitsq = (xlimit/dtv) * (xlimit/dtv);
   ncount = 0;
 
   // with a respa integrator, keep a pointer to its per-level step sizes
 
   if (strstr(update->integrate_style,"respa"))
     step_respa = ((Respa *) update->integrate)->step;
 
   // warn if using fix shake, which will lead to invalid constraint forces
   // the warning is printed by proc 0 only, once per matching fix
 
   for (int i = 0; i < modify->nfix; i++)
-    if (strcmp(modify->fix[i]->style,"shake") == 0) {
+    if ((strcmp(modify->fix[i]->style,"shake") == 0)
+        || (strcmp(modify->fix[i]->style,"rattle") == 0)) {
       if (comm->me == 0)
-        error->warning(FLERR,"Should not use fix nve/limit with fix shake");
+        error->warning(FLERR,"Should not use fix nve/limit with fix shake or fix rattle");
     }
 }
 
 /* ----------------------------------------------------------------------
    allow for both per-type and per-atom mass
 ------------------------------------------------------------------------- */
 
 void FixNVELimit::initial_integrate(int vflag)
 {
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double *mass = atom->mass;
   double *rmass = atom->rmass;
   int *type = atom->type;
   int *mask = atom->mask;
 
   // loop over the first nfirst atoms only when the fix group is the
   // atom firstgroup, otherwise over all local atoms
 
   const int nlocal =
     (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     double *vi = v[i];
     double *xi = x[i];
 
     // velocity update, using per-atom mass if defined, else per-type mass
 
     const double dtfm = dtf / (rmass ? rmass[i] : mass[type[i]]);
     vi[0] += dtfm * f[i][0];
     vi[1] += dtfm * f[i][1];
     vi[2] += dtfm * f[i][2];
 
     // rescale the velocity down to the limit if it is exceeded
     // each rescaling is counted
 
     const double vsq = vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2];
     if (vsq > vlimitsq) {
       ncount++;
       const double scale = sqrt(vlimitsq/vsq);
       vi[0] *= scale;
       vi[1] *= scale;
       vi[2] *= scale;
     }
 
     // position update with the (possibly rescaled) velocity
 
     xi[0] += dtv * vi[0];
     xi[1] += dtv * vi[1];
     xi[2] += dtv * vi[2];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNVELimit::final_integrate()
 {
   double **v = atom->v;
   double **f = atom->f;
   double *mass = atom->mass;
   double *rmass = atom->rmass;
   int *type = atom->type;
   int *mask = atom->mask;
 
   // loop over the first nfirst atoms only when the fix group is the
   // atom firstgroup, otherwise over all local atoms
 
   const int nlocal =
     (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     double *vi = v[i];
 
     // velocity update, using per-atom mass if defined, else per-type mass
 
     const double dtfm = dtf / (rmass ? rmass[i] : mass[type[i]]);
     vi[0] += dtfm * f[i][0];
     vi[1] += dtfm * f[i][1];
     vi[2] += dtfm * f[i][2];
 
     // rescale the velocity down to the limit if it is exceeded
     // each rescaling is counted
 
     const double vsq = vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2];
     if (vsq > vlimitsq) {
       ncount++;
       const double scale = sqrt(vlimitsq/vsq);
       vi[0] *= scale;
       vi[1] *= scale;
       vi[2] *= scale;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNVELimit::initial_integrate_respa(int vflag, int ilevel, int iloop)
 {
   // use the step size of the current respa level
 
   dtv = step_respa[ilevel];
   dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
 
   // level 0 updates v and x; all other levels update v only
 
   if (ilevel != 0) final_integrate();
   else initial_integrate(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNVELimit::final_integrate_respa(int ilevel, int iloop)
 {
   // set the force factor from this respa level's step size, then do the
   // velocity update; iloop is not used
   dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
   final_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNVELimit::reset_dt()
 {
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   vlimitsq = (xlimit/dtv) * (xlimit/dtv);
 }
 
 /* ----------------------------------------------------------------------
    number of velocity rescalings (ncount) summed over all procs
 ------------------------------------------------------------------------- */
 
 double FixNVELimit::compute_scalar()
 {
   // sum this proc's rescaling counter over all procs
   // the reduction is done in double precision
 
   double mine = ncount;
   double total;
   MPI_Allreduce(&mine,&total,1,MPI_DOUBLE,MPI_SUM,world);
   return total;
 }
diff --git a/src/fix_temp_csvr.cpp b/src/fix_temp_csvr.cpp
index 09ffb951d..6c9a04001 100644
--- a/src/fix_temp_csvr.cpp
+++ b/src/fix_temp_csvr.cpp
@@ -1,339 +1,339 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
-   Based on code by Paulo Raiteri (Curtin U) and Giovanni Bussi (SISSA)
+   Based on code by Paolo Raiteri (Curtin U) and Giovanni Bussi (SISSA)
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "stdlib.h"
 #include "math.h"
 #include "fix_temp_csvr.h"
 #include "atom.h"
 #include "force.h"
 #include "memory.h"
 #include "comm.h"
 #include "input.h"
 #include "variable.h"
 #include "group.h"
 #include "update.h"
 #include "modify.h"
 #include "compute.h"
 #include "random_mars.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // values of "which": whether the temperature compute has a velocity bias
 enum{NOBIAS,BIAS};
 // values of "tstyle": fixed target temperature vs equal-style variable
 enum{CONSTANT,EQUAL};
 
 /* -------------------------------------------------------------------
   returns a random deviate parameterized by the integer ia
   (presumably gamma-distributed of order ia, see use in sumnoises())
   ia < 1 returns 0.0 without drawing any random numbers
 ---------------------------------------------------------------------- */
 double FixTempCSVR::gamdev(const int ia)
 {
   int j;
   double am,e,s,v1,v2,x,y;
 
   if (ia < 1) return 0.0;
   if (ia < 6) {
     // small ia: -log of the product of ia uniform random numbers
     x=1.0;
     for (j=1; j<=ia; j++)
       x *= random->uniform();
     x = -log(x);
   } else {
     // larger ia: rejection loop
   restart:
     do {
       do {
         // draw (v1,v2) until the point lies within the unit circle
         do {
           v1 = random->uniform();
           v2 = 2.0*random->uniform() - 1.0;
         } while (v1*v1 + v2*v2 > 1.0);
 
         y=v2/v1;
         am=ia-1;
         s=sqrt(2.0*am+1.0);
         x=s*y+am;
         // candidate must be positive
       } while (x <= 0.0);
 
       // start over if the exponent below is very negative or v1 is tiny
       // NOTE(review): looks like a guard against underflow in exp() and
       // against a huge y=v2/v1 - confirm
       if (am*log(x/am)-s*y < -700 || v1<0.00001) {
         goto restart;
       }
 
       // accept the candidate when a uniform draw does not exceed e
       e=(1.0+y*y)*exp(am*log(x/am)-s*y);
     } while (random->uniform() > e);
   }
   return x;
 }
 
 /* -------------------------------------------------------------------
   returns the sum of n independent gaussian noises squared
   (i.e. equivalent to summing the square of the return values of nn
    calls to gasdev)
 ---------------------------------------------------------------------- */
 double FixTempCSVR::sumnoises(int nn) {
   // nothing to sum
 
   if (nn == 0) return 0.0;
 
   // even count: entirely handled by gamdev()
 
   if (nn != 1 && nn % 2 == 0) return 2.0 * gamdev(nn / 2);
 
   // remaining cases draw one explicit gaussian first
 
   const double rr = random->gaussian();
   const double rrsq = rr*rr;
   if (nn == 1) return rrsq;
 
   // other odd counts: gamdev() for nn-1 terms plus the squared gaussian
 
   return  2.0 * gamdev((nn-1) / 2) + rrsq;
 }
 
 /* -------------------------------------------------------------------
   returns the scaling factor for velocities to thermalize
   the system so it samples the canonical ensemble
 ---------------------------------------------------------------------- */
 
 double FixTempCSVR::resamplekin(double ekin_old, double ekin_new){
   // degrees of freedom of the temperature compute
   const double tdof = temperature->dof;
 
   // decay factor over one timestep and the matching noise prefactor
   const double decay = exp(-update->dt/t_period);
   const double noisefac = (1.0-decay)*ekin_new/ekin_old/tdof;
 
   // random numbers: one gaussian, then the summed squared noises for the
   // remaining tdof-1 terms (drawn in this order)
   const double gauss = random->gaussian();
   const double noisesum = sumnoises(tdof - 1);
 
   // squared scale factor; the caller multiplies velocities by its root
   const double scalesq =
     decay + noisefac*(gauss*gauss+noisesum) + 2.0*gauss*sqrt(decay*noisefac);
   return sqrt(scalesq);
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixTempCSVR::FixTempCSVR(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (narg != 7) error->all(FLERR,"Illegal fix temp/csvr command");
 
   // the thermostat is applied on every step
 
   nevery = 1;
   scalar_flag = 1;
   global_freq = nevery;
   dynamic_group_allow = 1;
   extscalar = 1;
 
   // arg[3] is either "v_name" (store the variable name) or a number
 
   tstr = NULL;
   if (strncmp(arg[3],"v_",2) == 0) {
     const char *varname = arg[3] + 2;
     tstr = new char[strlen(varname) + 1];
     strcpy(tstr,varname);
     tstyle = EQUAL;
   } else {
     t_start = force->numeric(FLERR,arg[3]);
     t_target = t_start;
     tstyle = CONSTANT;
   }
 
   t_stop = force->numeric(FLERR,arg[4]);
   t_period = force->numeric(FLERR,arg[5]);
   const int seed = force->inumeric(FLERR,arg[6]);
 
   // period and seed must both be positive
 
   if (t_period <= 0.0) error->all(FLERR,"Illegal fix temp/csvr command");
   if (seed <= 0) error->all(FLERR,"Illegal fix temp/csvr command");
 
   // random number generator, seeded differently on each proc
 
   random = new RanMars(lmp,seed + comm->me);
 
   // create a compute of style temp for the fix group
   // its ID is the fix ID with "_temp" appended
 
   id_temp = new char[strlen(id) + 6];
   strcpy(id_temp,id);
   strcat(id_temp,"_temp");
 
   char *newarg[3];
   newarg[0] = id_temp;
   newarg[1] = group->names[igroup];
   newarg[2] = (char *) "temp";
   modify->add_compute(3,newarg);
 
   // remember that this fix owns the compute
 
   tflag = 1;
 
   nmax = -1;
   energy = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixTempCSVR::~FixTempCSVR()
 {
   // variable name copy made in the constructor (NULL if none was given)
   delete [] tstr;
 
   // delete temperature if fix created it
   // tflag is cleared by modify_param() once another compute is assigned
 
   if (tflag) modify->delete_compute(id_temp);
   delete [] id_temp;
 
   delete random;
   nmax = -1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixTempCSVR::setmask()
 {
   // request the end_of_step() callback and the thermo energy flag
 
   return END_OF_STEP | THERMO_ENERGY;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTempCSVR::init()
 {
 
   // check variable
 
   if (tstr) {
     tvar = input->variable->find(tstr);
     if (tvar < 0)
       error->all(FLERR,"Variable name for fix temp/csvr does not exist");
     if (input->variable->equalstyle(tvar)) tstyle = EQUAL;
     else error->all(FLERR,"Variable for fix temp/csvr is invalid style");
   }
 
   int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
     error->all(FLERR,"Temperature ID for fix temp/csvr does not exist");
   temperature = modify->compute[icompute];
 
   if (temperature->tempbias) which = BIAS;
   else which = NOBIAS;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTempCSVR::end_of_step()
 {
 
   // set current t_target
   // if variable temp, evaluate variable, wrap with clear/add
   // delta = fraction of the run completed, used to ramp t_start -> t_stop
 
   double delta = update->ntimestep - update->beginstep;
 
   if (delta != 0.0) delta /= update->endstep - update->beginstep;
   if (tstyle == CONSTANT)
     t_target = t_start + delta * (t_stop-t_start);
   else {
     modify->clearstep_compute();
     t_target = input->variable->compute_equal(tvar);
     if (t_target < 0.0)
       error->one(FLERR,
                  "Fix temp/csvr variable returned negative temperature");
     modify->addstep_compute(update->ntimestep + nevery);
   }
 
   // current and target kinetic energy from the temperature compute
 
   const double t_current = temperature->compute_scalar();
   const double efactor = 0.5 * temperature->dof * force->boltz;
   const double ekin_old = t_current * efactor;
   const double ekin_new = t_target * efactor;
 
   // nothing to do if there are no degrees of freedom
   // resamplekin() divides by dof and by ekin_old, so continuing would
   // scale all velocities by a non-finite factor
   // the test is placed after compute_scalar() in case that call updates
   // dof (e.g. for a dynamic group) - NOTE(review): confirm
 
   if (temperature->dof < 1) return;
 
   // compute velocity scaling factor on root node and broadcast
 
   double lamda;
   if (comm->me == 0) {
     lamda = resamplekin(ekin_old, ekin_new);
   }
   MPI_Bcast(&lamda,1,MPI_DOUBLE,0,world);
 
   double * const * const v = atom->v;
   const int * const mask = atom->mask;
   const int nlocal = atom->nlocal;
 
   // scale velocities of atoms in the fix group
   // with a bias, it is removed before scaling and restored afterwards
 
   if (which == NOBIAS) {
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         v[i][0] *= lamda;
         v[i][1] *= lamda;
         v[i][2] *= lamda;
       }
     }
   } else {
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         temperature->remove_bias(i,v[i]);
         v[i][0] *= lamda;
         v[i][1] *= lamda;
         v[i][2] *= lamda;
         temperature->restore_bias(i,v[i]);
       }
     }
   }
 
   // tally the kinetic energy transferred between heat bath and system
 
   energy += ekin_old * (1.0 - lamda*lamda);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixTempCSVR::modify_param(int narg, char **arg)
 {
   // only the "temp" keyword is processed; anything else returns 0
 
   if (strcmp(arg[0],"temp") != 0) return 0;
   if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
 
   // remove the compute created by this fix, if it is still the one in use
 
   if (tflag) {
     modify->delete_compute(id_temp);
     tflag = 0;
   }
 
   // replace the stored compute ID with a copy of the new one
 
   delete [] id_temp;
   id_temp = new char[strlen(arg[1]) + 1];
   strcpy(id_temp,arg[1]);
 
   // the new compute must exist and must compute a temperature
 
   const int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
     error->all(FLERR,"Could not find fix_modify temperature ID");
   temperature = modify->compute[icompute];
 
   if (temperature->tempflag == 0)
     error->all(FLERR,
                "Fix_modify temperature ID does not compute temperature");
 
   // a differing group only triggers a warning, printed by proc 0
 
   if (comm->me == 0 && temperature->igroup != igroup)
     error->warning(FLERR,"Group for fix_modify temp != fix group");
 
   // two arguments were consumed
 
   return 2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTempCSVR::reset_target(double t_new)
 {
   // start, stop and current target temperature all become t_new
 
   t_stop = t_new;
   t_start = t_new;
   t_target = t_new;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixTempCSVR::compute_scalar()
 {
   // running tally of kinetic energy exchanged, accumulated in end_of_step()
   return energy;
 }
 
 /* ----------------------------------------------------------------------
    extract thermostat properties
 ------------------------------------------------------------------------- */
 
 void *FixTempCSVR::extract(const char *str, int &dim)
 {
   // dim is always set to 0
   // "t_target" is the only recognized name; others yield NULL
 
   dim=0;
   if (strcmp(str,"t_target") != 0) return NULL;
   return &t_target;
 }
diff --git a/src/pair.cpp b/src/pair.cpp
index 14af9f38a..75f1e6aa0 100644
--- a/src/pair.cpp
+++ b/src/pair.cpp
@@ -1,1650 +1,1654 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "ctype.h"
 #include "float.h"
 #include "limits.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "domain.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "accelerator_cuda.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // EWALD_F = 2/sqrt(pi), multiplies grij*expm2 in the tabulated Ewald force
 
 #define EWALD_F 1.12837917
 
 // NOTE(review): these enumerators are not referenced in the visible part of
 //   this file - confirm where they are used before changing them
 
 enum{NONE,RLINEAR,RSQ,BMP};
 
 // allocate space for static class instance variable and initialize it
 // the Pair constructor increments it to give each instance its instance_me
 
 int Pair::instance_total = 0;
 
 /* ---------------------------------------------------------------------- */
 
 Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
 {
   // give this instance the next index from the class-wide counter
 
   instance_me = instance_total++;
 
   THIRD = 1.0/3.0;
 
   // energy accumulators
 
   eng_vdwl = 0.0;
   eng_coul = 0.0;
 
   // communication sizes
 
   comm_forward = 0;
   comm_reverse = 0;
   comm_reverse_off = 0;
 
   // default capability flags
 
   single_enable = 1;
   restartinfo = 1;
   respa_enable = 0;
   one_coeff = 0;
   no_virial_fdotr_compute = 0;
   writedata = 0;
   ghostneigh = 0;
 
   // no extra per-pair output by default
 
   nextra = 0;
   pvector = NULL;
   single_extra = 0;
   svector = NULL;
 
   // long-range solver flags are all off by default
 
   ewaldflag = 0;
   pppmflag = 0;
   msmflag = 0;
   dispersionflag = 0;
   tip4pflag = 0;
   dipoleflag = 0;
   reinitflag = 1;
 
   // pair_modify settings
 
   compute_flag = 1;
   manybody_flag = 0;
   offset_flag = 0;
   mix_flag = GEOMETRIC;
   tail_flag = 0;
   etail = 0.0;
   ptail = 0.0;
   etail_ij = 0.0;
   ptail_ij = 0.0;
   ncoultablebits = 12;
   ndisptablebits = 12;
   tabinner = sqrt(2.0);
   tabinner_disp = sqrt(2.0);
 
   allocated = 0;
   suffix_flag = Suffix::NONE;
 
   // per-atom energy/virial arrays are allocated on demand
 
   maxeatom = 0;
   maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
 
   // CUDA and KOKKOS data masks
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
 
   execution_space = Host;
   datamask_read = ALL_MASK;
   datamask_modify = ALL_MASK;
 
   copymode = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Pair::~Pair()
 {
   // when copymode is set the per-atom arrays are left untouched
 
   if (!copymode) {
     memory->destroy(eatom);
     memory->destroy(vatom);
   }
 }
 
 /* ----------------------------------------------------------------------
    modify parameters of the pair style
    pair_hybrid has its own version of this routine
      to apply modifications to each of its sub-styles
 ------------------------------------------------------------------------- */
 
 void Pair::modify_params(int narg, char **arg)
 {
   if (narg == 0) error->all(FLERR,"Illegal pair_modify command");
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"mix") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       if (strcmp(arg[iarg+1],"geometric") == 0) mix_flag = GEOMETRIC;
       else if (strcmp(arg[iarg+1],"arithmetic") == 0) mix_flag = ARITHMETIC;
       else if (strcmp(arg[iarg+1],"sixthpower") == 0) mix_flag = SIXTHPOWER;
       else error->all(FLERR,"Illegal pair_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"shift") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) offset_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) offset_flag = 0;
       else error->all(FLERR,"Illegal pair_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"table") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       ncoultablebits = force->inumeric(FLERR,arg[iarg+1]);
       if (ncoultablebits > sizeof(float)*CHAR_BIT)
         error->all(FLERR,"Too many total bits for bitmapped lookup table");
       iarg += 2;
     } else if (strcmp(arg[iarg],"table/disp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       ndisptablebits = force->inumeric(FLERR,arg[iarg+1]);
       if (ndisptablebits > sizeof(float)*CHAR_BIT)
         error->all(FLERR,"Too many total bits for bitmapped lookup table");
       iarg += 2;
     } else if (strcmp(arg[iarg],"tabinner") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       tabinner = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"tabinner/disp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       tabinner_disp = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"tail") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) tail_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) tail_flag = 0;
       else error->all(FLERR,"Illegal pair_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"compute") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) compute_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) compute_flag = 0;
       else error->all(FLERR,"Illegal pair_modify command");
       iarg += 2;
     } else error->all(FLERR,"Illegal pair_modify command");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Pair::init()
 {
   // sequence of this routine:
   //   1. sanity-check the pair_modify shift/tail/compute settings
   //   2. warn about manybody potentials combined with special_bond exclusions
   //   3. require that all I,I coefficients are set
   //   4. run the style-specific init_style()
   //   5. call init_one() for every I <= J to set cutsq, cutforce and to
   //      accumulate the tail corrections etail/ptail
   // all warnings are issued by proc 0 only so they are printed once
 
   int i,j;
 
   if (offset_flag && tail_flag)
     error->all(FLERR,"Cannot have both pair_modify shift and tail set to yes");
   if (tail_flag && domain->dimension == 2)
     error->all(FLERR,"Cannot use pair tail corrections with 2d simulations");
   if (tail_flag && domain->nonperiodic && comm->me == 0)
     error->warning(FLERR,"Using pair tail corrections with nonperiodic system");
+  if (!compute_flag && tail_flag && comm->me == 0)
+    error->warning(FLERR,"Using pair tail corrections with compute set to no");
+  if (!compute_flag && offset_flag && comm->me == 0)
+    error->warning(FLERR,"Using pair potential shift with compute set to no");
 
   // for manybody potentials
   // check if bonded exclusions could invalidate the neighbor list
 
   if (manybody_flag && atom->molecular) {
     int flag = 0;
     if (atom->nbonds > 0 && force->special_lj[1] == 0.0 &&
         force->special_coul[1] == 0.0) flag = 1;
     if (atom->nangles > 0 && force->special_lj[2] == 0.0 &&
         force->special_coul[2] == 0.0) flag = 1;
     if (atom->ndihedrals > 0 && force->special_lj[3] == 0.0 &&
         force->special_coul[3] == 0.0) flag = 1;
     if (flag && comm->me == 0)
       error->warning(FLERR,"Using a manybody potential with "
                      "bonds/angles/dihedrals and special_bond exclusions");
   }
 
   // I,I coeffs must be set
   // init_one() will check if I,J is set explicitly or inferred by mixing
 
   if (!allocated) error->all(FLERR,"All pair coeffs are not set");
 
   for (i = 1; i <= atom->ntypes; i++)
     if (setflag[i][i] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // style-specific initialization
 
   init_style();
 
   // call init_one() for each I,J
   // set cutsq for each I,J, used to neighbor
   // cutforce = max of all I,J cutoffs
 
   cutforce = 0.0;
   etail = ptail = 0.0;
   double cut;
 
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       cut = init_one(i,j);
       cutsq[i][j] = cutsq[j][i] = cut*cut;
       cutforce = MAX(cutforce,cut);
 
       // etail_ij/ptail_ij are read right after init_one(i,j)
       // off-diagonal pairs are counted twice (I,J and J,I)
 
       if (tail_flag) {
         etail += etail_ij;
         ptail += ptail_ij;
         if (i != j) {
           etail += etail_ij;
           ptail += ptail_ij;
         }
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    reset all type-based params by invoking init_one() for each I,J
    called by fix adapt after it changes one or more params
 ------------------------------------------------------------------------- */
 
 void Pair::reinit()
 {
   // generalize this error message if reinit() is used by more than fix adapt
 
   if (!reinitflag)
     error->all(FLERR,"Fix adapt interface to this pair style not supported");
 
   // rebuild the tail corrections from scratch
 
   etail = 0.0;
   ptail = 0.0;
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       init_one(itype,jtype);
       if (!tail_flag) continue;
 
       // unlike pairs contribute twice (I,J and J,I), like pairs once
 
       const int ncount = (itype == jtype) ? 1 : 2;
       for (int m = 0; m < ncount; m++) {
         etail += etail_ij;
         ptail += ptail_ij;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to a pair style
    specific pair style can override this function
      if needs its own error checks
      if needs another kind of neighbor list
    request default neighbor list = half list
 ------------------------------------------------------------------------- */
 
 void Pair::init_style()
 {
   // register a neighbor list request for this pair instance
   neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    specific pair style can override this function
 ------------------------------------------------------------------------- */
 
 void Pair::init_list(int which, NeighList *ptr)
 {
   // the base class keeps a single list pointer; "which" is not used here
   list = ptr;
 }
 
 /* ----------------------------------------------------------------------
    setup Coulomb force tables used in compute routines
 ------------------------------------------------------------------------- */
 
 void Pair::init_tables(double cut_coul, double *cut_respa)
 {
   // cut_coul = Coulomb cutoff the tables are built up to
   // cut_respa = NULL for a regular run, otherwise entries [2] and [3] are
   //   read as the bounds of the rRESPA switching region
   // errors out if no KSpace style is defined
 
   int masklo,maskhi;
   double r,grij,expm2,derfc,egamma,fgamma,rsw;
   double qqrd2e = force->qqrd2e;
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   double g_ewald = force->kspace->g_ewald;
   
   double cut_coulsq = cut_coul * cut_coul;
   
   // NOTE(review): init_bitmap() presumably fills masklo, maskhi, ncoulmask
   //   and ncoulshiftbits - its definition is not visible here, confirm
 
   tabinnersq = tabinner*tabinner;
   init_bitmap(tabinner,cut_coul,ncoultablebits,
               masklo,maskhi,ncoulmask,ncoulshiftbits);
 
   // ntable = 2^ncoultablebits
 
   int ntable = 1;
   for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
 
   // linear lookup tables of length N = 2^ncoultablebits
   // stored value = value at lower edge of bin
   // d values = delta from lower edge to upper edge of bin
 
   // discard tables left over from a previous call
 
   if (ftable) free_tables();
 
   memory->create(rtable,ntable,"pair:rtable");
   memory->create(ftable,ntable,"pair:ftable");
   memory->create(ctable,ntable,"pair:ctable");
   memory->create(etable,ntable,"pair:etable");
   memory->create(drtable,ntable,"pair:drtable");
   memory->create(dftable,ntable,"pair:dftable");
   memory->create(dctable,ntable,"pair:dctable");
   memory->create(detable,ntable,"pair:detable");
 
   // v and p tables are only needed when rRESPA cutoffs are supplied
 
   if (cut_respa == NULL) {
     vtable = ptable = dvtable = dptable = NULL;
   } else {
     memory->create(vtable,ntable,"pair:vtable");
     memory->create(ptable,ntable,"pair:ptable");
     memory->create(dvtable,ntable,"pair:dvtable");
     memory->create(dptable,ntable,"pair:dptable");
   }
 
   // the union lets a bin index be written as int bits and read back
   //   as the float rsq value of that bin
 
   union_int_float_t rsq_lookup;
   union_int_float_t minrsq_lookup;
   int itablemin;
   minrsq_lookup.i = 0 << ncoulshiftbits;
   minrsq_lookup.i |= maskhi;
 
   for (int i = 0; i < ntable; i++) {
 
     // try the masklo bit pattern first
     // if that rsq is below tabinnersq use the maskhi pattern instead
 
     rsq_lookup.i = i << ncoulshiftbits;
     rsq_lookup.i |= masklo;
     if (rsq_lookup.f < tabinnersq) {
       rsq_lookup.i = i << ncoulshiftbits;
       rsq_lookup.i |= maskhi;
     }
     r = sqrtf(rsq_lookup.f);
 
     // damping terms: gamma/dgamma from KSpace for MSM, erfc/exp otherwise
 
     if (msmflag) {
       egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
       fgamma = 1.0 + (rsq_lookup.f/cut_coulsq)*
         force->kspace->dgamma(r/cut_coul);
     } else {
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       derfc = erfc(grij);
     }
     if (cut_respa == NULL) {
       rtable[i] = rsq_lookup.f;
       ctable[i] = qqrd2e/r;
       if (msmflag) {
         ftable[i] = qqrd2e/r * fgamma;
         etable[i] = qqrd2e/r * egamma;
       } else {
         ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
         etable[i] = qqrd2e/r * derfc;
       }
     } else {
 
       // rRESPA: ftable starts with the bare 1/r part removed (the -1.0),
       //   vtable keeps the full damped value, ptable the bare qqrd2e/r
 
       rtable[i] = rsq_lookup.f;
       ctable[i] = 0.0;
       ptable[i] = qqrd2e/r;
       if (msmflag) {
         ftable[i] = qqrd2e/r * (fgamma - 1.0);
         etable[i] = qqrd2e/r * egamma;
         vtable[i] = qqrd2e/r * fgamma;
       } else {
         ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0);
         etable[i] = qqrd2e/r * derfc;
         vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
       }
 
       // between cut_respa[2] and cut_respa[3] the bare term is blended
       //   back in with the switch rsw*rsw*(3-2*rsw)
       // beyond cut_respa[3] the full damped force is stored
 
       if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) {
         if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) {
           rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]);
           ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw);
           ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw);
         } else {
           if (msmflag) ftable[i] = qqrd2e/r * fgamma;
           else ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
           ctable[i] = qqrd2e/r;
         }
       }
     }
 
     // track the smallest rsq stored in any bin
 
     minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
   }
 
   // from here on tabinnersq is the smallest tabulated rsq
 
   tabinnersq = minrsq_lookup.f;
 
   int ntablem1 = ntable - 1;
 
   // drtable stores the inverse bin width, the other d tables plain deltas
 
   for (int i = 0; i < ntablem1; i++) {
     drtable[i] = 1.0/(rtable[i+1] - rtable[i]);
     dftable[i] = ftable[i+1] - ftable[i];
     dctable[i] = ctable[i+1] - ctable[i];
     detable[i] = etable[i+1] - etable[i];
   }
 
   if (cut_respa) {
     for (int i = 0; i < ntablem1; i++) {
       dvtable[i] = vtable[i+1] - vtable[i];
       dptable[i] = ptable[i+1] - ptable[i];
     }
   }
 
   // get the delta values for the last table entries
   // tables are connected periodically between 0 and ntablem1
 
   drtable[ntablem1] = 1.0/(rtable[0] - rtable[ntablem1]);
   dftable[ntablem1] = ftable[0] - ftable[ntablem1];
   dctable[ntablem1] = ctable[0] - ctable[ntablem1];
   detable[ntablem1] = etable[0] - etable[ntablem1];
   if (cut_respa) {
     dvtable[ntablem1] = vtable[0] - vtable[ntablem1];
     dptable[ntablem1] = ptable[0] - ptable[ntablem1];
   }
 
   // get the correct delta values at itablemax
   // smallest r is in bin itablemin
   // largest r is in bin itablemax, which is itablemin-1,
   //   or ntablem1 if itablemin=0
   // deltas at itablemax only needed if corresponding rsq < cut*cut
   // if so, compute deltas between rsq and cut*cut
 
   double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp;
   p_tmp = 0.0;
   v_tmp = 0.0;
   itablemin = minrsq_lookup.i & ncoulmask;
   itablemin >>= ncoulshiftbits;
   int itablemax = itablemin - 1;
   if (itablemin == 0) itablemax = ntablem1;
   rsq_lookup.i = itablemax << ncoulshiftbits;
   rsq_lookup.i |= maskhi;
 
   if (rsq_lookup.f < cut_coulsq) {
 
     // re-evaluate all tabulated quantities exactly at the cutoff
     // same formulas as in the table-fill loop above
 
     rsq_lookup.f = cut_coulsq;
     r = sqrtf(rsq_lookup.f);
     if (msmflag) {
       egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
       fgamma = 1.0 + (rsq_lookup.f/cut_coulsq)*
         force->kspace->dgamma(r/cut_coul);
     } else {
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       derfc = erfc(grij);
     }
     if (cut_respa == NULL) {
       c_tmp = qqrd2e/r;
       if (msmflag) {
         f_tmp = qqrd2e/r * fgamma;
         e_tmp = qqrd2e/r * egamma;
       } else {
         f_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
         e_tmp = qqrd2e/r * derfc;
       }
     } else {
       c_tmp = 0.0;
       p_tmp = qqrd2e/r;
       if (msmflag) {
         f_tmp = qqrd2e/r * (fgamma - 1.0);
         e_tmp = qqrd2e/r * egamma;
         v_tmp = qqrd2e/r * fgamma;
       } else {
         f_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0);
         e_tmp = qqrd2e/r * derfc;
         v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
       }
       if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) {
         if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) {
           rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]);
           f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw);
           c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw);
         } else {
           if (msmflag) f_tmp = qqrd2e/r * fgamma;
           else f_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2);
           c_tmp = qqrd2e/r;
         }
       }
     }
 
     // overwrite the deltas of the last bin so it ends at the cutoff
 
     drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]);
     dftable[itablemax] = f_tmp - ftable[itablemax];
     dctable[itablemax] = c_tmp - ctable[itablemax];
     detable[itablemax] = e_tmp - etable[itablemax];
     if (cut_respa) {
       dvtable[itablemax] = v_tmp - vtable[itablemax];
       dptable[itablemax] = p_tmp - ptable[itablemax];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
  setup force tables for dispersion used in compute routines
  ------------------------------------------------------------------------- */
 
 void Pair::init_tables_disp(double cut_lj_global)
 {
   int masklo,maskhi;
   double rsq;
   double g_ewald_6 = force->kspace->g_ewald_6;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   
   tabinnerdispsq = tabinner_disp*tabinner_disp;
   init_bitmap(tabinner_disp,cut_lj_global,ndisptablebits,
               masklo,maskhi,ndispmask,ndispshiftbits);
   
   int ntable = 1;
   for (int i = 0; i < ndisptablebits; i++) ntable *= 2;
   
   // linear lookup tables of length N = 2^ndisptablebits
   // stored value = value at lower edge of bin
   // d values = delta from lower edge to upper edge of bin
   
   if (fdisptable) free_disp_tables();
   
   memory->create(rdisptable,ntable,"pair:rdisptable");
   memory->create(fdisptable,ntable,"pair:fdisptable");
   memory->create(edisptable,ntable,"pair:edisptable");
   memory->create(drdisptable,ntable,"pair:drdisptable");
   memory->create(dfdisptable,ntable,"pair:dfdisptable");
   memory->create(dedisptable,ntable,"pair:dedisptable");
   
   union_int_float_t rsq_lookup;
   union_int_float_t minrsq_lookup;
   int itablemin;
   minrsq_lookup.i = 0 << ndispshiftbits;
   minrsq_lookup.i |= maskhi;
   
   for (int i = 0; i < ntable; i++) {
     rsq_lookup.i = i << ndispshiftbits;
     rsq_lookup.i |= masklo;
     if (rsq_lookup.f < tabinnerdispsq) {
       rsq_lookup.i = i << ndispshiftbits;
       rsq_lookup.i |= maskhi;
     }
     rsq = rsq_lookup.f;
     register double x2 = g2*rsq, a2 = 1.0/x2;
     x2 = a2*exp(-x2);
     
     rdisptable[i] = rsq_lookup.f;
     fdisptable[i] = g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
     edisptable[i] = g6*((a2+1.0)*a2+0.5)*x2;
     
     minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
   }
   
   tabinnerdispsq = minrsq_lookup.f;
   
   int ntablem1 = ntable - 1;
   
   for (int i = 0; i < ntablem1; i++) {
     drdisptable[i] = 1.0/(rdisptable[i+1] - rdisptable[i]);
     dfdisptable[i] = fdisptable[i+1] - fdisptable[i];
     dedisptable[i] = edisptable[i+1] - edisptable[i];
   }
   
   // get the delta values for the last table entries
   // tables are connected periodically between 0 and ntablem1
   
   drdisptable[ntablem1] = 1.0/(rdisptable[0] - rdisptable[ntablem1]);
   dfdisptable[ntablem1] = fdisptable[0] - fdisptable[ntablem1];
   dedisptable[ntablem1] = edisptable[0] - edisptable[ntablem1];
   
   // get the correct delta values at itablemax
   // smallest r is in bin itablemin
   // largest r is in bin itablemax, which is itablemin-1,
   //   or ntablem1 if itablemin=0
   // deltas at itablemax only needed if corresponding rsq < cut*cut
   // if so, compute deltas between rsq and cut*cut
   
   double f_tmp,e_tmp;
   double cut_lj_globalsq;
   itablemin = minrsq_lookup.i & ndispmask;
   itablemin >>= ndispshiftbits;
   int itablemax = itablemin - 1;
   if (itablemin == 0) itablemax = ntablem1;
   rsq_lookup.i = itablemax << ndispshiftbits;
   rsq_lookup.i |= maskhi;
   
   if (rsq_lookup.f < (cut_lj_globalsq = cut_lj_global * cut_lj_global)) {
     rsq_lookup.f = cut_lj_globalsq;
     
     register double x2 = g2*rsq, a2 = 1.0/x2;
     x2 = a2*exp(-x2);
     f_tmp = g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
     e_tmp = g6*((a2+1.0)*a2+0.5)*x2;
     
     drdisptable[itablemax] = 1.0/(rsq_lookup.f - rdisptable[itablemax]);
     dfdisptable[itablemax] = f_tmp - fdisptable[itablemax];
     dedisptable[itablemax] = e_tmp - edisptable[itablemax];
   }
 }
 
 /* ----------------------------------------------------------------------
    free memory for tables used in Coulombic pair computations
 ------------------------------------------------------------------------- */
 
 void Pair::free_tables()
 {
   // value tables
 
   memory->destroy(rtable);
   memory->destroy(ftable);
   memory->destroy(ctable);
   memory->destroy(etable);
   memory->destroy(vtable);
   memory->destroy(ptable);
 
   // matching delta tables
 
   memory->destroy(drtable);
   memory->destroy(dftable);
   memory->destroy(dctable);
   memory->destroy(detable);
   memory->destroy(dvtable);
   memory->destroy(dptable);
 }
 
 /* ----------------------------------------------------------------------
   free memory for tables used in pair computations for dispersion
   ------------------------------------------------------------------------- */
 
 void Pair::free_disp_tables()
 {
   // value tables
 
   memory->destroy(rdisptable);
   memory->destroy(fdisptable);
   memory->destroy(edisptable);
 
   // matching delta tables
 
   memory->destroy(drdisptable);
   memory->destroy(dfdisptable);
   memory->destroy(dedisptable);
 }
 /* ----------------------------------------------------------------------
    mixing of pair potential prefactors (epsilon)
 ------------------------------------------------------------------------- */
 
 double Pair::mix_energy(double eps1, double eps2, double sig1, double sig2)
 {
   // geometric and arithmetic mixing both use the geometric mean of epsilon
 
   if (mix_flag == GEOMETRIC || mix_flag == ARITHMETIC)
     return sqrt(eps1*eps2);
 
   // any unrecognized mixing rule yields 0.0
 
   if (mix_flag != SIXTHPOWER) return 0.0;
 
   // sixthpower: 2 sqrt(eps1 eps2) sig1^3 sig2^3 / (sig1^6 + sig2^6)
 
   const double numer = 2.0 * sqrt(eps1*eps2) *
     pow(sig1,3.0) * pow(sig2,3.0);
   const double denom = pow(sig1,6.0) + pow(sig2,6.0);
   return (numer / denom);
 }
 
 /* ----------------------------------------------------------------------
    mixing of pair potential distances (sigma, cutoff)
 ------------------------------------------------------------------------- */
 
 double Pair::mix_distance(double sig1, double sig2)
 {
   // geometric mean
 
   if (mix_flag == GEOMETRIC) return sqrt(sig1*sig2);
 
   // arithmetic mean
 
   if (mix_flag == ARITHMETIC) return (0.5 * (sig1+sig2));
 
   // sixth root of the mean of the sixth powers
 
   if (mix_flag == SIXTHPOWER) {
     const double mean6 = 0.5 * (pow(sig1,6.0) + pow(sig2,6.0));
     return pow(mean6,1.0/6.0);
   }
 
   // any unrecognized mixing rule yields 0.0
 
   return 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Pair::compute_dummy(int eflag, int vflag)
 {
   // nothing requested: just clear evflag
 
   if (!eflag && !vflag) {
     evflag = 0;
     return;
   }
 
   // otherwise do the regular energy/virial setup
 
   ev_setup(eflag,vflag);
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void Pair::ev_setup(int eflag, int vflag)
 {
   evflag = 1;
 
   // split eflag into its global (eflag % 2) and per-atom (eflag / 2) parts
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   // split vflag into its global (vflag % 4) and per-atom (vflag / 4) parts
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,comm->nthreads*maxeatom,"pair:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,comm->nthreads*maxvatom,6,"pair:vatom");
   }
 
   // zero accumulators
   // use force->newton instead of newton_pair
   //   b/c some bonds/dihedrals call pair::ev_tally with pairwise info
 
   if (eflag_global) eng_vdwl = eng_coul = 0.0;
 
   if (vflag_global)
     for (int k = 0; k < 6; k++) virial[k] = 0.0;
 
   if (eflag_atom) {
     int ntotal = atom->nlocal;
     if (force->newton) ntotal += atom->nghost;
     for (int m = 0; m < ntotal; m++) eatom[m] = 0.0;
   }
 
   if (vflag_atom) {
     int ntotal = atom->nlocal;
     if (force->newton) ntotal += atom->nghost;
     for (int m = 0; m < ntotal; m++)
       for (int k = 0; k < 6; k++) vatom[m][k] = 0.0;
   }
 
   // if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
   // compute global virial via (F dot r) instead of via pairwise summation
   // unset other flags as appropriate
 
   vflag_fdotr = 0;
   if (vflag_global == 2 && no_virial_fdotr_compute == 0) {
     vflag_fdotr = 1;
     vflag_global = 0;
     if (vflag_atom == 0) vflag_either = 0;
     if (vflag_either == 0 && eflag_either == 0) evflag = 0;
   }
 
   if (lmp->cuda) lmp->cuda->evsetup_eatom_vatom(eflag_atom,vflag_atom);
 }
 
 /* ----------------------------------------------------------------------
    set all flags to zero for energy, virial computation
    called by some complicated many-body potentials that use individual flags
    to ensure no holdover of flags from previous timestep
 ------------------------------------------------------------------------- */
 
 void Pair::ev_unset()
 {
   // master switch
 
   evflag = 0;
 
   // energy flags
 
   eflag_either = eflag_global = eflag_atom = 0;
 
   // virial flags
 
   vflag_either = vflag_global = vflag_atom = vflag_fdotr = 0;
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    need i < nlocal test since called by bond_quartic and dihedral_charmm
 ------------------------------------------------------------------------- */
 
 void Pair::ev_tally(int i, int j, int nlocal, int newton_pair,
                     double evdwl, double ecoul, double fpair,
                     double delx, double dely, double delz)
 {
   // with newton_pair on, the full pair contribution is tallied
   // with newton_pair off, each atom with index < nlocal gets half
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_pair) {
         eng_vdwl += evdwl;
         eng_coul += ecoul;
       } else {
         const double half_vdwl = 0.5*evdwl;
         const double half_coul = 0.5*ecoul;
         if (i < nlocal) {
           eng_vdwl += half_vdwl;
           eng_coul += half_coul;
         }
         if (j < nlocal) {
           eng_vdwl += half_vdwl;
           eng_coul += half_coul;
         }
       }
     }
     if (eflag_atom) {
       const double share = 0.5 * (evdwl + ecoul);
       if (newton_pair || i < nlocal) eatom[i] += share;
       if (newton_pair || j < nlocal) eatom[j] += share;
     }
   }
 
   if (vflag_either) {
 
     // virial components in the order xx, yy, zz, xy, xz, yz
 
     double v[6];
     v[0] = delx*delx*fpair;
     v[1] = dely*dely*fpair;
     v[2] = delz*delz*fpair;
     v[3] = delx*dely*fpair;
     v[4] = delx*delz*fpair;
     v[5] = dely*delz*fpair;
 
     if (vflag_global) {
       if (newton_pair) {
         for (int k = 0; k < 6; k++) virial[k] += v[k];
       } else {
         if (i < nlocal)
           for (int k = 0; k < 6; k++) virial[k] += 0.5*v[k];
         if (j < nlocal)
           for (int k = 0; k < 6; k++) virial[k] += 0.5*v[k];
       }
     }
 
     if (vflag_atom) {
       if (newton_pair || i < nlocal)
         for (int k = 0; k < 6; k++) vatom[i][k] += 0.5*v[k];
       if (newton_pair || j < nlocal)
         for (int k = 0; k < 6; k++) vatom[j][k] += 0.5*v[k];
     }
   }
 }
  
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    can use this version with full neighbor lists
 ------------------------------------------------------------------------- */
 
 void Pair::ev_tally_full(int i, double evdwl, double ecoul, double fpair,
                          double delx, double dely, double delz)
 {
   // only atom i is tallied, always with half of the pair contribution
 
   if (eflag_either) {
     if (eflag_global) {
       eng_vdwl += 0.5*evdwl;
       eng_coul += 0.5*ecoul;
     }
     if (eflag_atom) eatom[i] += 0.5 * (evdwl + ecoul);
   }
 
   if (!vflag_either) return;
 
   // halved virial components in the order xx, yy, zz, xy, xz, yz
 
   double v[6];
   v[0] = 0.5*delx*delx*fpair;
   v[1] = 0.5*dely*dely*fpair;
   v[2] = 0.5*delz*delz*fpair;
   v[3] = 0.5*delx*dely*fpair;
   v[4] = 0.5*delx*delz*fpair;
   v[5] = 0.5*dely*delz*fpair;
 
   if (vflag_global)
     for (int k = 0; k < 6; k++) virial[k] += v[k];
 
   if (vflag_atom)
     for (int k = 0; k < 6; k++) vatom[i][k] += v[k];
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    for virial, have delx,dely,delz and fx,fy,fz
 ------------------------------------------------------------------------- */
 
 void Pair::ev_tally_xyz(int i, int j, int nlocal, int newton_pair,
                         double evdwl, double ecoul,
                         double fx, double fy, double fz,
                         double delx, double dely, double delz)
 {
   // with newton_pair on, the full pair contribution is tallied
   // with newton_pair off, each atom with index < nlocal gets half
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_pair) {
         eng_vdwl += evdwl;
         eng_coul += ecoul;
       } else {
         const double half_vdwl = 0.5*evdwl;
         const double half_coul = 0.5*ecoul;
         if (i < nlocal) {
           eng_vdwl += half_vdwl;
           eng_coul += half_coul;
         }
         if (j < nlocal) {
           eng_vdwl += half_vdwl;
           eng_coul += half_coul;
         }
       }
     }
     if (eflag_atom) {
       const double share = 0.5 * (evdwl + ecoul);
       if (newton_pair || i < nlocal) eatom[i] += share;
       if (newton_pair || j < nlocal) eatom[j] += share;
     }
   }
 
   if (vflag_either) {
 
     // virial components in the order xx, yy, zz, xy, xz, yz
 
     double v[6];
     v[0] = delx*fx;
     v[1] = dely*fy;
     v[2] = delz*fz;
     v[3] = delx*fy;
     v[4] = delx*fz;
     v[5] = dely*fz;
 
     if (vflag_global) {
       if (newton_pair) {
         for (int k = 0; k < 6; k++) virial[k] += v[k];
       } else {
         if (i < nlocal)
           for (int k = 0; k < 6; k++) virial[k] += 0.5*v[k];
         if (j < nlocal)
           for (int k = 0; k < 6; k++) virial[k] += 0.5*v[k];
       }
     }
 
     if (vflag_atom) {
       if (newton_pair || i < nlocal)
         for (int k = 0; k < 6; k++) vatom[i][k] += 0.5*v[k];
       if (newton_pair || j < nlocal)
         for (int k = 0; k < 6; k++) vatom[j][k] += 0.5*v[k];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl, eng_coul and virial into global and per-atom accumulators
    for virial, have delx,dely,delz and fx,fy,fz
    called when using full neighbor lists
 ------------------------------------------------------------------------- */
 
 void Pair::ev_tally_xyz_full(int i, double evdwl, double ecoul,
                              double fx, double fy, double fz,
                              double delx, double dely, double delz)
 {
   // energy: each call tallies only half of evdwl/ecoul,
   // globally and for atom i
 
   if (eflag_either) {
     if (eflag_global) {
       eng_vdwl += 0.5*evdwl;
       eng_coul += 0.5*ecoul;
     }
     if (eflag_atom) eatom[i] += 0.5 * (evdwl + ecoul);
   }
 
   if (!vflag_either) return;
 
   // virial: half of del*f, components ordered xx,yy,zz,xy,xz,yz
 
   const double vhalf[6] = {
     0.5*delx*fx, 0.5*dely*fy, 0.5*delz*fz,
     0.5*delx*fy, 0.5*delx*fz, 0.5*dely*fz
   };
 
   if (vflag_global)
     for (int n = 0; n < 6; n++) virial[n] += vhalf[n];
 
   if (vflag_atom)
     for (int n = 0; n < 6; n++) vatom[i][n] += vhalf[n];
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl, eng_coul and virial into global and per-atom accumulators
    called by SW and hbond potentials, newton_pair is always on
    virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
  ------------------------------------------------------------------------- */
 
 void Pair::ev_tally3(int i, int j, int k, double evdwl, double ecoul,
                      double *fj, double *fk, double *drji, double *drki)
 {
   // energy: global totals take the full amount,
   // atoms i,j,k each receive a THIRD share of the combined energy
 
   if (eflag_either) {
     if (eflag_global) {
       eng_vdwl += evdwl;
       eng_coul += ecoul;
     }
     if (eflag_atom) {
       const double share = THIRD * (evdwl + ecoul);
       eatom[i] += share;
       eatom[j] += share;
       eatom[k] += share;
     }
   }
 
   if (!vflag_either) return;
 
   // virial components in order xx,yy,zz,xy,xz,yz
   // component n pairs displacement index row[n] with force index col[n]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   double v[6];
   for (int n = 0; n < 6; n++)
     v[n] = drji[row[n]]*fj[col[n]] + drki[row[n]]*fk[col[n]];
 
   if (vflag_global)
     for (int n = 0; n < 6; n++) virial[n] += v[n];
 
   // per-atom virial: one THIRD of every component to each of i,j,k
 
   if (vflag_atom) {
     const int atoms[3] = {i,j,k};
     for (int a = 0; a < 3; a++) {
       double *va = vatom[atoms[a]];
       for (int n = 0; n < 6; n++) va[n] += THIRD*v[n];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    called by AIREBO potential, newton_pair is always on
  ------------------------------------------------------------------------- */
 
 void Pair::ev_tally4(int i, int j, int k, int m, double evdwl,
                      double *fi, double *fj, double *fk,
                      double *drim, double *drjm, double *drkm)
 {
   // energy: full amount to the global total,
   // one quarter to each of the four atoms
 
   if (eflag_either) {
     if (eflag_global) eng_vdwl += evdwl;
     if (eflag_atom) {
       const double quarter = 0.25 * evdwl;
       eatom[i] += quarter;
       eatom[j] += quarter;
       eatom[k] += quarter;
       eatom[m] += quarter;
     }
   }
 
   // only the per-atom virial is tallied here
 
   if (!vflag_atom) return;
 
   // components in order xx,yy,zz,xy,xz,yz
   // component n pairs displacement index row[n] with force index col[n]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   double v[6];
   for (int n = 0; n < 6; n++)
     v[n] = 0.25 * (drim[row[n]]*fi[col[n]] + drjm[row[n]]*fj[col[n]] +
                    drkm[row[n]]*fk[col[n]]);
 
   const int atoms[4] = {i,j,k,m};
   for (int a = 0; a < 4; a++) {
     double *va = vatom[atoms[a]];
     for (int n = 0; n < 6; n++) va[n] += v[n];
   }
 }
 
 /* ----------------------------------------------------------------------
    tally ecoul and virial into each of atoms in list
    called by TIP4P potential, newton_pair is always on
    weight assignments by alpha, so contribution is all to O atom as alpha -> 0.0
    key = 0 if neither atom = water O
    key = 1 if first atom = water O
    key = 2 if second atom = water O
    key = 3 if both atoms = water O
  ------------------------------------------------------------------------- */
 
 void Pair::ev_tally_tip4p(int key, int *list, double *v,
                           double ecoul, double alpha)
 {
   if (eflag_either) {
     if (eflag_global) eng_coul += ecoul;
     if (eflag_atom) {
 
       // three possible per-atom shares of the energy:
       //   e_half = plain half share
       //   e_main = half share scaled by (1-alpha)
       //   e_side = quarter share scaled by alpha
       // NOTE(review): presumably e_main goes to the water O and
       //   e_side to each of its two H atoms - confirm against callers
 
       const double e_half = 0.5*ecoul;
       const double e_main = 0.5*ecoul*(1-alpha);
       const double e_side = 0.25*ecoul*alpha;
 
       switch (key) {
       case 0:
         eatom[list[0]] += e_half;
         eatom[list[1]] += e_half;
         break;
       case 1:
         eatom[list[0]] += e_main;
         eatom[list[1]] += e_side;
         eatom[list[2]] += e_side;
         eatom[list[3]] += e_half;
         break;
       case 2:
         eatom[list[0]] += e_half;
         eatom[list[1]] += e_main;
         eatom[list[2]] += e_side;
         eatom[list[3]] += e_side;
         break;
       default:
         eatom[list[0]] += e_main;
         eatom[list[1]] += e_side;
         eatom[list[2]] += e_side;
         eatom[list[3]] += e_main;
         eatom[list[4]] += e_side;
         eatom[list[5]] += e_side;
         break;
       }
     }
   }
 
   if (!vflag_either) return;
 
   if (vflag_global)
     for (int n = 0; n < 6; n++) virial[n] += v[n];
 
   if (!vflag_atom) return;
 
   // per-atom virial uses the same weighting pattern as the energy,
   // applied to each of the 6 components in turn
 
   for (int n = 0; n < 6; n++) {
     const double v_half = 0.5*v[n];
     const double v_main = 0.5*v[n]*(1-alpha);
     const double v_side = 0.25*v[n]*alpha;
 
     switch (key) {
     case 0:
       vatom[list[0]][n] += v_half;
       vatom[list[1]][n] += v_half;
       break;
     case 1:
       vatom[list[0]][n] += v_main;
       vatom[list[1]][n] += v_side;
       vatom[list[2]][n] += v_side;
       vatom[list[3]][n] += v_half;
       break;
     case 2:
       vatom[list[0]][n] += v_half;
       vatom[list[1]][n] += v_main;
       vatom[list[2]][n] += v_side;
       vatom[list[3]][n] += v_side;
       break;
     default:
       vatom[list[0]][n] += v_main;
       vatom[list[1]][n] += v_side;
       vatom[list[2]][n] += v_side;
       vatom[list[3]][n] += v_main;
       vatom[list[4]][n] += v_side;
       vatom[list[5]][n] += v_side;
       break;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by REAX/C potential, newton_pair is always on
    fi is the force vector on atom i, deli is the matching displacement vector
 ------------------------------------------------------------------------- */
 
 void Pair::v_tally(int i, double *fi, double *deli)
 {
   // components in order xx,yy,zz,xy,xz,yz
   // component n pairs displacement index row[n] with force index col[n]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   // build all 6 half-weighted products first, then add them to atom i
 
   double v[6];
   for (int n = 0; n < 6; n++) v[n] = 0.5*deli[row[n]]*fi[col[n]];
 
   double *vi = vatom[i];
   for (int n = 0; n < 6; n++) vi[n] += v[n];
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO potential, newton_pair is always on
    fpair is magnitude of force on atom I
 ------------------------------------------------------------------------- */
 
 void Pair::v_tally2(int i, int j, double fpair, double *drij)
 {
   // components in order xx,yy,zz,xy,xz,yz
   // component n multiplies drij[row[n]] by drij[col[n]]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   double v[6];
   for (int n = 0; n < 6; n++) v[n] = 0.5 * drij[row[n]]*drij[col[n]]*fpair;
 
   // the same half-weighted tensor is added to both atoms
 
   double *vi = vatom[i];
   for (int n = 0; n < 6; n++) vi[n] += v[n];
   double *vj = vatom[j];
   for (int n = 0; n < 6; n++) vj[n] += v[n];
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO and Tersoff potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
 void Pair::v_tally3(int i, int j, int k,
                     double *fi, double *fj, double *drik, double *drjk)
 {
   // components in order xx,yy,zz,xy,xz,yz
   // component n pairs displacement index row[n] with force index col[n]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   double v[6];
   for (int n = 0; n < 6; n++)
     v[n] = THIRD * (drik[row[n]]*fi[col[n]] + drjk[row[n]]*fj[col[n]]);
 
   // each of the three atoms receives the same THIRD-weighted tensor
 
   const int atoms[3] = {i,j,k};
   for (int a = 0; a < 3; a++) {
     double *va = vatom[atoms[a]];
     for (int n = 0; n < 6; n++) va[n] += v[n];
   }
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
 void Pair::v_tally4(int i, int j, int k, int m,
                     double *fi, double *fj, double *fk,
                     double *drim, double *drjm, double *drkm)
 {
   // components in order xx,yy,zz,xy,xz,yz
   // component n pairs displacement index row[n] with force index col[n]
 
   static const int row[6] = {0,1,2,0,0,1};
   static const int col[6] = {0,1,2,1,2,2};
 
   double v[6];
   for (int n = 0; n < 6; n++)
     v[n] = 0.25 * (drim[row[n]]*fi[col[n]] + drjm[row[n]]*fj[col[n]] +
                    drkm[row[n]]*fk[col[n]]);
 
   // each of the four atoms receives the same quarter-weighted tensor
 
   const int atoms[4] = {i,j,k,m};
   for (int a = 0; a < 4; a++) {
     double *va = vatom[atoms[a]];
     for (int n = 0; n < 6; n++) va[n] += v[n];
   }
 }
 
 /* ----------------------------------------------------------------------
    tally virial into global and per-atom accumulators
    called by pair lubricate potential with 6 tensor components
 ------------------------------------------------------------------------- */
 
 void Pair::v_tally_tensor(int i, int j, int nlocal, int newton_pair,
                           double vxx, double vyy, double vzz,
                           double vxy, double vxz, double vyz)
 {
   // pack the caller-supplied tensor in order xx,yy,zz,xy,xz,yz
 
   const double v[6] = {vxx,vyy,vzz,vxy,vxz,vyz};
   int n;
 
   // global virial: full tensor when newton_pair is on,
   // else half of it for each of i,j with index below nlocal
 
   if (vflag_global) {
     if (newton_pair) {
       for (n = 0; n < 6; n++) virial[n] += v[n];
     } else {
       if (i < nlocal)
         for (n = 0; n < 6; n++) virial[n] += 0.5*v[n];
       if (j < nlocal)
         for (n = 0; n < 6; n++) virial[n] += 0.5*v[n];
     }
   }
 
   // per-atom virial: half of the tensor to each atom that qualifies
 
   if (vflag_atom) {
     if (newton_pair || i < nlocal)
       for (n = 0; n < 6; n++) vatom[i][n] += 0.5*v[n];
     if (newton_pair || j < nlocal)
       for (n = 0; n < 6; n++) vatom[j][n] += 0.5*v[n];
   }
 }
 
 /* ----------------------------------------------------------------------
    compute global pair virial via summing F dot r over own & ghost atoms
    at this point, only pairwise forces have been accumulated in atom->f
 ------------------------------------------------------------------------- */
 
 void Pair::virial_fdotr_compute()
 {
   double **x = atom->x;
   double **f = atom->f;
   const int nall = atom->nlocal + atom->nghost;
 
   // choose the index ranges [lo,hi) to sum over
   // includegroup not set: one range covering all owned and ghost atoms
   // includegroup set: initial nfirst atoms, then the ghost atoms
 
   int lo[2],hi[2],nrange;
 
   if (neighbor->includegroup == 0) {
     nrange = 1;
     lo[0] = 0;
     hi[0] = nall;
   } else {
     nrange = 2;
     lo[0] = 0;
     hi[0] = atom->nfirst;
     lo[1] = atom->nlocal;
     hi[1] = nall;
   }
 
   // accumulate f*x products in order xx,yy,zz,xy,xz,yz
 
   for (int r = 0; r < nrange; r++) {
     for (int i = lo[r]; i < hi[r]; i++) {
       const double *xi = x[i];
       const double *fi = f[i];
       virial[0] += fi[0]*xi[0];
       virial[1] += fi[1]*xi[1];
       virial[2] += fi[2]*xi[2];
       virial[3] += fi[1]*xi[0];
       virial[4] += fi[2]*xi[0];
       virial[5] += fi[2]*xi[1];
     }
   }
 
   // prevent multiple calls to update the virial
   // when a hybrid pair style uses both a gpu and non-gpu pair style
   // or when respa is used with gpu pair styles
 
   vflag_fdotr = 0;
 }
 
 /* ----------------------------------------------------------------------
    write a table of pair potential energy/force vs distance to a file
 ------------------------------------------------------------------------- */
 
 void Pair::write_file(int narg, char **arg)
 {
   if (narg < 8) error->all(FLERR,"Illegal pair_write command");
   if (single_enable == 0)
     error->all(FLERR,"Pair style does not support pair_write");
 
   // parse arguments
 
   int itype = force->inumeric(FLERR,arg[0]);
   int jtype = force->inumeric(FLERR,arg[1]);
   if (itype < 1 || itype > atom->ntypes || jtype < 1 || jtype > atom->ntypes)
     error->all(FLERR,"Invalid atom types in pair_write command");
 
   int n = force->inumeric(FLERR,arg[2]);
 
   int style = NONE;
   if (strcmp(arg[3],"r") == 0) style = RLINEAR;
   else if (strcmp(arg[3],"rsq") == 0) style = RSQ;
   else if (strcmp(arg[3],"bitmap") == 0) style = BMP;
   else error->all(FLERR,"Invalid style in pair_write command");
 
   double inner = force->numeric(FLERR,arg[4]);
   double outer = force->numeric(FLERR,arg[5]);
   if (inner <= 0.0 || inner >= outer)
     error->all(FLERR,"Invalid cutoffs in pair_write command");
 
   // open file in append mode
   // print header in format used by pair_style table
 
   int me;
   MPI_Comm_rank(world,&me);
   FILE *fp;
   if (me == 0) {
     fp = fopen(arg[6],"a");
     if (fp == NULL) error->one(FLERR,"Cannot open pair_write file");
     fprintf(fp,"# Pair potential %s for atom types %d %d: i,r,energy,force\n",
             force->pair_style,itype,jtype);
     if (style == RLINEAR)
       fprintf(fp,"\n%s\nN %d R %g %g\n\n",arg[7],n,inner,outer);
     if (style == RSQ)
       fprintf(fp,"\n%s\nN %d RSQ %g %g\n\n",arg[7],n,inner,outer);
   }
 
   // initialize potentials before evaluating pair potential
   // insures all pair coeffs are set and force constants
   // also initialize neighbor so that neighbor requests are processed
   // NOTE: might be safest to just do lmp->init()
 
   force->init();
   neighbor->init();
 
   // if pair style = any of EAM, swap in dummy fp vector
 
   double eamfp[2];
   eamfp[0] = eamfp[1] = 0.0;
   double *eamfp_hold;
 
   Pair *epair = force->pair_match("eam",0);
   if (epair) epair->swap_eam(eamfp,&eamfp_hold);
 
   // if atom style defines charge, swap in dummy q vec
 
   double q[2];
   q[0] = q[1] = 1.0;
   if (narg == 10) {
     q[0] = force->numeric(FLERR,arg[8]);
     q[1] = force->numeric(FLERR,arg[9]);
   }
   double *q_hold;
 
   if (atom->q) {
     q_hold = atom->q;
     atom->q = q;
   }
 
   // evaluate energy and force at each of N distances
 
   int masklo,maskhi,nmask,nshiftbits;
   if (style == BMP) {
     init_bitmap(inner,outer,n,masklo,maskhi,nmask,nshiftbits);
     int ntable = 1 << n;
     if (me == 0)
       fprintf(fp,"\n%s\nN %d BITMAP %g %g\n\n",arg[7],ntable,inner,outer);
     n = ntable;
   }
 
   double r,e,f,rsq;
   union_int_float_t rsq_lookup;
 
   for (int i = 0; i < n; i++) {
     if (style == RLINEAR) {
       r = inner + (outer-inner) * i/(n-1);
       rsq = r*r;
     } else if (style == RSQ) {
       rsq = inner*inner + (outer*outer - inner*inner) * i/(n-1);
       r = sqrt(rsq);
     } else if (style == BMP) {
       rsq_lookup.i = i << nshiftbits;
       rsq_lookup.i |= masklo;
       if (rsq_lookup.f < inner*inner) {
         rsq_lookup.i = i << nshiftbits;
         rsq_lookup.i |= maskhi;
       }
       rsq = rsq_lookup.f;
       r = sqrt(rsq);
     }
 
     if (rsq < cutsq[itype][jtype]) {
       e = single(0,1,itype,jtype,rsq,1.0,1.0,f);
       f *= r;
     } else e = f = 0.0;
     if (me == 0) fprintf(fp,"%d %g %g %g\n",i+1,r,e,f);
   }
 
   // restore original vecs that were swapped in for
 
   double *tmp;
   if (epair) epair->swap_eam(eamfp_hold,&tmp);
   if (atom->q) atom->q = q_hold;
 
   if (me == 0) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    define bitmap parameters based on inner and outer cutoffs
 ------------------------------------------------------------------------- */
 
 void Pair::init_bitmap(double inner, double outer, int ntablebits,
              int &masklo, int &maskhi, int &nmask, int &nshiftbits)
 {
   // inputs: inner,outer = table cutoffs, ntablebits = total bits for the table
   // outputs (by reference): masklo, maskhi, nmask, nshiftbits
   // NOTE(review): union_int_float_t is declared elsewhere; this code
   //   presumably relies on it overlaying an int and a float - confirm
 
   if (sizeof(int) != sizeof(float))
     error->all(FLERR,"Bitmapped lookup tables require int/float be same size");
 
   if (ntablebits > sizeof(float)*CHAR_BIT)
     error->all(FLERR,"Too many total bits for bitmapped lookup table");
 
   // inverted cutoffs only trigger a warning, not an error
 
   if (inner >= outer)
     error->warning(FLERR,"Table inner cutoff >= outer cutoff");
 
   // search for nlowermin such that
   //   2^nlowermin <= inner^2 < 2^(nlowermin+1)
   // stepping up or down by one from an initial guess of 1
 
   int nlowermin = 1;
   while (!((pow(double(2),(double)nlowermin) <= inner*inner) &&
            (pow(double(2),(double)nlowermin+1.0) > inner*inner))) {
     if (pow(double(2),(double)nlowermin) <= inner*inner) nlowermin++;
     else nlowermin--;
   }
 
   // grow nexpbits until 2^(2^nexpbits) is at least
   //   outer^2 / 2^nlowermin
 
   int nexpbits = 0;
   double required_range = outer*outer / pow(double(2),(double)nlowermin);
   double available_range = 2.0;
 
   while (available_range < required_range) {
     nexpbits++;
     available_range = pow(double(2),pow(double(2),(double)nexpbits));
   }
 
   // table bits not used for the exponent are left for the mantissa
 
   int nmantbits = ntablebits - nexpbits;
 
   if (nexpbits > sizeof(float)*CHAR_BIT - FLT_MANT_DIG)
     error->all(FLERR,"Too many exponent bits for lookup table");
   if (nmantbits+1 > FLT_MANT_DIG)
     error->all(FLERR,"Too many mantissa bits for lookup table");
   if (nmantbits < 3) error->all(FLERR,"Too few bits for lookup table");
 
   nshiftbits = FLT_MANT_DIG - (nmantbits+1);
 
   // nmask = 2^(ntablebits+nshiftbits) - 1, built by repeated doubling
 
   nmask = 1;
   for (int j = 0; j < ntablebits+nshiftbits; j++) nmask *= 2;
   nmask -= 1;
 
   // maskhi,masklo = bit patterns of outer^2 and inner^2
   //   with the bits covered by nmask cleared
 
   union_int_float_t rsq_lookup;
   rsq_lookup.f = outer*outer;
   maskhi = rsq_lookup.i & ~(nmask);
   rsq_lookup.f = inner*inner;
   masklo = rsq_lookup.i & ~(nmask);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double Pair::memory_usage()
 {
   // bytes used by the per-atom energy (1 value per entry) and
   // per-atom virial (6 values per entry) arrays, times comm->nthreads
   // convert to double before multiplying so that the product of the
   // counts is not evaluated in int arithmetic, where it could overflow
 
   double bytes = (double)comm->nthreads*maxeatom * sizeof(double);
   bytes += (double)comm->nthreads*maxvatom*6 * sizeof(double);
   return bytes;
 }
 
diff --git a/src/special.cpp b/src/special.cpp
index 420a8388a..e685a54a2 100644
--- a/src/special.cpp
+++ b/src/special.cpp
@@ -1,1121 +1,1129 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "stdio.h"
 #include "special.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "comm.h"
 #include "accelerator_kokkos.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // allocate space for static class variable
 // sptr is assigned "this" in build() ahead of each comm->ring() call
 // NOTE(review): presumably how the ring callbacks reach the instance - confirm
 
 Special *Special::sptr;
 
 /* ---------------------------------------------------------------------- */
 
 Special::Special(LAMMPS *lmp) : Pointers(lmp)
 {
   // store this proc's rank and the number of procs in world
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // 1-2, 1-3, 1-4 lists start out unallocated
 
   onefour = NULL;
   onethree = NULL;
   onetwo = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Special::~Special()
 {
   // release the 1-2, 1-3, 1-4 neighbor lists
   // the constructor sets all three to NULL, so any of them may still be NULL
 
   memory->destroy(onetwo);
   memory->destroy(onethree);
   memory->destroy(onefour);
 }
 
 /* ----------------------------------------------------------------------
    create 1-2, 1-3, 1-4 lists of topology neighbors
    store in onetwo, onethree, onefour for each atom
    store 3 counters in nspecial[i]
 ------------------------------------------------------------------------- */
 
 void Special::build()
 {
   int i,j,k,size;
   int max,maxall,nbuf;
   tagint *buf;
 
   MPI_Barrier(world);
 
   int nlocal = atom->nlocal;
 
   tagint *tag = atom->tag;
   int *num_bond = atom->num_bond;
   tagint **bond_atom = atom->bond_atom;
   int **nspecial = atom->nspecial;
 
-  if (me == 0 && screen) fprintf(screen,"Finding 1-2 1-3 1-4 neighbors ...\n");
+  if (me == 0 && screen) {
+    const double * const special_lj   = force->special_lj;
+    const double * const special_coul = force->special_coul;
+    fprintf(screen,"Finding 1-2 1-3 1-4 neighbors ...\n"
+                   " Special bond factors lj:   %-10g %-10g %-10g\n"
+                   " Special bond factors coul: %-10g %-10g %-10g\n",
+                   special_lj[1],special_lj[2],special_lj[3],
+                   special_coul[1],special_coul[2],special_coul[3]);
+  }
 
   // initialize nspecial counters to 0
 
   for (i = 0; i < nlocal; i++) {
     nspecial[i][0] = 0;
     nspecial[i][1] = 0;
     nspecial[i][2] = 0;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][0] = # of 1-2 neighbors of atom i
   // -----------------------------------------------------
 
   // bond partners stored by atom itself
 
   for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i];
 
   // if newton_bond off, then done
   // else only counted 1/2 of all bonds, so count other half
 
   if (force->newton_bond) {
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = global tag of 2nd atom in each bond
 
     nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += num_bond[i];
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with global tags of bond partners of my atoms
 
     size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_bond[i]; j++)
         buf[size++] = bond_atom[i][j];
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan tags for atoms I own
     // when find one, increment nspecial count for that atom
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL);
 
     memory->destroy(buf);
   }
 
   // ----------------------------------------------------
   // create onetwo[i] = list of 1-2 neighbors for atom i
   // ----------------------------------------------------
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]);
 
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-2 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-2 neighbors\n",maxall);
   }
 
   memory->create(onetwo,nlocal,maxall,"special:onetwo");
 
   // count = accumulating counter
 
   memory->create(count,nlocal,"special:count");
   for (i = 0; i < nlocal; i++) count[i] = 0;
 
   // add bond partners stored by atom to onetwo list
 
   for (i = 0; i < nlocal; i++)
     for (j = 0; j < num_bond[i]; j++)
       onetwo[i][count[i]++] = bond_atom[i][j];
 
   // if newton_bond off, then done
   // else only stored 1/2 of all bonds, so store other half
 
   if (force->newton_bond) {
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = 2 global tags in each bond
 
     nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i];
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with global tags of both atoms in bond
 
     size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_bond[i]; j++) {
         buf[size++] = tag[i];
         buf[size++] = bond_atom[i][j];
       }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan 2nd-atom tags for atoms I own
     // when find one, add 1st-atom tag to onetwo list for 2nd atom
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL);
 
     memory->destroy(buf);
   }
 
   memory->destroy(count);
 
   // -----------------------------------------------------
   // done if special_bond weights for 1-3, 1-4 are set to 1.0
   // -----------------------------------------------------
 
   if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0 &&
       force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) {
     dedup();
     combine();
     return;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][1] = # of 1-3 neighbors of atom i
   // -----------------------------------------------------
 
   // nbufmax = largest buffer needed to hold info from any proc
   // info for each atom = 2 scalars + list of 1-2 neighbors
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = counter for 1-3 neighbors, initialized to 0
   // (2) = # of 1-2 neighbors
   // (3:N) = list of 1-2 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = 0;
     buf[size++] = nspecial[i][0];
     for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-2 neighbors for atoms I own
   // when find one, increment 1-3 count by # of 1-2 neighbors of my atom,
   //   subtracting one since my list will contain original atom
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,3,ring_three,buf);
 
   // extract count from buffer that has cycled back to me
   // nspecial[i][1] = # of 1-3 neighbors of atom i
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     nspecial[i][1] = buf[j];
     j += 2 + nspecial[i][0];
   }
 
   memory->destroy(buf);
 
   // ----------------------------------------------------
   // create onethree[i] = list of 1-3 neighbors for atom i
   // ----------------------------------------------------
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]);
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-3 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-3 neighbors\n",maxall);
   }
 
   memory->create(onethree,nlocal,maxall,"special:onethree");
 
   // nbufmax = largest buffer needed to hold info from any proc
   // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = global tag of original atom
   // (2) = # of 1-2 neighbors
   // (3) = # of 1-3 neighbors
   // (4) = counter for 1-3 neighbors, initialized to 0
   // (5:N) = list of 1-2 neighbors
   // (N+1:2N) space for list of 1-3 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = tag[i];
     buf[size++] = nspecial[i][0];
     buf[size++] = nspecial[i][1];
     buf[size++] = 0;
     for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j];
     size += nspecial[i][1];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-2 neighbors for atoms I own
   // when find one, add its neighbors to 1-3 list
   //   increment the count in buf(i+4)
   //   exclude the atom whose tag = original
   //   this process may include duplicates but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,4,ring_four,buf);
 
   // fill onethree with buffer values that have been returned to me
   // sanity check: accumulated buf[i+3] count should equal
   //   nspecial[i][1] for each atom
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     if (buf[j+3] != nspecial[i][1])
       error->one(FLERR,"1-3 bond count is inconsistent");
     j += 4 + nspecial[i][0];
     for (k = 0; k < nspecial[i][1]; k++)
       onethree[i][k] = buf[j++];
   }
 
   memory->destroy(buf);
 
   // done if special_bond weights for 1-4 are set to 1.0
 
   if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) {
     dedup();
     if (force->special_angle) angle_trim();
     combine();
     return;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][2] = # of 1-4 neighbors of atom i
   // -----------------------------------------------------
 
   // nbufmax = largest buffer needed to hold info from any proc
   // info for each atom = 2 scalars + list of 1-3 neighbors
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = counter for 1-4 neighbors, initialized to 0
   // (2) = # of 1-3 neighbors
   // (3:N) = list of 1-3 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = 0;
     buf[size++] = nspecial[i][1];
     for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-3 neighbors for atoms I own
   // when find one, increment 1-4 count by # of 1-2 neighbors of my atom
   //   may include duplicates and original atom but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,5,ring_five,buf);
 
   // extract count from buffer that has cycled back to me
   // nspecial[i][2] = # of 1-4 neighbors of atom i
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     nspecial[i][2] = buf[j];
     j += 2 + nspecial[i][1];
   }
 
   memory->destroy(buf);
 
   // ----------------------------------------------------
   // create onefour[i] = list of 1-4 neighbors for atom i
   // ----------------------------------------------------
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]);
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-4 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-4 neighbors\n",maxall);
   }
 
   memory->create(onefour,nlocal,maxall,"special:onefour");
 
   // nbufmax = largest buffer needed to hold info from any proc
   // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++)
     nbuf += 3 + nspecial[i][1] + nspecial[i][2];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = # of 1-3 neighbors
   // (2) = # of 1-4 neighbors
   // (3) = counter for 1-4 neighbors, initialized to 0
   // (4:N) = list of 1-3 neighbors
   // (N+1:2N) space for list of 1-4 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = nspecial[i][1];
     buf[size++] = nspecial[i][2];
     buf[size++] = 0;
     for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j];
     size += nspecial[i][2];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-3 neighbors for atoms I own
   // when find one, add its neighbors to 1-4 list
   //   incrementing the count in buf(i+4)
   //   this process may include duplicates but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,6,ring_six,buf);
 
   // fill onefour with buffer values that have been returned to me
   // sanity check: accumulated buf[i+2] count should equal
   //  nspecial[i][2] for each atom
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     if (buf[j+2] != nspecial[i][2])
       error->one(FLERR,"1-4 bond count is inconsistent");
     j += 3 + nspecial[i][1];
     for (k = 0; k < nspecial[i][2]; k++)
       onefour[i][k] = buf[j++];
   }
 
   memory->destroy(buf);
 
   dedup();
   if (force->special_angle) angle_trim();
   if (force->special_dihedral) dihedral_trim();
   combine();
 }
 
 /* ----------------------------------------------------------------------
    remove duplicates within each of onetwo, onethree, onefour individually
 ------------------------------------------------------------------------- */
 
 void Special::dedup()
 {
   int i,j;
   tagint m;
 
   // clear map so it can be used as scratch space
 
   atom->map_clear();
 
   // use map to cull duplicates
   // exclude original atom explicitly
   // adjust onetwo, onethree, onefour values to reflect removed duplicates
   // must unset map for each atom
 
   int **nspecial = atom->nspecial;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   int unique;
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         onetwo[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][0] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onetwo[i][j],-1);
   }
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         onethree[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][1] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onethree[i][j],-1);
   }
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         onefour[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][2] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onefour[i][j],-1);
   }
 
   // re-create map
 
   atom->nghost = 0;
   atom->map_set();
 }
 
 /* ----------------------------------------------------------------------
    concatenate onetwo, onethree, onefour into master atom->special list
    remove duplicates between 3 lists, leave dup in first list it appears in
    convert nspecial[0], nspecial[1], nspecial[2] into cumulative counters
 ------------------------------------------------------------------------- */
 
 void Special::combine()
 {
   int i,j;
   tagint m;
 
   int me;
   MPI_Comm_rank(world,&me);
 
   int **nspecial = atom->nspecial;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   // ----------------------------------------------------
   // compute culled maxspecial = max # of special neighs of any atom
   // ----------------------------------------------------
 
   // clear map so it can be used as scratch space
 
   atom->map_clear();
 
   // unique = # of unique nspecial neighbors of one atom
   // cull duplicates using map to check for them
   // exclude original atom explicitly
   // must unset map for each atom
 
   int unique;
   int maxspecial = 0;
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
 
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
 
     maxspecial = MAX(maxspecial,unique);
 
     atom->map_one(tag[i],-1);
     for (j = 0; j < nspecial[i][0]; j++) atom->map_one(onetwo[i][j],-1);
     for (j = 0; j < nspecial[i][1]; j++) atom->map_one(onethree[i][j],-1);
     for (j = 0; j < nspecial[i][2]; j++) atom->map_one(onefour[i][j],-1);
   }
 
   // compute global maxspecial, must be at least 1
   // add in extra factor from special_bonds command
   // allocate correct special array with same nmax, new maxspecial
   // previously allocated one must be destroyed
   // must make AtomVec class update its ptr to special
 
   MPI_Allreduce(&maxspecial,&atom->maxspecial,1,MPI_INT,MPI_MAX,world);
   atom->maxspecial += force->special_extra;
   atom->maxspecial = MAX(atom->maxspecial,1);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,"  %d = max # of special neighbors\n",atom->maxspecial);
     if (logfile)
       fprintf(logfile,"  %d = max # of special neighbors\n",atom->maxspecial);
   }
 
   if (lmp->kokkos) {
     AtomKokkos* atomKK = (AtomKokkos*) atom;
     memory->grow_kokkos(atomKK->k_special,atom->special,
                         atom->nmax,atom->maxspecial,"atom:special");
   } else {
     memory->destroy(atom->special);
     memory->create(atom->special,atom->nmax,atom->maxspecial,"atom:special");
   }
 
   atom->avec->grow_reset();
   tagint **special = atom->special;
 
   // ----------------------------------------------------
   // fill special array with 1-2, 1-3, 1-4 neighs for each atom
   // ----------------------------------------------------
 
   // again use map to cull duplicates
   // exclude original atom explicitly
   // adjust nspecial[i] values to reflect removed duplicates
   // nspecial[i][1] and nspecial[i][2] now become cumulative counters
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
 
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][0] = unique;
 
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][1] = unique;
 
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][2] = unique;
 
     atom->map_one(tag[i],-1);
     for (j = 0; j < nspecial[i][2]; j++) atom->map_one(special[i][j],-1);
   }
 
   // re-create map
 
   atom->nghost = 0;
   atom->map_set();
 }
 
 /* ----------------------------------------------------------------------
    trim list of 1-3 neighbors by checking defined angles
    delete a 1-3 neigh if they are not end atoms of a defined angle
      and if they are not 1,3 or 2,4 atoms of a defined dihedral
 ------------------------------------------------------------------------- */
 
 void Special::angle_trim()
 {
   int i,j,m,n;
 
   int *num_angle = atom->num_angle;
   int *num_dihedral = atom->num_dihedral;
   tagint **angle_atom1 = atom->angle_atom1;
   tagint **angle_atom3 = atom->angle_atom3;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
   tagint **dihedral_atom2 = atom->dihedral_atom2;
   tagint **dihedral_atom3 = atom->dihedral_atom3;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   // stats on old 1-3 neighbor counts
 
   double onethreecount = 0.0;
   for (i = 0; i < nlocal; i++) onethreecount += nspecial[i][1];
   double allcount;
   MPI_Allreduce(&onethreecount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-3 neighbors before angle trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-3 neighbors before angle trim\n",allcount);
   }
 
   // if angles or dihedrals are defined,
   // flag each 1-3 neigh if it appears in an angle or dihedral
 
   if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) {
 
     // dflag = flag for 1-3 neighs of all owned atoms
 
     int maxcount = 0;
     for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]);
     memory->create(dflag,nlocal,maxcount,"special::dflag");
 
     for (i = 0; i < nlocal; i++) {
       n = nspecial[i][1];
       for (j = 0; j < n; j++) dflag[i][j] = 0;
     }
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = list of 1,3 atoms in each angle stored by atom
     //   and list of 1,3 and 2,4 atoms in each dihedral stored by atom
 
     int nbuf = 0;
     for (i = 0; i < nlocal; i++) {
       if (num_angle && atom->nangles) nbuf += 2*num_angle[i];
       if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i];
     }
     int *buf;
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with list of 1,3 atoms in each angle
     // and with list of 1,3 and 2,4 atoms in each dihedral
 
     int size = 0;
     if (num_angle && atom->nangles)
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < num_angle[i]; j++) {
           buf[size++] = angle_atom1[i][j];
           buf[size++] = angle_atom3[i][j];
         }
 
     if (num_dihedral && atom->ndihedrals)
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < num_dihedral[i]; j++) {
           buf[size++] = dihedral_atom1[i][j];
           buf[size++] = dihedral_atom3[i][j];
           buf[size++] = dihedral_atom2[i][j];
           buf[size++] = dihedral_atom4[i][j];
         }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan list of 1,3 atoms looking for atoms I own
     // when find one, scan its 1-3 neigh list and mark I,J as in an angle
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL);
 
     // delete 1-3 neighbors if they are not flagged in dflag
 
     for (i = 0; i < nlocal; i++) {
       m = 0;
       for (j = 0; j < nspecial[i][1]; j++)
         if (dflag[i][j]) onethree[i][m++] = onethree[i][j];
       nspecial[i][1] = m;
     }
 
     // clean up
 
     memory->destroy(dflag);
     memory->destroy(buf);
 
   // if no angles or dihedrals are defined, delete all 1-3 neighs
 
   } else {
     for (i = 0; i < nlocal; i++) nspecial[i][1] = 0;
   }
 
   // stats on new 1-3 neighbor counts
 
   onethreecount = 0.0;
   for (i = 0; i < nlocal; i++) onethreecount += nspecial[i][1];
   MPI_Allreduce(&onethreecount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-3 neighbors after angle trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-3 neighbors after angle trim\n",allcount);
   }
 }
 
 /* ----------------------------------------------------------------------
    trim list of 1-4 neighbors by checking defined dihedrals
    delete a 1-4 neigh if they are not end atoms of a defined dihedral
 ------------------------------------------------------------------------- */
 
 void Special::dihedral_trim()
 {
   int i,j,m,n;
 
   int *num_dihedral = atom->num_dihedral;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   // stats on old 1-4 neighbor counts
 
   double onefourcount = 0.0;
   for (i = 0; i < nlocal; i++) onefourcount += nspecial[i][2];
   double allcount;
   MPI_Allreduce(&onefourcount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-4 neighbors before dihedral trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-4 neighbors before dihedral trim\n",allcount);
   }
 
   // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral
 
   if (num_dihedral && atom->ndihedrals) {
 
     // dflag = flag for 1-4 neighs of all owned atoms
 
     int maxcount = 0;
     for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]);
     memory->create(dflag,nlocal,maxcount,"special::dflag");
 
     for (i = 0; i < nlocal; i++) {
       n = nspecial[i][2];
       for (j = 0; j < n; j++) dflag[i][j] = 0;
     }
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = list of 1,4 atoms in each dihedral stored by atom
 
     int nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i];
     int *buf;
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with list of 1,4 atoms in each dihedral
 
     int size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_dihedral[i]; j++) {
         buf[size++] = dihedral_atom1[i][j];
         buf[size++] = dihedral_atom4[i][j];
       }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan list of 1,4 atoms looking for atoms I own
     // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL);
 
     // delete 1-4 neighbors if they are not flagged in dflag
 
     for (i = 0; i < nlocal; i++) {
       m = 0;
       for (j = 0; j < nspecial[i][2]; j++)
         if (dflag[i][j]) onefour[i][m++] = onefour[i][j];
       nspecial[i][2] = m;
     }
 
     // clean up
 
     memory->destroy(dflag);
     memory->destroy(buf);
 
   // if no dihedrals are defined, delete all 1-4 neighs
 
   } else {
     for (i = 0; i < nlocal; i++) nspecial[i][2] = 0;
   }
 
   // stats on new 1-4 neighbor counts
 
   onefourcount = 0.0;
   for (i = 0; i < nlocal; i++) onefourcount += nspecial[i][2];
   MPI_Allreduce(&onefourcount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-4 neighbors after dihedral trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-4 neighbors after dihedral trim\n",allcount);
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan tags for atoms I own
    when find one, increment nspecial count for that atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_one(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int m;
 
   for (int i = 0; i < ndatum; i++) {
     m = atom->map(buf[i]);
     if (m >= 0 && m < nlocal) nspecial[m][0]++;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan 2nd-atom tags for atoms I own
    when find one, add 1st-atom tag to onetwo list for 2nd atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_two(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
   int *count = sptr->count;
 
   tagint *buf = (tagint *) cbuf;
   int m;
 
   for (int i = 1; i < ndatum; i += 2) {
     m = atom->map(buf[i]);
     if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1];
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-2 neighbors for atoms I own
    when find one, increment 1-3 count by # of 1-2 neighbors of my atom,
      subtracting one since my list will contain original atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_three(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,m,n,num12;
 
   i = 0;
   while (i < ndatum) {
     n = buf[i];
     num12 = buf[i+1];
     for (j = 0; j < num12; j++) {
       m = atom->map(buf[i+2+j]);
       if (m >= 0 && m < nlocal)
         n += nspecial[m][0] - 1;
     }
     buf[i] = n;
     i += 2 + num12;
   }
 }
 
 /* ----------------------------------------------------------------------
   when receive buffer, scan list of 1-2 neighbors for atoms I own
   when find one, add its neighbors to 1-3 list
     increment the count in buf(i+4)
     exclude the atom whose tag = original
     this process may include duplicates but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_four(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
 
   tagint *buf = (tagint *) cbuf;
   tagint original;
   int i,j,k,m,n,num12,num13;
 
   i = 0;
   while (i < ndatum) {
     original = buf[i];
     num12 = buf[i+1];
     num13 = buf[i+2];
     n = buf[i+3];
     for (j = 0; j < num12; j++) {
       m = atom->map(buf[i+4+j]);
       if (m >= 0 && m < nlocal)
         for (k = 0; k < nspecial[m][0]; k++)
           if (onetwo[m][k] != original)
             buf[i+4+num12+(n++)] = onetwo[m][k];
     }
     buf[i+3] = n;
     i += 4 + num12 + num13;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-3 neighbors for atoms I own
    when find one, increment 1-4 count by # of 1-2 neighbors of my atom
      may include duplicates and original atom but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_five(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,m,n,num13;
 
   i = 0;
   while (i < ndatum) {
     n = buf[i];
     num13 = buf[i+1];
     for (j = 0; j < num13; j++) {
       m = atom->map(buf[i+2+j]);
       if (m >= 0 && m < nlocal) n += nspecial[m][0];
     }
       buf[i] = n;
       i += 2 + num13;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-3 neighbors for atoms I own
    when find one, add its neighbors to 1-4 list
      incrementing the count in buf(i+4)
      this process may include duplicates but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_six(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,k,m,n,num13,num14;
 
   i = 0;
   while (i < ndatum) {
     num13 = buf[i];
     num14 = buf[i+1];
     n = buf[i+2];
     for (j = 0; j < num13; j++) {
       m = atom->map(buf[i+3+j]);
       if (m >= 0 && m < nlocal)
         for (k = 0; k < nspecial[m][0]; k++)
           buf[i+3+num13+(n++)] = onetwo[m][k];
     }
     buf[i+2] = n;
     i += 3 + num13 + num14;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1,3 atoms looking for atoms I own
    when find one, scan its 1-3 neigh list and mark I,J as in an angle
 ------------------------------------------------------------------------- */
 
 void Special::ring_seven(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onethree = sptr->onethree;
   int **dflag = sptr->dflag;
 
   tagint *buf = (tagint *) cbuf;
   tagint iglobal,jglobal;
   int i,m,ilocal,jlocal;
 
   i = 0;
   while (i < ndatum) {
     iglobal = buf[i];
     jglobal = buf[i+1];
     ilocal = atom->map(iglobal);
     jlocal = atom->map(jglobal);
     if (ilocal >= 0 && ilocal < nlocal)
       for (m = 0; m < nspecial[ilocal][1]; m++)
         if (jglobal == onethree[ilocal][m]) {
           dflag[ilocal][m] = 1;
           break;
         }
     if (jlocal >= 0 && jlocal < nlocal)
       for (m = 0; m < nspecial[jlocal][1]; m++)
         if (iglobal == onethree[jlocal][m]) {
           dflag[jlocal][m] = 1;
           break;
         }
     i += 2;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1,4 atoms looking for atoms I own
    when find one, scan its 1-4 neigh list and mark I,J as in a dihedral
 ------------------------------------------------------------------------- */
 
 void Special::ring_eight(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onefour = sptr->onefour;
   int **dflag = sptr->dflag;
 
   tagint *buf = (tagint *) cbuf;
   tagint iglobal,jglobal;
   int i,m,ilocal,jlocal;
 
   i = 0;
   while (i < ndatum) {
     iglobal = buf[i];
     jglobal = buf[i+1];
     ilocal = atom->map(iglobal);
     jlocal = atom->map(jglobal);
     if (ilocal >= 0 && ilocal < nlocal)
       for (m = 0; m < nspecial[ilocal][2]; m++)
         if (jglobal == onefour[ilocal][m]) {
           dflag[ilocal][m] = 1;
           break;
         }
     if (jlocal >= 0 && jlocal < nlocal)
       for (m = 0; m < nspecial[jlocal][2]; m++)
         if (iglobal == onefour[jlocal][m]) {
           dflag[jlocal][m] = 1;
           break;
         }
     i += 2;
   }
 }