diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp index e815dbcbe..7c5d0a320 100644 --- a/src/GPU/pppm_gpu.cpp +++ b/src/GPU/pppm_gpu.cpp @@ -1,725 +1,731 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Mike Brown (ORNL), Axel Kohlmeyer (Temple) ------------------------------------------------------------------------- */ #include "lmptype.h" #include "mpi.h" #include "string.h" #include "stdio.h" #include "stdlib.h" #include "math.h" #include "pppm_gpu.h" #include "atom.h" #include "comm.h" #include "commgrid.h" #include "neighbor.h" #include "force.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "domain.h" #include "fft3d_wrap.h" #include "remap_wrap.h" #include "gpu_extra.h" #include "math_const.h" #include "memory.h" #include "error.h" #include "update.h" #include "universe.h" #include "fix.h" using namespace LAMMPS_NS; using namespace MathConst; #define MAXORDER 7 #define OFFSET 16384 #define SMALL 0.00001 #define LARGE 10000.0 #define EPS_HOC 1.0e-7 enum{REVERSE_RHO}; enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; #ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else #define ZEROF 0.0 #define ONEF 1.0 #endif // external functions from cuda library for atom decomposition #ifdef FFT_SINGLE #define PPPM_GPU_API(api) pppm_gpu_ ## api ## _f #else #define PPPM_GPU_API(api) 
pppm_gpu_ ## api ## _d
#endif

// Prototypes for the precision-specific GPU library entry points.
// PPPM_GPU_API(x) expands to pppm_gpu_x_f (single) or pppm_gpu_x_d (double),
// selected by FFT_SINGLE above.

FFT_SCALAR* PPPM_GPU_API(init)(const int nlocal, const int nall, FILE *screen,
                               const int order, const int nxlo_out,
                               const int nylo_out, const int nzlo_out,
                               const int nxhi_out, const int nyhi_out,
                               const int nzhi_out, FFT_SCALAR **rho_coeff,
                               FFT_SCALAR **_vd_brick,
                               const double slab_volfactor,
                               const int nx_pppm, const int ny_pppm,
                               const int nz_pppm, const bool split,
                               const bool respa, int &success);
void PPPM_GPU_API(clear)(const double poisson_time);
int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall,
                         double **host_x, int *host_type, bool &success,
                         double *host_q, double *boxlo, const double delxinv,
                         const double delyinv, const double delzinv);
void PPPM_GPU_API(interp)(const FFT_SCALAR qqrd2e_scale);
double PPPM_GPU_API(bytes)();
void PPPM_GPU_API(forces)(double **f);

/* ---------------------------------------------------------------------- */

// Constructor: validate args, initialize GPU-specific state, and verify
// that a GPU package fix is active (gpu_ready errors out otherwise).

PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{
  if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command");

  triclinic_support = 0;
  // GPU-side bricks are allocated lazily in init()
  density_brick_gpu = vd_brick = NULL;
  kspace_split = false;
  im_real_space = false;

  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
}

/* ----------------------------------------------------------------------
   free all memory
------------------------------------------------------------------------- */

PPPMGPU::~PPPMGPU()
{
  // release GPU-side resources; passes accumulated Poisson-solve time
  // back to the library for its timing report
  PPPM_GPU_API(clear)(poisson_time);
}

/* ----------------------------------------------------------------------
   called once before run
------------------------------------------------------------------------- */

void PPPMGPU::init()
{
  // PPPM init manages all arrays except density_brick_gpu and vd_brick
  // thru its deallocate(), allocate()
  // NOTE: could free density_brick and vdxyz_brick after PPPM allocates them,
  //       before allocating db_gpu and vd_brick down below, if don't need,
  //       if do this, make sure to set them to NULL

  destroy_3d_offset(density_brick_gpu,nzlo_out,nylo_out);
destroy_3d_offset(vd_brick,nzlo_out,nylo_out);
  density_brick_gpu = vd_brick = NULL;

  PPPM::init();

  // ensure no conflict with fix balance

  for (int i = 0; i < modify->nfix; i++)
    if (strcmp(modify->fix[i]->style,"balance") == 0)
      error->all(FLERR,"Cannot currently use pppm/gpu with fix balance.");

  // unsupported option

  if (differentiation_flag == 1)
    error->all(FLERR,"Cannot (yet) do analytic differentiation with pppm/gpu");

  // verlet/split runs kspace on a separate partition of procs

  if (strcmp(update->integrate_style,"verlet/split") == 0) {
    kspace_split=true;
    old_nlocal = 0;
  }

  // on the real-space partition this instance does no kspace work

  if (kspace_split && universe->iworld == 0) {
    im_real_space = true;
    return;
  }

  // GPU precision specific init

  bool respa_value=false;
  if (strstr(update->integrate_style,"respa"))
    respa_value=true;

  if (order>8)
    error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu.");

  PPPM_GPU_API(clear)(poisson_time);

  int success;
  FFT_SCALAR *data, *h_brick;
  // library returns host-pinned buffers: h_brick for the density brick,
  // data for the interleaved (x,y,z,u) field brick
  h_brick = PPPM_GPU_API(init)(atom->nlocal, atom->nlocal+atom->nghost, screen,
                               order, nxlo_out, nylo_out, nzlo_out,
                               nxhi_out, nyhi_out, nzhi_out, rho_coeff,
                               &data, slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
                               kspace_split,respa_value,success);

  GPU_EXTRA::check_flag(success,error,world);

  // allocate density_brick_gpu and vd_brick
  // (3d offset views over the library-owned pinned memory; vd_brick
  //  interleaves 4 values per grid point, hence vec_length = 4)

  density_brick_gpu =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:density_brick_gpu",h_brick,1);
  vd_brick =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:vd_brick",data,4);

  poisson_time = 0.0;
}

/* ----------------------------------------------------------------------
   compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */

void PPPMGPU::compute(int eflag, int vflag)
{
  int i,j;

  // in split mode the GPU library rebuilds its particle data only when
  // the local atom count grows (nago == 0); otherwise reuse (nago == 1)
  int nago;
  if (kspace_split) {
    if (im_real_space) return;
    if (atom->nlocal > old_nlocal) {
      nago=0;
      old_nlocal = atom->nlocal;
    } else nago = 1;
  } else nago = neighbor->ago;

  // set energy/virial flags
  // invoke allocate_peratom() if needed for first time

  if (eflag || vflag)
ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
         eflag_atom = vflag_atom = 0;

  // If need per-atom energies/virials, also do particle map on host
  // concurrently with GPU calculations

  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom->ghost_notify();
    cg_peratom->setup();
    peratom_allocate_flag = 1;
  }

  // launch charge spreading on the GPU; fills density_brick_gpu

  bool success = true;
  int flag=PPPM_GPU_API(spread)(nago, atom->nlocal, atom->nlocal +
                                atom->nghost, atom->x, atom->type, success,
                                atom->q, domain->boxlo, delxinv, delyinv,
                                delzinv);
  if (!success)
    error->one(FLERR,"Insufficient memory on accelerator");
  if (flag != 0)
    error->one(FLERR,"Out of range atoms - cannot compute PPPM");

  // convert atoms from box to lamda coords

  if (triclinic == 0) boxlo = domain->boxlo;
  else {
    boxlo = domain->boxlo_lamda;
    domain->x2lamda(atom->nlocal);
  }

  // extend size of per-atom arrays if necessary

  if (evflag_atom && atom->nlocal > nmax) {
    memory->destroy(part2grid);
    nmax = atom->nmax;
    memory->create(part2grid,nmax,3,"pppm:part2grid");
    particle_map();
  }

  double t3 = MPI_Wtime();

  // all procs communicate density values from their ghost cells
  //   to fully sum contribution in their 3d bricks
  // remap from 3d decomposition to FFT decomposition

  cg->reverse_comm(this,REVERSE_RHO);
  brick2fft();

  // compute potential gradient on my FFT grid and
  //   portion of e_long on this proc's FFT grid
  // return gradients (electric fields) in 3d brick decomposition

  poisson();

  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
  else cg->forward_comm(this,FORWARD_IK);

  // extra per-atom energy/virial communication

  if (evflag_atom) {
    if (differentiation_flag == 1 && vflag_atom)
      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
  }

  poisson_time += MPI_Wtime()-t3;

  // calculate the force on my particles

  FFT_SCALAR qscale =
force->qqrd2e * scale;
  // interpolate E-field back to particles on the GPU
  PPPM_GPU_API(interp)(qscale);

  // per-atom energy/virial
  // energy includes self-energy correction

  if (evflag_atom) fieldforce_peratom();

  // sum energy across procs and add in volume-dependent term
+  // reset qsum and qsqsum if atom count has changed

  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

+    if (atom->natoms != natoms_original) {
+      qsum_qsq(0);
+      natoms_original = atom->natoms;
+    }
+
    energy *= 0.5*volume;
    // self-energy and background (neutralizing) charge corrections;
    // 1.772453851 is sqrt(pi)
    energy -= g_ewald*qsqsum/1.772453851 +
      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
    energy *= qscale;
  }

  // sum virial across procs

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
  }

  // per-atom energy/virial
  // energy includes self-energy correction

  if (evflag_atom) {
    double *q = atom->q;
    int nlocal = atom->nlocal;

    if (eflag_atom) {
      for (i = 0; i < nlocal; i++) {
        eatom[i] *= 0.5;
        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
          (g_ewald*g_ewald*volume);
        eatom[i] *= qscale;
      }
    }

    if (vflag_atom) {
      for (i = 0; i < nlocal; i++)
        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
    }
  }

  // 2d slab correction

  if (slabflag) slabcorr();

  // convert atoms back from lamda to box coords

  if (triclinic) domain->lamda2x(atom->nlocal);

  // in split mode the library copies forces to the host arrays here

  if (kspace_split) PPPM_GPU_API(forces)(atom->f);
}

/* ----------------------------------------------------------------------
   remap density from 3d brick decomposition to FFT decomposition
------------------------------------------------------------------------- */

void PPPMGPU::brick2fft()
{
  int n,ix,iy,iz;

  // copy grabs inner portion of density from 3d brick
  // remap could be done as pre-stage of FFT,
  //   but this works optimally on only double values, not complex values

  n = 0;
  for (iz = nzlo_in; iz <= nzhi_in; iz++)
    for (iy = nylo_in; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++)
        density_fft[n++] =
density_brick_gpu[iz][iy][ix];

  remap->perform(density_fft,density_fft,work1);
}

/* ----------------------------------------------------------------------
   FFT-based Poisson solver
------------------------------------------------------------------------- */

void PPPMGPU::poisson_ik()
{
  int i,j,k,n;
  double eng;

  // transform charge density (r -> k)
  // work1 holds interleaved (re,im) pairs, hence the stride-2 indexing

  n = 0;
  for (i = 0; i < nfft; i++) {
    work1[n++] = density_fft[i];
    work1[n++] = ZEROF;
  }

  fft1->compute(work1,work1,1);

  // if requested, compute energy and virial contribution

  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
  double s2 = scaleinv*scaleinv;

  if (eflag_global || vflag_global) {
    if (vflag_global) {
      n = 0;
      for (i = 0; i < nfft; i++) {
        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
        if (eflag_global) energy += eng;
        n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nfft; i++) {
        energy +=
          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
        n += 2;
      }
    }
  }

  // scale by 1/total-grid-pts to get rho(k)
  // multiply by Green's function to get V(k)

  n = 0;
  for (i = 0; i < nfft; i++) {
    work1[n++] *= scaleinv * greensfn[i];
    work1[n++] *= scaleinv * greensfn[i];
  }

  // extra FFTs for per-atom energy/virial

  if (evflag_atom) poisson_peratom();

  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
  // FFT leaves data in 3d brick decomposition
  // copy it into inner portion of vdx,vdy,vdz arrays

  // x direction gradient

  n = 0;
  for (k = nzlo_fft; k <= nzhi_fft; k++)
    for (j = nylo_fft; j <= nyhi_fft; j++)
      for (i = nxlo_fft; i <= nxhi_fft; i++) {
        work2[n] = fkx[i]*work1[n+1];
        work2[n+1] = -fkx[i]*work1[n];
        n += 2;
      }

  fft2->compute(work2,work2,-1);

  // vd_brick interleaves 4 values per grid point; the x component goes
  // into slot 0 (stride-4 inner loop)

  n = 0;
  int x_hi = nxhi_in * 4 + 3;
  for (k = nzlo_in; k <= nzhi_in; k++)
    for (j = nylo_in; j <= nyhi_in; j++)
      for (i = nxlo_in * 4; i < x_hi; i+=4) {
        vd_brick[k][j][i] = work2[n];
        n += 2;
      }

  // y direction gradient

  n = 0;
  for (k = nzlo_fft; k <= nzhi_fft; k++)
    for (j = nylo_fft; j <= nyhi_fft; j++)
      for (i =
nxlo_fft; i <= nxhi_fft; i++) {
        work2[n] = fky[j]*work1[n+1];
        work2[n+1] = -fky[j]*work1[n];
        n += 2;
      }

  fft2->compute(work2,work2,-1);

  // y component goes into slot 1 of the interleaved brick

  n = 0;
  for (k = nzlo_in; k <= nzhi_in; k++)
    for (j = nylo_in; j <= nyhi_in; j++)
      for (i = nxlo_in * 4 + 1; i < x_hi; i+=4) {
        vd_brick[k][j][i] = work2[n];
        n += 2;
      }

  // z direction gradient

  n = 0;
  for (k = nzlo_fft; k <= nzhi_fft; k++)
    for (j = nylo_fft; j <= nyhi_fft; j++)
      for (i = nxlo_fft; i <= nxhi_fft; i++) {
        work2[n] = fkz[k]*work1[n+1];
        work2[n+1] = -fkz[k]*work1[n];
        n += 2;
      }

  fft2->compute(work2,work2,-1);

  // z component goes into slot 2 of the interleaved brick

  n = 0;
  for (k = nzlo_in; k <= nzhi_in; k++)
    for (j = nylo_in; j <= nyhi_in; j++)
      for (i = nxlo_in * 4 + 2; i < x_hi; i+=4) {
        vd_brick[k][j][i] = work2[n];
        n += 2;
      }
}

/* ----------------------------------------------------------------------
   pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  if (flag == FORWARD_IK) {
    // vd_brick stores 4 interleaved values per grid point, so list
    // indices are scaled by 4; only the first 3 (x,y,z) are sent
    int offset;
    FFT_SCALAR *src = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
    for (int i = 0; i < nlist; i++) {
      offset = 4*list[i];
      buf[n++] = src[offset++];
      buf[n++] = src[offset++];
      buf[n++] = src[offset];
    }
  } else if (flag == FORWARD_AD) {
    FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];
  } else if (flag == FORWARD_IK_PERATOM) {
    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) buf[n++] = esrc[list[i]];
      if (vflag_atom) {
        buf[n++] = v0src[list[i]];
        buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
        buf[n++] = v3src[list[i]];
        buf[n++] = v4src[list[i]];
        buf[n++] = v5src[list[i]];
      }
    }
  } else if (flag == FORWARD_AD_PERATOM) {
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = v0src[list[i]];
      buf[n++] = v1src[list[i]];
      buf[n++] = v2src[list[i]];
      buf[n++] = v3src[list[i]];
      buf[n++] = v4src[list[i]];
      buf[n++] = v5src[list[i]];
    }
  }
}

/* ----------------------------------------------------------------------
   unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  if (flag == FORWARD_IK) {
    // mirror of pack_forward: 3 field components per grid point,
    // scattered into the 4-wide interleaved vd_brick
    int offset;
    FFT_SCALAR *dest = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
    for (int i = 0; i < nlist; i++) {
      offset = 4*list[i];
      dest[offset++] = buf[n++];
      dest[offset++] = buf[n++];
      dest[offset] = buf[n++];
    }
  } else if (flag == FORWARD_AD) {
    FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] = buf[i];
  } else if (flag == FORWARD_IK_PERATOM) {
    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) esrc[list[i]] = buf[n++];
      if (vflag_atom) {
        v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
        v2src[list[i]] = buf[n++];
        v3src[list[i]] = buf[n++];
        v4src[list[i]] = buf[n++];
        v5src[list[i]] = buf[n++];
      }
    }
  } else if (flag == FORWARD_AD_PERATOM) {
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      v0src[list[i]] = buf[n++];
      v1src[list[i]] = buf[n++];
      v2src[list[i]] = buf[n++];
      v3src[list[i]] = buf[n++];
      v4src[list[i]] = buf[n++];
      v5src[list[i]] = buf[n++];
    }
  }
}

/* ----------------------------------------------------------------------
   pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  if (flag == REVERSE_RHO) {
    FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];
  }
}

/* ----------------------------------------------------------------------
   unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  if (flag == REVERSE_RHO) {
    // note: accumulate (+=), not assign — ghost contributions are summed
    FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] += buf[i];
  }
}

/* ----------------------------------------------------------------------
   create array using offsets from pinned memory allocation
------------------------------------------------------------------------- */

FFT_SCALAR ***PPPMGPU::create_3d_offset(int n1lo, int n1hi, int n2lo, int n2hi,
                                        int n3lo, int n3hi, const char *name,
                                        FFT_SCALAR *data, int vec_length)
{
int i,j;
  int n1 = n1hi - n1lo + 1;
  int n2 = n2hi - n2lo + 1;
  int n3 = n3hi - n3lo + 1;

  // build row-pointer (plane) and plane-pointer (array) tables over the
  // caller-supplied flat buffer; only the pointer tables are allocated here,
  // the data itself is the (pinned) memory passed in

  FFT_SCALAR **plane = (FFT_SCALAR **)
    memory->smalloc(n1*n2*sizeof(FFT_SCALAR *),name);
  FFT_SCALAR ***array = (FFT_SCALAR ***)
    memory->smalloc(n1*sizeof(FFT_SCALAR **),name);

  int n = 0;
  for (i = 0; i < n1; i++) {
    array[i] = &plane[i*n2];
    for (j = 0; j < n2; j++) {
      plane[i*n2+j] = &data[n];
      n += n3*vec_length;
    }
  }

  // shift pointers so the array can be indexed with the original
  // (possibly negative) n1lo/n2lo/n3lo offsets

  for (i = 0; i < n1*n2; i++) array[0][i] -= n3lo*vec_length;
  for (i = 0; i < n1; i++) array[i] -= n2lo;
  return array-n1lo;
}

/* ----------------------------------------------------------------------
   3d memory offsets
------------------------------------------------------------------------- */

void PPPMGPU::destroy_3d_offset(FFT_SCALAR ***array, int n1_offset,
                                int n2_offset)
{
  if (array == NULL) return;
  // free only the pointer tables; the underlying data buffer is owned
  // by the GPU library
  memory->sfree(&array[n1_offset][n2_offset]);
  memory->sfree(array + n1_offset);
}

/* ----------------------------------------------------------------------
   memory usage of local arrays
------------------------------------------------------------------------- */

double PPPMGPU::memory_usage()
{
  double bytes = PPPM::memory_usage();

  // NOTE: add tallying here for density_brick_gpu and vd_brick
  //       could subtract out density_brick and vdxyz_brick if freed them above
  //       if the net effect is zero, do nothing

  return bytes + PPPM_GPU_API(bytes)();
}

/* ----------------------------------------------------------------------
   perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */

int PPPMGPU::timing_1d(int n, double &time1d)
{
  // real-space partition of a split run does no FFTs; report a dummy time
  if (im_real_space) {
    time1d = 1.0;
    return 4;
  }
  PPPM::timing_1d(n,time1d);
  return 4;
}

/* ----------------------------------------------------------------------
   perform and time the 3d FFTs required for N timesteps
------------------------------------------------------------------------- */

int PPPMGPU::timing_3d(int n, double &time3d)
{
  // real-space partition of a split run does no FFTs; report a dummy time
  if (im_real_space) {
    time3d = 1.0;
    return 4;
  }
PPPM::timing_3d(n,time3d);
  return 4;
}

/* ----------------------------------------------------------------------
   adjust PPPM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */

void PPPMGPU::setup()
{
  // nothing to do on the real-space partition of a split run
  if (im_real_space) return;
  PPPM::setup();
}