diff --git a/src/atom_vec_sphere.cpp b/src/atom_vec_sphere.cpp
index 7c6691ebf..32aeb5139 100644
--- a/src/atom_vec_sphere.cpp
+++ b/src/atom_vec_sphere.cpp
@@ -1,1171 +1,1170 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "atom_vec_sphere.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "modify.h"
 #include "force.h"
 #include "fix.h"
 #include "fix_adapt.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define DELTA 10000
 
 /* ---------------------------------------------------------------------- */
 
 AtomVecSphere::AtomVecSphere(LAMMPS *lmp) : AtomVec(lmp)
 {
   molecular = 0;
 
   comm_x_only = 1;
   comm_f_only = 0;
   size_forward = 3;
   size_reverse = 6;
   size_border = 8;
   size_velocity = 6;
   size_data_atom = 7;
   size_data_vel = 7;
   xcol_data = 5;
 
   atom->sphere_flag = 1;
   atom->radius_flag = atom->rmass_flag = atom->omega_flag =
     atom->torque_flag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::init()
 {
   AtomVec::init();
 
   // set radvary if particle diameters are time-varying due to fix adapt
 
   radvary = 0;
   comm_x_only = 1;
   size_forward = 3;
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"adapt") == 0) {
       FixAdapt *fix = (FixAdapt *) modify->fix[i];
       if (fix->diamflag) {
         radvary = 1;
         comm_x_only = 0;
         size_forward = 5;
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    grow atom arrays
    n = 0 grows arrays by DELTA
    n > 0 allocates arrays to size n
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::grow(int n)
 {
   if (n == 0) nmax += DELTA;
   else nmax = n;
   atom->nmax = nmax;
   if (nmax < 0 || nmax > MAXSMALLINT)
     error->one(FLERR,"Per-processor system is too big");
 
   tag = memory->grow(atom->tag,nmax,"atom:tag");
   type = memory->grow(atom->type,nmax,"atom:type");
   mask = memory->grow(atom->mask,nmax,"atom:mask");
   image = memory->grow(atom->image,nmax,"atom:image");
   x = memory->grow(atom->x,nmax,3,"atom:x");
   v = memory->grow(atom->v,nmax,3,"atom:v");
   f = memory->grow(atom->f,nmax*comm->nthreads,3,"atom:f");
 
   radius = memory->grow(atom->radius,nmax,"atom:radius");
   rmass = memory->grow(atom->rmass,nmax,"atom:rmass");
   omega = memory->grow(atom->omega,nmax,3,"atom:omega");
   torque = memory->grow(atom->torque,nmax*comm->nthreads,3,"atom:torque");
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
 }
 
 /* ----------------------------------------------------------------------
    reset local array ptrs
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::grow_reset()
 {
   tag = atom->tag; type = atom->type;
   mask = atom->mask; image = atom->image;
   x = atom->x; v = atom->v; f = atom->f;
   radius = atom->radius; rmass = atom->rmass;
   omega = atom->omega; torque = atom->torque;
 }
 
 /* ----------------------------------------------------------------------
    copy atom I info to atom J
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::copy(int i, int j, int delflag)
 {
   tag[j] = tag[i];
   type[j] = type[i];
   mask[j] = mask[i];
   image[j] = image[i];
   x[j][0] = x[i][0];
   x[j][1] = x[i][1];
   x[j][2] = x[i][2];
   v[j][0] = v[i][0];
   v[j][1] = v[i][1];
   v[j][2] = v[i][2];
 
   radius[j] = radius[i];
   rmass[j] = rmass[i];
   omega[j][0] = omega[i][0];
   omega[j][1] = omega[i][1];
   omega[j][2] = omega[i][2];
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_comm(int n, int *list, double *buf,
                                int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
 
   if (radvary == 0) {
     m = 0;
     if (pbc_flag == 0) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0];
         buf[m++] = x[j][1];
         buf[m++] = x[j][2];
       }
     } else {
       if (domain->triclinic == 0) {
         dx = pbc[0]*domain->xprd;
         dy = pbc[1]*domain->yprd;
         dz = pbc[2]*domain->zprd;
       } else {
         dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
         dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
         dz = pbc[2]*domain->zprd;
       }
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
       }
     }
 
   } else {
     m = 0;
     if (pbc_flag == 0) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0];
         buf[m++] = x[j][1];
         buf[m++] = x[j][2];
         buf[m++] = radius[j];
         buf[m++] = rmass[j];
       }
     } else {
       if (domain->triclinic == 0) {
         dx = pbc[0]*domain->xprd;
         dy = pbc[1]*domain->yprd;
         dz = pbc[2]*domain->zprd;
       } else {
         dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
         dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
         dz = pbc[2]*domain->zprd;
       }
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = radius[j];
         buf[m++] = rmass[j];
       }
     }
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_comm_vel(int n, int *list, double *buf,
                                    int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
 
   if (radvary == 0) {
     m = 0;
     if (pbc_flag == 0) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0];
         buf[m++] = x[j][1];
         buf[m++] = x[j][2];
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
         buf[m++] = omega[j][0];
         buf[m++] = omega[j][1];
         buf[m++] = omega[j][2];
       }
     } else {
       if (domain->triclinic == 0) {
         dx = pbc[0]*domain->xprd;
         dy = pbc[1]*domain->yprd;
         dz = pbc[2]*domain->zprd;
       } else {
         dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
         dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
         dz = pbc[2]*domain->zprd;
       }
       if (!deform_vremap) {
         for (i = 0; i < n; i++) {
           j = list[i];
           buf[m++] = x[j][0] + dx;
           buf[m++] = x[j][1] + dy;
           buf[m++] = x[j][2] + dz;
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
           buf[m++] = omega[j][0];
           buf[m++] = omega[j][1];
           buf[m++] = omega[j][2];
         }
       } else {
         dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
         dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
         dvz = pbc[2]*h_rate[2];
         for (i = 0; i < n; i++) {
           j = list[i];
           buf[m++] = x[j][0] + dx;
           buf[m++] = x[j][1] + dy;
           buf[m++] = x[j][2] + dz;
           if (mask[i] & deform_groupbit) {
             buf[m++] = v[j][0] + dvx;
             buf[m++] = v[j][1] + dvy;
             buf[m++] = v[j][2] + dvz;
           } else {
             buf[m++] = v[j][0];
             buf[m++] = v[j][1];
             buf[m++] = v[j][2];
           }
           buf[m++] = omega[j][0];
           buf[m++] = omega[j][1];
           buf[m++] = omega[j][2];
         }
       }
     }
 
   } else {
     m = 0;
     if (pbc_flag == 0) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0];
         buf[m++] = x[j][1];
         buf[m++] = x[j][2];
         buf[m++] = radius[j];
         buf[m++] = rmass[j];
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
         buf[m++] = omega[j][0];
         buf[m++] = omega[j][1];
         buf[m++] = omega[j][2];
       }
     } else {
       if (domain->triclinic == 0) {
         dx = pbc[0]*domain->xprd;
         dy = pbc[1]*domain->yprd;
         dz = pbc[2]*domain->zprd;
       } else {
         dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
         dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
         dz = pbc[2]*domain->zprd;
       }
       if (!deform_vremap) {
         for (i = 0; i < n; i++) {
           j = list[i];
           buf[m++] = x[j][0] + dx;
           buf[m++] = x[j][1] + dy;
           buf[m++] = x[j][2] + dz;
           buf[m++] = radius[j];
           buf[m++] = rmass[j];
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
           buf[m++] = omega[j][0];
           buf[m++] = omega[j][1];
           buf[m++] = omega[j][2];
         }
       } else {
         dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
         dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
         dvz = pbc[2]*h_rate[2];
         for (i = 0; i < n; i++) {
           j = list[i];
           buf[m++] = x[j][0] + dx;
           buf[m++] = x[j][1] + dy;
           buf[m++] = x[j][2] + dz;
           buf[m++] = radius[j];
           buf[m++] = rmass[j];
           if (mask[i] & deform_groupbit) {
             buf[m++] = v[j][0] + dvx;
             buf[m++] = v[j][1] + dvy;
             buf[m++] = v[j][2] + dvz;
           } else {
             buf[m++] = v[j][0];
             buf[m++] = v[j][1];
             buf[m++] = v[j][2];
           }
           buf[m++] = omega[j][0];
           buf[m++] = omega[j][1];
           buf[m++] = omega[j][2];
         }
       }
     }
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_comm_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
 
   if (radvary == 0) return 0;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     buf[m++] = radius[j];
     buf[m++] = rmass[j];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::unpack_comm(int n, int first, double *buf)
 {
   int i,m,last;
 
   if (radvary == 0) {
     m = 0;
     last = first + n;
     for (i = first; i < last; i++) {
       x[i][0] = buf[m++];
       x[i][1] = buf[m++];
       x[i][2] = buf[m++];
     }
   } else {
     m = 0;
     last = first + n;
     for (i = first; i < last; i++) {
       x[i][0] = buf[m++];
       x[i][1] = buf[m++];
       x[i][2] = buf[m++];
       radius[i] = buf[m++];
       rmass[i] = buf[m++];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::unpack_comm_vel(int n, int first, double *buf)
 {
   int i,m,last;
 
   if (radvary == 0) {
     m = 0;
     last = first + n;
     for (i = first; i < last; i++) {
       x[i][0] = buf[m++];
       x[i][1] = buf[m++];
       x[i][2] = buf[m++];
       v[i][0] = buf[m++];
       v[i][1] = buf[m++];
       v[i][2] = buf[m++];
       omega[i][0] = buf[m++];
       omega[i][1] = buf[m++];
       omega[i][2] = buf[m++];
     }
   } else {
     m = 0;
     last = first + n;
     for (i = first; i < last; i++) {
       x[i][0] = buf[m++];
       x[i][1] = buf[m++];
       x[i][2] = buf[m++];
       radius[i] = buf[m++];
       rmass[i] = buf[m++];
       v[i][0] = buf[m++];
       v[i][1] = buf[m++];
       v[i][2] = buf[m++];
       omega[i][0] = buf[m++];
       omega[i][1] = buf[m++];
       omega[i][2] = buf[m++];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::unpack_comm_hybrid(int n, int first, double *buf)
 {
   int i,m,last;
 
   if (radvary == 0) return 0;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     radius[i] = buf[m++];
     rmass[i] = buf[m++];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_reverse(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = f[i][0];
     buf[m++] = f[i][1];
     buf[m++] = f[i][2];
     buf[m++] = torque[i][0];
     buf[m++] = torque[i][1];
     buf[m++] = torque[i][2];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_reverse_hybrid(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = torque[i][0];
     buf[m++] = torque[i][1];
     buf[m++] = torque[i][2];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::unpack_reverse(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     f[j][0] += buf[m++];
     f[j][1] += buf[m++];
     f[j][2] += buf[m++];
     torque[j][0] += buf[m++];
     torque[j][1] += buf[m++];
     torque[j][2] += buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::unpack_reverse_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     torque[j][0] += buf[m++];
     torque[j][1] += buf[m++];
     torque[j][2] += buf[m++];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_border(int n, int *list, double *buf,
                                  int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = radius[j];
       buf[m++] = rmass[j];
     }
   } else {
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0] + dx;
       buf[m++] = x[j][1] + dy;
       buf[m++] = x[j][2] + dz;
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = radius[j];
       buf[m++] = rmass[j];
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_border_vel(int n, int *list, double *buf,
                                      int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = radius[j];
       buf[m++] = rmass[j];
       buf[m++] = v[j][0];
       buf[m++] = v[j][1];
       buf[m++] = v[j][2];
       buf[m++] = omega[j][0];
       buf[m++] = omega[j][1];
       buf[m++] = omega[j][2];
     }
   } else {
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     if (!deform_vremap) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         buf[m++] = radius[j];
         buf[m++] = rmass[j];
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
         buf[m++] = omega[j][0];
         buf[m++] = omega[j][1];
         buf[m++] = omega[j][2];
       }
     } else {
       dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
       dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
       dvz = pbc[2]*h_rate[2];
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         buf[m++] = radius[j];
         buf[m++] = rmass[j];
         if (mask[i] & deform_groupbit) {
           buf[m++] = v[j][0] + dvx;
           buf[m++] = v[j][1] + dvy;
           buf[m++] = v[j][2] + dvz;
         } else {
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
         }
         buf[m++] = omega[j][0];
         buf[m++] = omega[j][1];
         buf[m++] = omega[j][2];
       }
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_border_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     buf[m++] = radius[j];
     buf[m++] = rmass[j];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::unpack_border(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     tag[i] = static_cast<int> (buf[m++]);
     type[i] = static_cast<int> (buf[m++]);
     mask[i] = static_cast<int> (buf[m++]);
     radius[i] = buf[m++];
     rmass[i] = buf[m++];
   }
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecSphere::unpack_border_vel(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     tag[i] = static_cast<int> (buf[m++]);
     type[i] = static_cast<int> (buf[m++]);
     mask[i] = static_cast<int> (buf[m++]);
     radius[i] = buf[m++];
     rmass[i] = buf[m++];
     v[i][0] = buf[m++];
     v[i][1] = buf[m++];
     v[i][2] = buf[m++];
     omega[i][0] = buf[m++];
     omega[i][1] = buf[m++];
     omega[i][2] = buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::unpack_border_hybrid(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     radius[i] = buf[m++];
     rmass[i] = buf[m++];
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_exchange(int i, double *buf)
 {
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
   *((tagint *) &buf[m++]) = image[i];
 
   buf[m++] = radius[i];
   buf[m++] = rmass[i];
   buf[m++] = omega[i][0];
   buf[m++] = omega[i][1];
   buf[m++] = omega[i][2];
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
 
   buf[0] = m;
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphere::unpack_exchange(double *buf)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
   image[nlocal] = *((tagint *) &buf[m++]);
 
   radius[nlocal] = buf[m++];
   rmass[nlocal] = buf[m++];
   omega[nlocal][0] = buf[m++];
   omega[nlocal][1] = buf[m++];
   omega[nlocal][2] = buf[m++];
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->
         unpack_exchange(nlocal,&buf[m]);
 
   atom->nlocal++;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    size of restart data for all atoms owned by this proc
    include extra data stored by fixes
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::size_restart()
 {
   int i;
 
   int nlocal = atom->nlocal;
   int n = 16 * nlocal;
 
   if (atom->nextra_restart)
     for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
       for (i = 0; i < nlocal; i++)
         n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
 
   return n;
 }
 
 /* ----------------------------------------------------------------------
    pack atom I's data for restart file including extra quantities
    xyz must be 1st 3 values, so that read_restart can test on them
    molecular types may be negative, but write as positive
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_restart(int i, double *buf)
 {
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
   *((tagint *) &buf[m++]) = image[i];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
 
   buf[m++] = radius[i];
   buf[m++] = rmass[i];
   buf[m++] = omega[i][0];
   buf[m++] = omega[i][1];
   buf[m++] = omega[i][2];
 
   if (atom->nextra_restart)
     for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
       m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
 
   buf[0] = m;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack data for one atom from restart file including extra quantities
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::unpack_restart(double *buf)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) {
     grow(0);
     if (atom->nextra_store)
       memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
   }
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
   image[nlocal] = *((tagint *) &buf[m++]);
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
 
   radius[nlocal] = buf[m++];
   rmass[nlocal] = buf[m++];
   omega[nlocal][0] = buf[m++];
   omega[nlocal][1] = buf[m++];
   omega[nlocal][2] = buf[m++];
 
   double **extra = atom->extra;
   if (atom->nextra_store) {
     int size = static_cast<int> (buf[0]) - m;
     for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
   }
 
   atom->nlocal++;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    create one atom of itype at coord
    set other values to defaults
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::create_atom(int itype, double *coord)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   tag[nlocal] = 0;
   type[nlocal] = itype;
   x[nlocal][0] = coord[0];
   x[nlocal][1] = coord[1];
   x[nlocal][2] = coord[2];
   mask[nlocal] = 1;
   image[nlocal] = ((tagint) IMGMAX << IMG2BITS) |
     ((tagint) IMGMAX << IMGBITS) | IMGMAX;
   v[nlocal][0] = 0.0;
   v[nlocal][1] = 0.0;
   v[nlocal][2] = 0.0;
 
   radius[nlocal] = 0.5;
   rmass[nlocal] = 4.0*MY_PI/3.0 * radius[nlocal]*radius[nlocal]*radius[nlocal];
   omega[nlocal][0] = 0.0;
   omega[nlocal][1] = 0.0;
   omega[nlocal][2] = 0.0;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack one line from Atoms section of data file
    initialize other atom quantities
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::data_atom(double *coord, tagint imagetmp, char **values)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   tag[nlocal] = atoi(values[0]);
   if (tag[nlocal] <= 0)
     error->one(FLERR,"Invalid atom ID in Atoms section of data file");
 
   type[nlocal] = atoi(values[1]);
   if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes)
     error->one(FLERR,"Invalid atom type in Atoms section of data file");
 
   radius[nlocal] = 0.5 * atof(values[2]);
   if (radius[nlocal] < 0.0)
     error->one(FLERR,"Invalid radius in Atoms section of data file");
 
   double density = atof(values[3]);
   if (density <= 0.0)
     error->one(FLERR,"Invalid density in Atoms section of data file");
 
   if (radius[nlocal] == 0.0) rmass[nlocal] = density;
   else
     rmass[nlocal] = 4.0*MY_PI/3.0 *
       radius[nlocal]*radius[nlocal]*radius[nlocal] * density;
 
   x[nlocal][0] = coord[0];
   x[nlocal][1] = coord[1];
   x[nlocal][2] = coord[2];
 
   image[nlocal] = imagetmp;
 
   mask[nlocal] = 1;
   v[nlocal][0] = 0.0;
   v[nlocal][1] = 0.0;
   v[nlocal][2] = 0.0;
   omega[nlocal][0] = 0.0;
   omega[nlocal][1] = 0.0;
   omega[nlocal][2] = 0.0;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack hybrid quantities from one line in Atoms section of data file
    initialize other atom quantities for this sub-style
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::data_atom_hybrid(int nlocal, char **values)
 {
   radius[nlocal] = 0.5 * atof(values[0]);
   if (radius[nlocal] < 0.0)
     error->one(FLERR,"Invalid radius in Atoms section of data file");
 
   double density = atof(values[1]);
   if (density <= 0.0)
     error->one(FLERR,"Invalid density in Atoms section of data file");
 
   if (radius[nlocal] == 0.0) rmass[nlocal] = density;
   else
     rmass[nlocal] = 4.0*MY_PI/3.0 *
       radius[nlocal]*radius[nlocal]*radius[nlocal] * density;
 
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    unpack one line from Velocities section of data file
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::data_vel(int m, char **values)
 {
   v[m][0] = atof(values[0]);
   v[m][1] = atof(values[1]);
   v[m][2] = atof(values[2]);
   omega[m][0] = atof(values[3]);
   omega[m][1] = atof(values[4]);
   omega[m][2] = atof(values[5]);
 }
 
 /* ----------------------------------------------------------------------
    unpack hybrid quantities from one line in Velocities section of data file
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::data_vel_hybrid(int m, char **values)
 {
   omega[m][0] = atof(values[0]);
   omega[m][1] = atof(values[1]);
   omega[m][2] = atof(values[2]);
   return 3;
 }
 
 /* ----------------------------------------------------------------------
    pack atom info for data file including 3 image flags
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::pack_data(double **buf)
 {
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) {
     buf[i][0] = tag[i];
     buf[i][1] = type[i];
     buf[i][2] = 2.0*radius[i];
     if (radius[i] == 0.0) buf[i][3] = rmass[i];
     else 
       buf[i][3] = rmass[i] / (4.0*MY_PI/3.0 * radius[i]*radius[i]*radius[i]);
     buf[i][4] = x[i][0];
-    buf[i][5] = x[i][0];
-    buf[i][6] = x[i][1];
-    buf[i][7] = x[i][2];
-    buf[i][8] = (image[i] & IMGMASK) - IMGMAX;
-    buf[i][9] = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
-    buf[i][10] = (image[i] >> IMG2BITS) - IMGMAX;
+    buf[i][5] = x[i][1];
+    buf[i][6] = x[i][2];
+    buf[i][7] = (image[i] & IMGMASK) - IMGMAX;
+    buf[i][8] = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+    buf[i][9] = (image[i] >> IMG2BITS) - IMGMAX;
   }
 }
 
 /* ----------------------------------------------------------------------
    pack hybrid atom info for data file
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_data_hybrid(int i, double *buf)
 {
   buf[0] = 2.0*radius[i];
   if (radius[i] == 0.0) buf[1] = rmass[i];
   else buf[1] = rmass[i] / (4.0*MY_PI/3.0 * radius[i]*radius[i]*radius[i]);
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    write atom info to data file including 3 image flags
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::write_data(FILE *fp, int n, double **buf)
 {
   for (int i = 0; i < n; i++)
     fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
             (int) buf[i][0],(int) buf[i][1],
             buf[i][2],buf[i][3],
             buf[i][4],buf[i][5],buf[i][6],
             (int) buf[i][7],(int) buf[i][8],(int) buf[i][9]);
 }
 
 /* ----------------------------------------------------------------------
    write hybrid atom info to data file
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::write_data_hybrid(FILE *fp, double *buf)
 {
   fprintf(fp," %-1.16e %-1.16e",buf[0],buf[1]);
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    pack velocity info for data file
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::pack_vel(double **buf)
 {
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) {
     buf[i][0] = tag[i];
     buf[i][1] = v[i][0];
     buf[i][2] = v[i][1];
     buf[i][3] = v[i][2];
     buf[i][4] = omega[i][0];
     buf[i][5] = omega[i][1];
     buf[i][6] = omega[i][2];
   }
 }
 
 /* ----------------------------------------------------------------------
    pack hybrid velocity info for data file
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::pack_vel_hybrid(int i, double *buf)
 {
   buf[0] = omega[i][0];
   buf[1] = omega[i][1];
   buf[2] = omega[i][2];
   return 3;
 }
 
 /* ----------------------------------------------------------------------
    write velocity info to data file
 ------------------------------------------------------------------------- */
 
 void AtomVecSphere::write_vel(FILE *fp, int n, double **buf)
 {
   for (int i = 0; i < n; i++)
     fprintf(fp,"%d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e\n",
             (int) buf[i][0],buf[i][1],buf[i][2],buf[i][3],
             buf[i][4],buf[i][5],buf[i][6]);
 }
 
 /* ----------------------------------------------------------------------
    write hybrid velocity info to data file
 ------------------------------------------------------------------------- */
 
 int AtomVecSphere::write_vel_hybrid(FILE *fp, double *buf)
 {
   fprintf(fp," %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2]);
   return 3;
 }
 
 /* ----------------------------------------------------------------------
    return # of bytes of allocated memory
 ------------------------------------------------------------------------- */
 
 bigint AtomVecSphere::memory_usage()
 {
   bigint bytes = 0;
 
   if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
   if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
   if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
   if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
   if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
   if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
   if (atom->memcheck("f")) bytes += memory->usage(f,nmax*comm->nthreads,3);
 
   if (atom->memcheck("radius")) bytes += memory->usage(radius,nmax);
   if (atom->memcheck("rmass")) bytes += memory->usage(rmass,nmax);
   if (atom->memcheck("omega")) bytes += memory->usage(omega,nmax,3);
   if (atom->memcheck("torque")) 
     bytes += memory->usage(torque,nmax*comm->nthreads,3);
 
   return bytes;
 }
diff --git a/src/comm.cpp b/src/comm.cpp
index 648711d23..b15fc5a28 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -1,1862 +1,1863 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author (triclinic) : Pieter in 't Veld (SNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "math.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "comm.h"
 #include "universe.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "pair.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "group.h"
 #include "modify.h"
 #include "fix.h"
 #include "compute.h"
 #include "output.h"
 #include "dump.h"
 #include "procmap.h"
 #include "math_extra.h"
 #include "error.h"
 #include "memory.h"
 
 #ifdef _OPENMP
 #include "omp.h"
 #endif
 
 using namespace LAMMPS_NS;
 
 #define BUFFACTOR 1.5
 #define BUFMIN 1000
 #define BUFEXTRA 1000
 #define BIG 1.0e20
 
 enum{SINGLE,MULTI};
 enum{MULTIPLE};                   // same as in ProcMap
 enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM};
 enum{CART,CARTREORDER,XYZ};
 
 /* ----------------------------------------------------------------------
    setup MPI and allocate buffer space
 ------------------------------------------------------------------------- */
 
 Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0;
   coregrid[0] = coregrid[1] = coregrid[2] = 1;
   gridflag = ONELEVEL;
   mapflag = CART;
   customfile = NULL;
   recv_from_partition = send_to_partition = -1;
   otherflag = 0;
   outfile = NULL;
 
   grid2proc = NULL;
 
   bordergroup = 0;
   style = SINGLE;
   uniform = 1;
   xsplit = ysplit = zsplit = NULL;
   multilo = multihi = NULL;
   cutghostmulti = NULL;
   cutghostuser = 0.0;
   ghost_velocity = 0;
 
   // use of OpenMP threads
   // query OpenMP for number of threads/process set by user at run-time
   // if the OMP_NUM_THREADS environment variable is not set, we default
   // to using 1 thread. This follows the principle of the least surprise,
   // while practically all OpenMP implementations violate it by using
   // as many threads as there are (virtual) CPU cores by default.
 
   nthreads = 1;
 #ifdef _OPENMP
   if (getenv("OMP_NUM_THREADS") == NULL) {
     nthreads = 1;
     if (me == 0)
       error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
   } else {
     nthreads = omp_get_max_threads();
   }
 
   // enforce consistent number of threads across all MPI tasks
 
   MPI_Bcast(&nthreads,1,MPI_INT,0,world);
   omp_set_num_threads(nthreads);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
     if (logfile)
       fprintf(logfile,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
   }
 #endif
 
   // initialize comm buffers & exchange memory
 
   maxsend = BUFMIN;
   memory->create(buf_send,maxsend+BUFEXTRA,"comm:buf_send");
   maxrecv = BUFMIN;
   memory->create(buf_recv,maxrecv,"comm:buf_recv");
 
   maxswap = 6;
   allocate_swap(maxswap);
 
   sendlist = (int **) memory->smalloc(maxswap*sizeof(int *),"comm:sendlist");
   memory->create(maxsendlist,maxswap,"comm:maxsendlist");
   for (int i = 0; i < maxswap; i++) {
     maxsendlist[i] = BUFMIN;
     memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 Comm::~Comm()
 {
   memory->destroy(xsplit);
   memory->destroy(ysplit);
   memory->destroy(zsplit);
 
   delete [] customfile;
   delete [] outfile;
 
   memory->destroy(grid2proc);
 
   free_swap();
   if (style == MULTI) {
     free_multi();
     memory->destroy(cutghostmulti);
   }
 
   if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
   memory->sfree(sendlist);
   memory->destroy(maxsendlist);
 
   memory->destroy(buf_send);
   memory->destroy(buf_recv);
 }
 
 /* ----------------------------------------------------------------------
    create 3d grid of procs based on Nprocs and box size & shape
    map processors to grid, setup xyz split for a uniform grid
 ------------------------------------------------------------------------- */
 
 void Comm::set_proc_grid(int outflag)
 {
   // recv 3d proc grid of another partition if my 3d grid depends on it
 
   if (recv_from_partition >= 0) {
     MPI_Status status;
     if (me == 0) {
       MPI_Recv(other_procgrid,3,MPI_INT,
                universe->root_proc[recv_from_partition],0,
                universe->uworld,&status);
       MPI_Recv(other_coregrid,3,MPI_INT,
                universe->root_proc[recv_from_partition],0,
                universe->uworld,&status);
     }
     MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
     MPI_Bcast(other_coregrid,3,MPI_INT,0,world);
   }
 
   // create ProcMap class to create 3d grid and map procs to it
 
   ProcMap *pmap = new ProcMap(lmp);
 
   // create 3d grid of processors
   // produces procgrid and coregrid (if relevant)
 
   if (gridflag == ONELEVEL) {
     pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
                         otherflag,other_style,other_procgrid,other_coregrid);
 
   } else if (gridflag == TWOLEVEL) {
     pmap->twolevel_grid(nprocs,user_procgrid,procgrid,
                         ncores,user_coregrid,coregrid,
                         otherflag,other_style,other_procgrid,other_coregrid);
 
   } else if (gridflag == NUMA) {
     pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid);
   }
 
   // error check on procgrid
   // should not be necessary due to ProcMap
 
   if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
     error->all(FLERR,"Bad grid of processors");
   if (domain->dimension == 2 && procgrid[2] != 1)
     error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
 
   // grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid
 
   if (grid2proc) memory->destroy(grid2proc);
   memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
                  "comm:grid2proc");
 
   // map processor IDs to 3d processor grid
   // produces myloc, procneigh, grid2proc
 
   if (gridflag == ONELEVEL) {
     if (mapflag == CART)
       pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc);
     else if (mapflag == CARTREORDER)
       pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc);
     else if (mapflag == XYZ)
       pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == TWOLEVEL) {
     if (mapflag == CART)
       pmap->cart_map(0,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == CARTREORDER)
       pmap->cart_map(1,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == XYZ)
       pmap->xyz_map(xyz,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == NUMA) {
     pmap->numa_map(0,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_map(procgrid,myloc,procneigh,grid2proc);
   }
 
   // print 3d grid info to screen and logfile
 
   if (outflag && me == 0) {
     if (screen) {
       fprintf(screen,"  %d by %d by %d MPI processor grid\n",
               procgrid[0],procgrid[1],procgrid[2]);
       if (gridflag == NUMA || gridflag == TWOLEVEL)
         fprintf(screen,"  %d by %d by %d core grid within node\n",
                 coregrid[0],coregrid[1],coregrid[2]);
     }
     if (logfile) {
       fprintf(logfile,"  %d by %d by %d MPI processor grid\n",
               procgrid[0],procgrid[1],procgrid[2]);
       if (gridflag == NUMA || gridflag == TWOLEVEL)
         fprintf(logfile,"  %d by %d by %d core grid within node\n",
                 coregrid[0],coregrid[1],coregrid[2]);
     }
   }
 
   // print 3d grid details to outfile
 
   if (outfile) pmap->output(outfile,procgrid,grid2proc);
 
   // free ProcMap class
 
   delete pmap;
 
   // set xsplit,ysplit,zsplit for uniform spacings
 
   memory->destroy(xsplit);
   memory->destroy(ysplit);
   memory->destroy(zsplit);
 
   memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
   memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
   memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
 
   for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
   for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
   for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
 
   xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
 
   // set lamda box params after procs are assigned
   // only set once unless load-balancing occurs
 
   if (domain->triclinic) domain->set_lamda_box();
 
   // send my 3d proc grid to another partition if requested
 
   if (send_to_partition >= 0) {
     if (me == 0) {
       MPI_Send(procgrid,3,MPI_INT,
                universe->root_proc[send_to_partition],0,
                universe->uworld);
       MPI_Send(coregrid,3,MPI_INT,
                universe->root_proc[send_to_partition],0,
                universe->uworld);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Comm::init()
 {
   triclinic = domain->triclinic;
   map_style = atom->map_style;
 
   // comm_only = 1 if only x,f are exchanged in forward/reverse comm
   // comm_x_only = 0 if ghost_velocity since velocities are added
 
   comm_x_only = atom->avec->comm_x_only;
   comm_f_only = atom->avec->comm_f_only;
   if (ghost_velocity) comm_x_only = 0;
 
   // set per-atom sizes for forward/reverse/border comm
   // augment by velocity quantities if needed
 
   size_forward = atom->avec->size_forward;
   size_reverse = atom->avec->size_reverse;
   size_border = atom->avec->size_border;
 
   if (ghost_velocity) size_forward += atom->avec->size_velocity;
   if (ghost_velocity) size_border += atom->avec->size_velocity;
 
   // maxforward = # of datums in largest forward communication
   // maxreverse = # of datums in largest reverse communication
   // query pair,fix,compute,dump for their requirements
   // pair style can force reverse comm even if newton off
 
   maxforward = MAX(size_forward,size_border);
   maxreverse = size_reverse;
 
   if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
   if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
 
   for (int i = 0; i < modify->nfix; i++) {
     maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
     maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
   }
 
   for (int i = 0; i < modify->ncompute; i++) {
     maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
     maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
   }
 
   for (int i = 0; i < output->ndump; i++) {
     maxforward = MAX(maxforward,output->dump[i]->comm_forward);
     maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
   }
 
   if (force->newton == 0) maxreverse = 0;
   if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
 
   // memory for multi-style communication
 
   if (style == MULTI && multilo == NULL) {
     allocate_multi(maxswap);
     memory->create(cutghostmulti,atom->ntypes+1,3,"comm:cutghostmulti");
   }
   if (style == SINGLE && multilo) {
     free_multi();
     memory->destroy(cutghostmulti);
   }
 }
 
 /* ----------------------------------------------------------------------
    setup spatial-decomposition communication patterns
    function of neighbor cutoff(s) & cutghostuser & current box size
    single style sets slab boundaries (slablo,slabhi) based on max cutoff
    multi style sets type-dependent slab boundaries (multilo,multihi)
 ------------------------------------------------------------------------- */
 
 void Comm::setup()
 {
   // cutghost[] = max distance at which ghost atoms need to be acquired
   // for orthogonal:
   //   cutghost is in box coords = neigh->cutghost in all 3 dims
   // for triclinic:
   //   neigh->cutghost = distance between tilted planes in box coords
   //   cutghost is in lamda coords = distance between those planes
   // for multi:
   //   cutghostmulti = same as cutghost, only for each atom type
 
   int i;
   int ntypes = atom->ntypes;
   double *prd,*sublo,*subhi;
 
   double cut = MAX(neighbor->cutneighmax,cutghostuser);
 
   if (triclinic == 0) {
     prd = domain->prd;
     sublo = domain->sublo;
     subhi = domain->subhi;
     cutghost[0] = cutghost[1] = cutghost[2] = cut;
 
     if (style == MULTI) {
       double *cuttype = neighbor->cuttype;
       for (i = 1; i <= ntypes; i++)
         cutghostmulti[i][0] = cutghostmulti[i][1] = cutghostmulti[i][2] =
           cuttype[i];
     }
 
   } else {
     prd = domain->prd_lamda;
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
     double *h_inv = domain->h_inv;
     double length0,length1,length2;
     length0 = sqrt(h_inv[0]*h_inv[0] + h_inv[5]*h_inv[5] + h_inv[4]*h_inv[4]);
     cutghost[0] = cut * length0;
     length1 = sqrt(h_inv[1]*h_inv[1] + h_inv[3]*h_inv[3]);
     cutghost[1] = cut * length1;
     length2 = h_inv[2];
     cutghost[2] = cut * length2;
 
     if (style == MULTI) {
       double *cuttype = neighbor->cuttype;
       for (i = 1; i <= ntypes; i++) {
         cutghostmulti[i][0] = cuttype[i] * length0;
         cutghostmulti[i][1] = cuttype[i] * length1;
         cutghostmulti[i][2] = cuttype[i] * length2;
       }
     }
   }
 
   // recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
   //   0 = from left, 1 = from right
   //   do not cross non-periodic boundaries, need[2] = 0 for 2d
   // sendneed[idim][0/1] = # of procs away I send atoms to
   //   0 = to left, 1 = to right
   //   set equal to recvneed[idim][1/0] of neighbor proc
   // maxneed[idim] = max procs away any proc recvs atoms in either direction
   // uniform = 1 = uniform sized sub-domains:
   //   maxneed is directly computable from sub-domain size
   //     limit to procgrid-1 for non-PBC
   //   recvneed = maxneed except for procs near non-PBC
   //   sendneed = recvneed of neighbor on each side
   // uniform = 0 = non-uniform sized sub-domains:
   //   compute recvneed via updown() which accounts for non-PBC
   //   sendneed = recvneed of neighbor on each side
   //   maxneed via Allreduce() of recvneed
 
   int *periodicity = domain->periodicity;
   int left,right;
 
   if (uniform) {
     maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
     maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
     maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
     if (domain->dimension == 2) maxneed[2] = 0;
     if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
     if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
     if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
 
     if (!periodicity[0]) {
       recvneed[0][0] = MIN(maxneed[0],myloc[0]);
       recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
       left = myloc[0] - 1;
       if (left < 0) left = procgrid[0] - 1;
       sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
       right = myloc[0] + 1;
       if (right == procgrid[0]) right = 0;
       sendneed[0][1] = MIN(maxneed[0],right);
     } else recvneed[0][0] = recvneed[0][1] =
              sendneed[0][0] = sendneed[0][1] = maxneed[0];
 
     if (!periodicity[1]) {
       recvneed[1][0] = MIN(maxneed[1],myloc[1]);
       recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
       left = myloc[1] - 1;
       if (left < 0) left = procgrid[1] - 1;
       sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
       right = myloc[1] + 1;
       if (right == procgrid[1]) right = 0;
       sendneed[1][1] = MIN(maxneed[1],right);
     } else recvneed[1][0] = recvneed[1][1] =
              sendneed[1][0] = sendneed[1][1] = maxneed[1];
 
     if (!periodicity[2]) {
       recvneed[2][0] = MIN(maxneed[2],myloc[2]);
       recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
       left = myloc[2] - 1;
       if (left < 0) left = procgrid[2] - 1;
       sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
       right = myloc[2] + 1;
       if (right == procgrid[2]) right = 0;
       sendneed[2][1] = MIN(maxneed[2],right);
     } else recvneed[2][0] = recvneed[2][1] =
              sendneed[2][0] = sendneed[2][1] = maxneed[2];
 
   } else {
     recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
     recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
     left = myloc[0] - 1;
     if (left < 0) left = procgrid[0] - 1;
     sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
     right = myloc[0] + 1;
     if (right == procgrid[0]) right = 0;
     sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
 
     recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
     recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
     left = myloc[1] - 1;
     if (left < 0) left = procgrid[1] - 1;
     sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
     right = myloc[1] + 1;
     if (right == procgrid[1]) right = 0;
     sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
 
     if (domain->dimension == 3) {
       recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
       recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
       left = myloc[2] - 1;
       if (left < 0) left = procgrid[2] - 1;
       sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
       right = myloc[2] + 1;
       if (right == procgrid[2]) right = 0;
       sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
     } else recvneed[2][0] = recvneed[2][1] =
              sendneed[2][0] = sendneed[2][1] = 0;
 
     int all[6];
     MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
     maxneed[0] = MAX(all[0],all[1]);
     maxneed[1] = MAX(all[2],all[3]);
     maxneed[2] = MAX(all[4],all[5]);
     //if (me == 0) 
     //printf("MAXNEED %d %d %d\n",maxneed[0],maxneed[1],maxneed[2]);
   }
 
   // allocate comm memory
 
   nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
   if (nswap > maxswap) grow_swap(nswap);
 
   // setup parameters for each exchange:
   // sendproc = proc to send to at each swap
   // recvproc = proc to recv from at each swap
   // for style SINGLE:
   //   slablo/slabhi = boundaries for slab of atoms to send at each swap
   //   use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
   //   if round-off, atoms recvd across PBC can be < or > than subbox boundary
   //   note that borders() only loops over subset of atoms during each swap
   //   treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
   // for style MULTI:
   //   multilo/multihi is same, with slablo/slabhi for each atom type
   // pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
   // pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
   // for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
   // 1st part of if statement is sending to the west/south/down
   // 2nd part of if statement is sending to the east/north/up
 
   int dim,ineed;
 
   int iswap = 0;
   for (dim = 0; dim < 3; dim++) {
     for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
       pbc_flag[iswap] = 0;
       pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
         pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
 
       if (ineed % 2 == 0) {
         sendproc[iswap] = procneigh[dim][0];
         recvproc[iswap] = procneigh[dim][1];
         if (style == SINGLE) {
           if (ineed < 2) slablo[iswap] = -BIG;
           else slablo[iswap] = 0.5 * (sublo[dim] + subhi[dim]);
           slabhi[iswap] = sublo[dim] + cutghost[dim];
         } else {
           for (i = 1; i <= ntypes; i++) {
             if (ineed < 2) multilo[iswap][i] = -BIG;
             else multilo[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]);
             multihi[iswap][i] = sublo[dim] + cutghostmulti[i][dim];
           }
         }
         if (myloc[dim] == 0) {
           pbc_flag[iswap] = 1;
           pbc[iswap][dim] = 1;
           if (triclinic) {
             if (dim == 1) pbc[iswap][5] = 1;
             else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
           }
         }
 
       } else {
         sendproc[iswap] = procneigh[dim][1];
         recvproc[iswap] = procneigh[dim][0];
         if (style == SINGLE) {
           slablo[iswap] = subhi[dim] - cutghost[dim];
           if (ineed < 2) slabhi[iswap] = BIG;
           else slabhi[iswap] = 0.5 * (sublo[dim] + subhi[dim]);
         } else {
           for (i = 1; i <= ntypes; i++) {
             multilo[iswap][i] = subhi[dim] - cutghostmulti[i][dim];
             if (ineed < 2) multihi[iswap][i] = BIG;
             else multihi[iswap][i] = 0.5 * (sublo[dim] + subhi[dim]);
           }
         }
         if (myloc[dim] == procgrid[dim]-1) {
           pbc_flag[iswap] = 1;
           pbc[iswap][dim] = -1;
           if (triclinic) {
             if (dim == 1) pbc[iswap][5] = -1;
             else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = -1;
           }
         }
       }
 
       iswap++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    walk up/down the extent of nearby processors in dim and dir
    loc = myloc of proc to start at
    dir = 0/1 = walk to left/right
    do not cross non-periodic boundaries
    is not called for z dim in 2d
    return how many procs away are needed to encompass cutghost away from loc
 ------------------------------------------------------------------------- */
 
 int Comm::updown(int dim, int dir, int loc,
                  double prd, int periodicity, double *split)
 {
   int index,count;
   double frac,delta;
 
   if (dir == 0) {
     frac = cutghost[dim]/prd;
     index = loc - 1;
     delta = 0.0;
     count = 0;
     while (delta < frac) {
       if (index < 0) {
         if (!periodicity) break;
         index = procgrid[dim] - 1;
       }
       count++;
       delta += split[index+1] - split[index];
       index--;
     }
 
   } else {
     frac = cutghost[dim]/prd;
     index = loc + 1;
     delta = 0.0;
     count = 0;
     while (delta < frac) {
       if (index >= procgrid[dim]) {
         if (!periodicity) break;
         index = 0;
       }
       count++;
       delta += split[index+1] - split[index];
       index++;
     }
   }
 
   return count;
 }
 
 /* ----------------------------------------------------------------------
    forward communication of atom coords every timestep
    other per-atom attributes may also be sent via pack/unpack routines
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm(int dummy)
 {
   int n;
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
   double **x = atom->x;
   double *buf;
 
   // exchange data with another proc
   // if other proc is self, just copy
   // if comm_x_only set, exchange or copy directly to x, don't unpack
 
   for (int iswap = 0; iswap < nswap; iswap++) {
     if (sendproc[iswap] != me) {
       if (comm_x_only) {
         if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
         else buf = NULL;
         if (size_forward_recv[iswap])
           MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                     recvproc[iswap],0,world,&request);
         n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
                             buf_send,pbc_flag[iswap],pbc[iswap]);
         if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
         if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
       } else if (ghost_velocity) {
         if (size_forward_recv[iswap])
           MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
                     recvproc[iswap],0,world,&request);
         n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
                                 buf_send,pbc_flag[iswap],pbc[iswap]);
         if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
         if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
         avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
       } else {
         if (size_forward_recv[iswap])
           MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
                     recvproc[iswap],0,world,&request);
         n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
                             buf_send,pbc_flag[iswap],pbc[iswap]);
         if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
         if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
         avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
       }
 
     } else {
       if (comm_x_only) {
         if (sendnum[iswap])
           n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
                               x[firstrecv[iswap]],pbc_flag[iswap],
                               pbc[iswap]);
       } else if (ghost_velocity) {
         n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
                                 buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
       } else {
         n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
                             buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication of forces on atoms every timestep
    other per-atom attributes may also be sent via pack/unpack routines
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm()
 {
   int n;
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
   double **f = atom->f;
   double *buf;
 
   // exchange data with another proc
   // if other proc is self, just copy
   // if comm_f_only set, exchange or copy directly from f, don't pack
 
   for (int iswap = nswap-1; iswap >= 0; iswap--) {
     if (sendproc[iswap] != me) {
       if (comm_f_only) {
         if (size_reverse_recv[iswap])
           MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
                     sendproc[iswap],0,world,&request);
         if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
         else buf = NULL;
         if (size_reverse_send[iswap])
           MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world);
         if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
       } else {
         if (size_reverse_recv[iswap])
           MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
                     sendproc[iswap],0,world,&request);
         n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
         if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
         if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
       }
       avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
 
     } else {
       if (comm_f_only) {
         if (sendnum[iswap])
             avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
                                 f[firstrecv[iswap]]);
       } else {
         n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
         avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    exchange: move atoms to correct processors
    atoms exchanged with all 6 stencil neighbors
    send out atoms that have left my box, receive ones entering my box
    atoms will be lost if not inside some proc's box
      can happen if atom moves outside of non-periodic bounary
      or if atom moves more than one proc away
    this routine called before every reneighboring
    for triclinic, atoms must be in lamda coords (0-1) before exchange is called
 ------------------------------------------------------------------------- */
 
 void Comm::exchange()
 {
   int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
   double lo,hi,value;
   double **x;
   double *sublo,*subhi,*buf;
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
 
   // clear global->local map for owned and ghost atoms
   // b/c atoms migrate to new procs in exchange() and
   //   new ghosts are created in borders()
   // map_set() is done at end of borders()
   // clear ghost count and any ghost bonus data internal to AtomVec
 
   if (map_style) atom->map_clear();
   atom->nghost = 0;
   atom->avec->clear_bonus();
 
   // subbox bounds for orthogonal or triclinic
 
   if (triclinic == 0) {
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   // loop over dimensions
 
   for (int dim = 0; dim < 3; dim++) {
 
     // fill buffer with atoms leaving my box, using < and >=
     // when atom is deleted, fill it in with last atom
 
     x = atom->x;
     lo = sublo[dim];
     hi = subhi[dim];
     nlocal = atom->nlocal;
     i = nsend = 0;
 
     while (i < nlocal) {
       if (x[i][dim] < lo || x[i][dim] >= hi) {
         if (nsend > maxsend) grow_send(nsend,1);
         nsend += avec->pack_exchange(i,&buf_send[nsend]);
         avec->copy(nlocal-1,i,1);
         nlocal--;
       } else i++;
     }
     atom->nlocal = nlocal;
 
     // send/recv atoms in both directions
     // if 1 proc in dimension, no send/recv, set recv buf to send buf
     // if 2 procs in dimension, single send/recv
     // if more than 2 procs in dimension, send/recv to both neighbors
 
     if (procgrid[dim] == 1) {
       nrecv = nsend;
       buf = buf_send;
 
     } else {
       MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
                    &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
       nrecv = nrecv1;
       if (procgrid[dim] > 2) {
         MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
                      &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
         nrecv += nrecv2;
       }
       if (nrecv > maxrecv) grow_recv(nrecv);
 
       MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0,
                 world,&request);
       MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world);
       MPI_Wait(&request,&status);
 
       if (procgrid[dim] > 2) {
         MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
                   world,&request);
         MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world);
         MPI_Wait(&request,&status);
       }
 
       buf = buf_recv;
     }
 
     // check incoming atoms to see if they are in my box
     // if so, add to my list
 
     m = 0;
     while (m < nrecv) {
       value = buf[m+dim+1];
       if (value >= lo && value < hi) m += avec->unpack_exchange(&buf[m]);
       else m += static_cast<int> (buf[m]);
     }
   }
 
   if (atom->firstgroupname) atom->first_reorder();
 }
 
 /* ----------------------------------------------------------------------
    borders: list nearby atoms to send to neighboring procs at every timestep
    one list is created for every swap that will be made
    as list is made, actually do swaps
    this does equivalent of a communicate, so don't need to explicitly
      call communicate routine on reneighboring timestep
    this routine is called before every reneighboring
    for triclinic, atoms must be in lamda coords (0-1) before borders is called
 ------------------------------------------------------------------------- */
 
 void Comm::borders()
 {
   int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
   int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
   double lo,hi;
   int *type;
   double **x;
   double *buf,*mlo,*mhi;
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
 
   // do swaps over all 3 dimensions
 
   iswap = 0;
   smax = rmax = 0;
 
   for (dim = 0; dim < 3; dim++) {
     nlast = 0;
     twoneed = 2*maxneed[dim];
     for (ineed = 0; ineed < twoneed; ineed++) {
 
       // find atoms within slab boundaries lo/hi using <= and >=
       // check atoms between nfirst and nlast
       //   for first swaps in a dim, check owned and ghost
       //   for later swaps in a dim, only check newly arrived ghosts
       // store sent atom indices in list for use in future timesteps
 
       x = atom->x;
       if (style == SINGLE) {
         lo = slablo[iswap];
         hi = slabhi[iswap];
       } else {
         type = atom->type;
         mlo = multilo[iswap];
         mhi = multihi[iswap];
       }
       if (ineed % 2 == 0) {
         nfirst = nlast;
         nlast = atom->nlocal + atom->nghost;
       }
 
       nsend = 0;
 
       // sendflag = 0 if I do not send on this swap
       // sendneed test indicates receiver no longer requires data
       // e.g. due to non-PBC or non-uniform sub-domains
 
       if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
       else sendflag = 1;
 
       // find send atoms according to SINGLE vs MULTI
       // all atoms eligible versus atoms in bordergroup
       // only need to limit loop to bordergroup for first sends (ineed < 2)
       // on these sends, break loop in two: owned (in group) and ghost
 
       if (sendflag) {
         if (!bordergroup || ineed >= 2) {
           if (style == SINGLE) {
             for (i = nfirst; i < nlast; i++)
               if (x[i][dim] >= lo && x[i][dim] <= hi) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
           } else {
             for (i = nfirst; i < nlast; i++) {
               itype = type[i];
               if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
             }
           }
 
         } else {
           if (style == SINGLE) {
             ngroup = atom->nfirst;
             for (i = 0; i < ngroup; i++)
               if (x[i][dim] >= lo && x[i][dim] <= hi) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
             for (i = atom->nlocal; i < nlast; i++)
               if (x[i][dim] >= lo && x[i][dim] <= hi) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
           } else {
             ngroup = atom->nfirst;
             for (i = 0; i < ngroup; i++) {
               itype = type[i];
               if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
             }
             for (i = atom->nlocal; i < nlast; i++) {
               itype = type[i];
               if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                 if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                 sendlist[iswap][nsend++] = i;
               }
             }
           }
         }
       }
 
       // pack up list of border atoms
 
       if (nsend*size_border > maxsend)
         grow_send(nsend*size_border,0);
       if (ghost_velocity)
         n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
                                   pbc_flag[iswap],pbc[iswap]);
       else
         n = avec->pack_border(nsend,sendlist[iswap],buf_send,
                               pbc_flag[iswap],pbc[iswap]);
 
       // swap atoms with other proc
       // no MPI calls except SendRecv if nsend/nrecv = 0
       // put incoming ghosts at end of my atom arrays
       // if swapping with self, simply copy, no messages
 
       if (sendproc[iswap] != me) {
         MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                      &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
         if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
         if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
                              recvproc[iswap],0,world,&request);
         if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
         if (nrecv) MPI_Wait(&request,&status);
         buf = buf_recv;
       } else {
         nrecv = nsend;
         buf = buf_send;
       }
 
       // unpack buffer
 
       if (ghost_velocity)
         avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
       else
         avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf);
 
       // set all pointers & counters
 
       smax = MAX(smax,nsend);
       rmax = MAX(rmax,nrecv);
       sendnum[iswap] = nsend;
       recvnum[iswap] = nrecv;
       size_forward_recv[iswap] = nrecv*size_forward;
       size_reverse_send[iswap] = nrecv*size_reverse;
       size_reverse_recv[iswap] = nsend*size_reverse;
       firstrecv[iswap] = atom->nlocal + atom->nghost;
       atom->nghost += nrecv;
       iswap++;
     }
   }
 
   // insure send/recv buffers are long enough for all forward & reverse comm
 
   int max = MAX(maxforward*smax,maxreverse*rmax);
   if (max > maxsend) grow_send(max,0);
   max = MAX(maxforward*rmax,maxreverse*smax);
   if (max > maxrecv) grow_recv(max);
 
   // reset global->local map
 
   if (map_style) atom->map_set();
 }
 
 /* ----------------------------------------------------------------------
    forward communication invoked by a Pair
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_pair(Pair *pair)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     n = pair->pack_comm(sendnum[iswap],sendlist[iswap],
                         buf_send,pbc_flag[iswap],pbc[iswap]);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     pair->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication invoked by a Pair
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm_pair(Pair *pair)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = nswap-1; iswap >= 0; iswap--) {
 
     // pack buffer
 
     n = pair->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (sendnum[iswap])
         MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
                   world,&request);
       if (recvnum[iswap])
         MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
       if (sendnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     pair->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward communication invoked by a Fix
    n = constant number of datums per atom
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_fix(Fix *fix)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     n = fix->pack_comm(sendnum[iswap],sendlist[iswap],
                        buf_send,pbc_flag[iswap],pbc[iswap]);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication invoked by a Fix
    n = constant number of datums per atom
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm_fix(Fix *fix)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = nswap-1; iswap >= 0; iswap--) {
 
     // pack buffer
 
     n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (sendnum[iswap])
         MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
                   world,&request);
       if (recvnum[iswap])
         MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
       if (sendnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward communication invoked by a Fix
    n = total datums for all atoms, allows for variable number/atom
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_variable_fix(Fix *fix)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     n = fix->pack_comm(sendnum[iswap],sendlist[iswap],
                        buf_send,pbc_flag[iswap],pbc[iswap]);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     fix->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication invoked by a Fix
    n = total datums for all atoms, allows for variable number/atom
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm_variable_fix(Fix *fix)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = nswap-1; iswap >= 0; iswap--) {
 
     // pack buffer
 
     n = fix->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (sendnum[iswap])
         MPI_Irecv(buf_recv,maxrecv,MPI_DOUBLE,sendproc[iswap],0,
                   world,&request);
       if (recvnum[iswap])
         MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
       if (sendnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     fix->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward communication invoked by a Compute
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_compute(Compute *compute)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     n = compute->pack_comm(sendnum[iswap],sendlist[iswap],
                            buf_send,pbc_flag[iswap],pbc[iswap]);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     compute->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication invoked by a Compute
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm_compute(Compute *compute)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = nswap-1; iswap >= 0; iswap--) {
 
     // pack buffer
 
     n = compute->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (sendnum[iswap])
         MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
                   world,&request);
       if (recvnum[iswap])
         MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
       if (sendnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     compute->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward communication invoked by a Dump
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_dump(Dump *dump)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     n = dump->pack_comm(sendnum[iswap],sendlist[iswap],
                         buf_send,pbc_flag[iswap],pbc[iswap]);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     dump->unpack_comm(recvnum[iswap],firstrecv[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    reverse communication invoked by a Dump
 ------------------------------------------------------------------------- */
 
 void Comm::reverse_comm_dump(Dump *dump)
 {
   int iswap,n;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   for (iswap = nswap-1; iswap >= 0; iswap--) {
 
     // pack buffer
 
     n = dump->pack_reverse_comm(recvnum[iswap],firstrecv[iswap],buf_send);
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (sendnum[iswap])
         MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
                   world,&request);
       if (recvnum[iswap])
         MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
       if (sendnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     dump->unpack_reverse_comm(sendnum[iswap],sendlist[iswap],buf);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward communication of N values in array
 ------------------------------------------------------------------------- */
 
 void Comm::forward_comm_array(int n, double **array)
 {
   int i,j,k,m,iswap,last;
   double *buf;
   MPI_Request request;
   MPI_Status status;
 
   // check that buf_send and buf_recv are big enough
 
 
 
 
   for (iswap = 0; iswap < nswap; iswap++) {
 
     // pack buffer
 
     m = 0;
     for (i = 0; i < sendnum[iswap]; i++) {
       j = sendlist[iswap][i];
       for (k = 0; k < n; k++)
         buf_send[m++] = array[j][k];
     }
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
 
     if (sendproc[iswap] != me) {
       if (recvnum[iswap])
         MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
                   world,&request);
       if (sendnum[iswap])
         MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,&status);
       buf = buf_recv;
     } else buf = buf_send;
 
     // unpack buffer
 
     m = 0;
     last = firstrecv[iswap] + recvnum[iswap];
     for (i = firstrecv[iswap]; i < last; i++)
       for (k = 0; k < n; k++)
         array[i][k] = buf[m++];
   }
 }
 
 /* ----------------------------------------------------------------------
    communicate inbuf around full ring of processors with messtag
    nbytes = size of inbuf = n datums * nper bytes
    callback() is invoked to allow caller to process/update each proc's inbuf
-   note that callback() is invoked on final iteration for original inbuf
+   if self=1 (default), then callback() is invoked on final iteration
+     using original inbuf, which may have been updated
    for non-NULL outbuf, final updated inbuf is copied to it
-   outbuf = inbuf is OK
+     outbuf = inbuf is OK
 ------------------------------------------------------------------------- */
 
 void Comm::ring(int n, int nper, void *inbuf, int messtag,
                 void (*callback)(int, char *), void *outbuf, int self)
 {
   MPI_Request request;
   MPI_Status status;
 
   int nbytes = n*nper;
   int maxbytes;
   MPI_Allreduce(&nbytes,&maxbytes,1,MPI_INT,MPI_MAX,world);
 
   char *buf,*bufcopy;
   memory->create(buf,maxbytes,"comm:buf");
   memory->create(bufcopy,maxbytes,"comm:bufcopy");
   memcpy(buf,inbuf,nbytes);
 
   int next = me + 1;
   int prev = me - 1;
   if (next == nprocs) next = 0;
   if (prev < 0) prev = nprocs - 1;
 
   for (int loop = 0; loop < nprocs; loop++) {
     if (me != next) {
       MPI_Irecv(bufcopy,maxbytes,MPI_CHAR,prev,messtag,world,&request);
       MPI_Send(buf,nbytes,MPI_CHAR,next,messtag,world);
       MPI_Wait(&request,&status);
       MPI_Get_count(&status,MPI_CHAR,&nbytes);
       memcpy(buf,bufcopy,nbytes);
     }
     if (self || loop < nprocs-1) callback(nbytes/nper,buf);
   }
 
   if (outbuf) memcpy(outbuf,buf,nbytes);
 
   memory->destroy(buf);
   memory->destroy(bufcopy);
 }
 
 /* ----------------------------------------------------------------------
    realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
    if flag = 1, realloc
    if flag = 0, don't need to realloc with copy, just free/malloc
 ------------------------------------------------------------------------- */
 
 void Comm::grow_send(int n, int flag)
 {
   maxsend = static_cast<int> (BUFFACTOR * n);
   if (flag)
     memory->grow(buf_send,(maxsend+BUFEXTRA),"comm:buf_send");
   else {
     memory->destroy(buf_send);
     memory->create(buf_send,maxsend+BUFEXTRA,"comm:buf_send");
   }
 }
 
 /* ----------------------------------------------------------------------
    free/malloc the size of the recv buffer as needed with BUFFACTOR
 ------------------------------------------------------------------------- */
 
 void Comm::grow_recv(int n)
 {
   maxrecv = static_cast<int> (BUFFACTOR * n);
   memory->destroy(buf_recv);
   memory->create(buf_recv,maxrecv,"comm:buf_recv");
 }
 
 /* ----------------------------------------------------------------------
    realloc the size of the iswap sendlist as needed with BUFFACTOR
 ------------------------------------------------------------------------- */
 
 void Comm::grow_list(int iswap, int n)
 {
   maxsendlist[iswap] = static_cast<int> (BUFFACTOR * n);
   memory->grow(sendlist[iswap],maxsendlist[iswap],"comm:sendlist[iswap]");
 }
 
 /* ----------------------------------------------------------------------
    realloc the buffers needed for swaps
 ------------------------------------------------------------------------- */
 
 void Comm::grow_swap(int n)
 {
   free_swap();
   allocate_swap(n);
   if (style == MULTI) {
     free_multi();
     allocate_multi(n);
   }
 
   sendlist = (int **)
     memory->srealloc(sendlist,n*sizeof(int *),"comm:sendlist");
   memory->grow(maxsendlist,n,"comm:maxsendlist");
   for (int i = maxswap; i < n; i++) {
     maxsendlist[i] = BUFMIN;
     memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
   }
   maxswap = n;
 }
 
 /* ----------------------------------------------------------------------
    allocation of swap info
 ------------------------------------------------------------------------- */
 
 void Comm::allocate_swap(int n)
 {
   memory->create(sendnum,n,"comm:sendnum");
   memory->create(recvnum,n,"comm:recvnum");
   memory->create(sendproc,n,"comm:sendproc");
   memory->create(recvproc,n,"comm:recvproc");
   memory->create(size_forward_recv,n,"comm:size");
   memory->create(size_reverse_send,n,"comm:size");
   memory->create(size_reverse_recv,n,"comm:size");
   memory->create(slablo,n,"comm:slablo");
   memory->create(slabhi,n,"comm:slabhi");
   memory->create(firstrecv,n,"comm:firstrecv");
   memory->create(pbc_flag,n,"comm:pbc_flag");
   memory->create(pbc,n,6,"comm:pbc");
 }
 
 /* ----------------------------------------------------------------------
    allocation of multi-type swap info
 ------------------------------------------------------------------------- */
 
 void Comm::allocate_multi(int n)
 {
   multilo = memory->create(multilo,n,atom->ntypes+1,"comm:multilo");
   multihi = memory->create(multihi,n,atom->ntypes+1,"comm:multihi");
 }
 
 /* ----------------------------------------------------------------------
    free memory for swaps
 ------------------------------------------------------------------------- */
 
 void Comm::free_swap()
 {
   memory->destroy(sendnum);
   memory->destroy(recvnum);
   memory->destroy(sendproc);
   memory->destroy(recvproc);
   memory->destroy(size_forward_recv);
   memory->destroy(size_reverse_send);
   memory->destroy(size_reverse_recv);
   memory->destroy(slablo);
   memory->destroy(slabhi);
   memory->destroy(firstrecv);
   memory->destroy(pbc_flag);
   memory->destroy(pbc);
 }
 
 /* ----------------------------------------------------------------------
    free memory for multi-type swaps
 ------------------------------------------------------------------------- */
 
 void Comm::free_multi()
 {
   memory->destroy(multilo);
   memory->destroy(multihi);
 }
 
 /* ----------------------------------------------------------------------
    set communication style
    invoked from input script by communicate command
 ------------------------------------------------------------------------- */
 
 void Comm::set(int narg, char **arg)
 {
   if (narg < 1) error->all(FLERR,"Illegal communicate command");
 
   if (strcmp(arg[0],"single") == 0) style = SINGLE;
   else if (strcmp(arg[0],"multi") == 0) style = MULTI;
   else error->all(FLERR,"Illegal communicate command");
 
   int iarg = 1;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"group") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
       bordergroup = group->find(arg[iarg+1]);
       if (bordergroup < 0)
         error->all(FLERR,"Invalid group in communicate command");
       if (bordergroup && (atom->firstgroupname == NULL ||
                           strcmp(arg[iarg+1],atom->firstgroupname) != 0))
         error->all(FLERR,"Communicate group != atom_modify first group");
       iarg += 2;
     } else if (strcmp(arg[iarg],"cutoff") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
       cutghostuser = atof(arg[iarg+1]);
       if (cutghostuser < 0.0)
         error->all(FLERR,"Invalid cutoff in communicate command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"vel") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal communicate command");
       if (strcmp(arg[iarg+1],"yes") == 0) ghost_velocity = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
       else error->all(FLERR,"Illegal communicate command");
       iarg += 2;
     } else error->all(FLERR,"Illegal communicate command");
   }
 }
 
 /* ----------------------------------------------------------------------
    set dimensions for 3d grid of processors, and associated flags
    invoked from input script by processors command
 ------------------------------------------------------------------------- */
 
 void Comm::set_processors(int narg, char **arg)
 {
   if (narg < 3) error->all(FLERR,"Illegal processors command");
 
   if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0;
   else user_procgrid[0] = atoi(arg[0]);
   if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0;
   else user_procgrid[1] = atoi(arg[1]);
   if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0;
   else user_procgrid[2] = atoi(arg[2]);
 
   if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
     error->all(FLERR,"Illegal processors command");
 
   int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
   if (p && p != nprocs)
     error->all(FLERR,"Specified processors != physical processors");
 
   int iarg = 3;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"grid") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
 
       if (strcmp(arg[iarg+1],"onelevel") == 0) {
         gridflag = ONELEVEL;
 
       } else if (strcmp(arg[iarg+1],"twolevel") == 0) {
         if (iarg+6 > narg) error->all(FLERR,"Illegal processors command");
         gridflag = TWOLEVEL;
 
         ncores = atoi(arg[iarg+2]);
         if (strcmp(arg[iarg+3],"*") == 0) user_coregrid[0] = 0;
         else user_coregrid[0] = atoi(arg[iarg+3]);
         if (strcmp(arg[iarg+4],"*") == 0) user_coregrid[1] = 0;
         else user_coregrid[1] = atoi(arg[iarg+4]);
         if (strcmp(arg[iarg+5],"*") == 0) user_coregrid[2] = 0;
         else user_coregrid[2] = atoi(arg[iarg+5]);
 
         if (ncores <= 0 || user_coregrid[0] < 0 ||
             user_coregrid[1] < 0 || user_coregrid[2] < 0)
           error->all(FLERR,"Illegal processors command");
         iarg += 4;
 
       } else if (strcmp(arg[iarg+1],"numa") == 0) {
         gridflag = NUMA;
 
       } else if (strcmp(arg[iarg],"custom") == 0) {
         if (iarg+3 > narg) error->all(FLERR,"Illegal processors command");
         gridflag = CUSTOM;
         delete [] customfile;
         int n = strlen(arg[iarg+2]) + 1;
         customfile = new char[n];
         strcpy(customfile,arg[iarg+2]);
         iarg += 1;
 
       } else error->all(FLERR,"Illegal processors command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"map") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
       if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
       else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
       else if (strcmp(arg[iarg+1],"xyz") == 0 ||
                strcmp(arg[iarg+1],"xzy") == 0 ||
                strcmp(arg[iarg+1],"yxz") == 0 ||
                strcmp(arg[iarg+1],"yzx") == 0 ||
                strcmp(arg[iarg+1],"zxy") == 0 ||
                strcmp(arg[iarg+1],"zyx") == 0) {
         mapflag = XYZ;
         strcpy(xyz,arg[iarg+1]);
       } else error->all(FLERR,"Illegal processors command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"part") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
       if (universe->nworlds == 1)
         error->all(FLERR,
                    "Cannot use processors part command "
                    "without using partitions");
       int isend = atoi(arg[iarg+1]);
       int irecv = atoi(arg[iarg+2]);
       if (isend < 1 || isend > universe->nworlds ||
           irecv < 1 || irecv > universe->nworlds || isend == irecv)
         error->all(FLERR,"Invalid partitions in processors part command");
       if (isend-1 == universe->iworld) {
         if (send_to_partition >= 0)
           error->all(FLERR,
                      "Sending partition in processors part command "
                      "is already a sender");
         send_to_partition = irecv-1;
       }
       if (irecv-1 == universe->iworld) {
         if (recv_from_partition >= 0)
           error->all(FLERR,
                      "Receiving partition in processors part command "
                      "is already a receiver");
         recv_from_partition = isend-1;
       }
 
       // only receiver has otherflag dependency
 
       if (strcmp(arg[iarg+3],"multiple") == 0) {
         if (universe->iworld == irecv-1) {
           otherflag = 1;
           other_style = MULTIPLE;
         }
       } else error->all(FLERR,"Illegal processors command");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"file") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
       delete [] outfile;
       int n = strlen(arg[iarg+1]) + 1;
       outfile = new char[n];
       strcpy(outfile,arg[iarg+1]);
       iarg += 2;
 
     } else error->all(FLERR,"Illegal processors command");
   }
 
   // error checks
 
   if (gridflag == NUMA && mapflag != CART)
     error->all(FLERR,"Processors grid numa and map style are incompatible");
   if (otherflag && (gridflag == NUMA || gridflag == CUSTOM))
     error->all(FLERR,
                "Processors part option and grid style are incompatible");
 }
 
 /* ----------------------------------------------------------------------
    return # of bytes of allocated memory
 ------------------------------------------------------------------------- */
 
 bigint Comm::memory_usage()
 {
   bigint bytes = 0;
   bytes += nprocs * sizeof(int);    // grid2proc
   for (int i = 0; i < nswap; i++)
     bytes += memory->usage(sendlist[i],maxsendlist[i]);
   bytes += memory->usage(buf_send,maxsend+BUFEXTRA);
   bytes += memory->usage(buf_recv,maxrecv);
   return bytes;
 }
diff --git a/src/delete_atoms.cpp b/src/delete_atoms.cpp
index dc70d8557..d2fa42192 100644
--- a/src/delete_atoms.cpp
+++ b/src/delete_atoms.cpp
@@ -1,360 +1,422 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "stdlib.h"
 #include "string.h"
 #include "delete_atoms.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "domain.h"
 #include "force.h"
 #include "group.h"
 #include "region.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "random_mars.h"
 #include "memory.h"
 #include "error.h"
 
+#include <map>
+
 using namespace LAMMPS_NS;
 
+// allocate space for static class variable
+
+DeleteAtoms *DeleteAtoms::cptr;
+
 /* ---------------------------------------------------------------------- */
 
 DeleteAtoms::DeleteAtoms(LAMMPS *lmp) : Pointers(lmp) {}
 
 /* ---------------------------------------------------------------------- */
 
 void DeleteAtoms::command(int narg, char **arg)
 {
   if (domain->box_exist == 0)
     error->all(FLERR,"Delete_atoms command before simulation box is defined");
   if (narg < 1) error->all(FLERR,"Illegal delete_atoms command");
   if (atom->tag_enable == 0)
     error->all(FLERR,"Cannot use delete_atoms unless atoms have IDs");
 
   // store state before delete
 
   bigint natoms_previous = atom->natoms;
 
   // delete the atoms
 
   if (strcmp(arg[0],"group") == 0) delete_group(narg,arg);
   else if (strcmp(arg[0],"region") == 0) delete_region(narg,arg);
   else if (strcmp(arg[0],"overlap") == 0) delete_overlap(narg,arg);
   else if (strcmp(arg[0],"porosity") == 0) delete_porosity(narg,arg);
   else error->all(FLERR,"Illegal delete_atoms command");
 
   // delete local atoms flagged in dlist
   // reset nlocal
 
   AtomVec *avec = atom->avec;
   int nlocal = atom->nlocal;
 
   int i = 0;
   while (i < nlocal) {
     if (dlist[i]) {
       avec->copy(nlocal-1,i,1);
       dlist[i] = dlist[nlocal-1];
       nlocal--;
     } else i++;
   }
 
   atom->nlocal = nlocal;
   memory->destroy(dlist);
 
   // if non-molecular system and compress flag set,
   // reset atom tags to be contiguous
   // set all atom IDs to 0, call tag_extend()
 
   if (atom->molecular == 0 && compress_flag) {
     int *tag = atom->tag;
     for (i = 0; i < nlocal; i++) tag[i] = 0;
     atom->tag_extend();
   }
 
   // reset atom->natoms
   // reset atom->map if it exists
   // set nghost to 0 so old ghosts of deleted atoms won't be mapped
 
   bigint nblocal = atom->nlocal;
   MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
   if (atom->map_style) {
     atom->nghost = 0;
     atom->map_init();
     atom->map_set();
   }
 
   // print before and after atom count
 
   bigint ndelete = natoms_previous - atom->natoms;
 
   if (comm->me == 0) {
     if (screen) fprintf(screen,"Deleted " BIGINT_FORMAT
                         " atoms, new total = " BIGINT_FORMAT "\n",
                         ndelete,atom->natoms);
     if (logfile) fprintf(logfile,"Deleted " BIGINT_FORMAT
                          " atoms, new total = " BIGINT_FORMAT "\n",
                          ndelete,atom->natoms);
   }
 }
 
 /* ----------------------------------------------------------------------
    delete all atoms in group
    group will still exist
 ------------------------------------------------------------------------- */
 
 void DeleteAtoms::delete_group(int narg, char **arg)
 {
   if (narg < 2) error->all(FLERR,"Illegal delete_atoms command");
 
   int igroup = group->find(arg[1]);
   if (igroup == -1) error->all(FLERR,"Could not find delete_atoms group ID");
   options(narg-2,&arg[2]);
 
   // allocate and initialize deletion list
 
   int nlocal = atom->nlocal;
   memory->create(dlist,nlocal,"delete_atoms:dlist");
   for (int i = 0; i < nlocal; i++) dlist[i] = 0;
 
   int *mask = atom->mask;
   int groupbit = group->bitmask[igroup];
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) dlist[i] = 1;
 }
 
 /* ----------------------------------------------------------------------
    delete all atoms in region
+   if mol_flag is set, also delete atoms in molecules with any deletions
 ------------------------------------------------------------------------- */
 
 void DeleteAtoms::delete_region(int narg, char **arg)
 {
   if (narg < 2) error->all(FLERR,"Illegal delete_atoms command");
 
   int iregion = domain->find_region(arg[1]);
   if (iregion == -1) error->all(FLERR,"Could not find delete_atoms region ID");
   options(narg-2,&arg[2]);
 
   // allocate and initialize deletion list
 
   int nlocal = atom->nlocal;
   memory->create(dlist,nlocal,"delete_atoms:dlist");
   for (int i = 0; i < nlocal; i++) dlist[i] = 0;
 
   double **x = atom->x;
 
   for (int i = 0; i < nlocal; i++)
     if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2])) dlist[i] = 1;
+
+  if (mol_flag == 0) return;
+
+  // delete entire molecules if any atom in molecule was deleted
+  // store list of molecule IDs I delete atoms from in list
+  // pass list from proc to proc via ring communication
+
+  hash = new std::map<int,int>();
+
+  int *molecule = atom->molecule;
+  for (int i = 0; i < nlocal; i++)
+    if (dlist[i] && hash->find(molecule[i]) == hash->end())
+      (*hash)[molecule[i]] = 1;
+
+  int n = hash->size();
+  int *list;
+  memory->create(list,n,"delete_atoms:list");
+
+  n = 0;
+  std::map<int,int>::iterator pos;
+  for (pos = hash->begin(); pos != hash->end(); ++pos) list[n++] = pos->first;
+
+  cptr = this;
+  comm->ring(n,sizeof(int),list,1,molring,NULL);
+
+  delete hash;
+  memory->destroy(list);
+}
+
+/* ----------------------------------------------------------------------
+   callback from comm->ring()
+   cbuf = list of N molecule IDs, put them in hash
+   loop over my atoms, if matches moleculed ID in hash, delete that atom
+------------------------------------------------------------------------- */
+
+void DeleteAtoms::molring(int n, char *cbuf)
+{
+  int *list = (int *) cbuf;
+  int *dlist = cptr->dlist;
+  std::map<int,int> *hash = cptr->hash;
+  int nlocal = cptr->atom->nlocal;
+  int *molecule = cptr->atom->molecule;
+
+  hash->clear();
+  for (int i = 0; i < n; i++) (*hash)[list[i]] = 1;
+
+  for (int i = 0; i < nlocal; i++)
+    if (hash->find(molecule[i]) != hash->end()) dlist[i] = 1;
 }
 
 /* ----------------------------------------------------------------------
    delete atoms so there are no pairs within cutoff
    which atoms are deleted depends on ordering of atoms within proc
    deletions can vary with processor count
    no guarantee that minimium number of atoms will be deleted
 ------------------------------------------------------------------------- */
 
 void DeleteAtoms::delete_overlap(int narg, char **arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal delete_atoms command");
 
   // read args
 
   double cut = atof(arg[1]);
   double cutsq = cut*cut;
 
   int igroup1 = group->find(arg[2]);
   int igroup2 = group->find(arg[3]);
   if (igroup1 < 0 || igroup2 < 0)
     error->all(FLERR,"Could not find delete_atoms group ID");
   options(narg-4,&arg[4]);
 
   int group1bit = group->bitmask[igroup1];
   int group2bit = group->bitmask[igroup2];
 
   if (comm->me == 0 && screen)
     fprintf(screen,"System init for delete_atoms ...\n");
 
   // request a full neighbor list for use by this command
 
   int irequest = neighbor->request((void *) this);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->command = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // init entire system since comm->borders and neighbor->build is done
   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
 
   lmp->init();
 
   // error check on cutoff
   // if no pair style, neighbor list will be empty
 
   if (force->pair == NULL)
     error->all(FLERR,"Delete_atoms requires a pair style be defined");
   if (cut > neighbor->cutneighmax)
     error->all(FLERR,"Delete_atoms cutoff > neighbor cutoff");
 
   // setup domain, communication and neighboring
   // acquire ghosts
   // build neighbor list based on earlier request
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   domain->reset_box();
   comm->setup();
   if (neighbor->style) neighbor->setup_bins();
   comm->exchange();
   comm->borders();
   if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
 
   NeighList *list = neighbor->lists[irequest];
   neighbor->build_one(irequest);
 
   // allocate and initialize deletion list
   // must be after exchange potentially changes nlocal
 
   int nlocal = atom->nlocal;
   memory->create(dlist,nlocal,"delete_atoms:dlist");
   for (int i = 0; i < nlocal; i++) dlist[i] = 0;
 
   // double loop over owned atoms and their full neighbor list
   // at end of loop, there are no more overlaps
   // only ever delete owned atom I, never J even if owned
 
   int *tag = atom->tag;
   int *mask = atom->mask;
   double **x = atom->x;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
 
   int i,j,ii,jj,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double factor_lj,factor_coul;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       // if both weighting factors are 0, skip this pair
       // could be 0 and still be in neigh list for long-range Coulombics
       // want consistency with non-charged pairs which wouldn't be in list
 
       if (factor_lj == 0.0 && factor_coul == 0.0) continue;
 
       // only consider deletion if I,J distance < cutoff
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       if (rsq >= cutsq) continue;
 
       // only consider deletion if I,J are in groups 1,2 respectively
       // true whether J is owned or ghost atom
 
       if (!(mask[i] & group1bit)) continue;
       if (!(mask[j] & group2bit)) continue;
 
       // J is owned atom:
       //   delete atom I if atom J has not already been deleted
       // J is ghost atom:
       //   delete atom I if J,I is not a candidate deletion pair
       //     due to being in groups 1,2 respectively
       //   if they are candidate pair, then either:
       //      another proc owns J and could delete J
       //      J is a ghost of another of my owned atoms, and I could delete J
       //   test on tags of I,J insures that only I or J is deleted
 
       if (j < nlocal) {
         if (dlist[j]) continue;
       } else if ((mask[i] & group2bit) && (mask[j] & group1bit)) {
         if (tag[i] > tag[j]) continue;
       }
 
       dlist[i] = 1;
       break;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    create porosity by deleting atoms in a specified region
 ------------------------------------------------------------------------- */
 
 void DeleteAtoms::delete_porosity(int narg, char **arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal delete_atoms command");
 
   int iregion = domain->find_region(arg[1]);
   if (iregion == -1) error->all(FLERR,"Could not find delete_atoms region ID");
 
   double porosity_fraction = atof(arg[2]);
   int seed = atoi(arg[3]);
   options(narg-4,&arg[4]);
 
   RanMars *random = new RanMars(lmp,seed + comm->me);
 
   // allocate and initialize deletion list
 
   int nlocal = atom->nlocal;
   memory->create(dlist,nlocal,"delete_atoms:dlist");
   for (int i = 0; i < nlocal; i++) dlist[i] = 0;
 
   double **x = atom->x;
 
   for (int i = 0; i < nlocal; i++)
     if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]))
       if (random->uniform() <= porosity_fraction) dlist[i] = 1;
 }
 
 /* ----------------------------------------------------------------------
    process command options
 ------------------------------------------------------------------------- */
 
 void DeleteAtoms::options(int narg, char **arg)
 {
   compress_flag = 1;
+  mol_flag = 0;
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"compress") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal delete_bonds command");
       if (strcmp(arg[iarg+1],"yes") == 0) compress_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) compress_flag = 0;
       else error->all(FLERR,"Illegal delete_bonds command");
       iarg += 2;
+    } else if (strcmp(arg[iarg],"mol") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal delete_bonds command");
+      if (strcmp(arg[iarg+1],"yes") == 0) mol_flag = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) mol_flag = 0;
+      else error->all(FLERR,"Illegal delete_bonds command");
+      iarg += 2;
     } else error->all(FLERR,"Illegal delete_bonds command");
   }
 }
diff --git a/src/delete_atoms.h b/src/delete_atoms.h
index a1d8104e8..f091bbbab 100644
--- a/src/delete_atoms.h
+++ b/src/delete_atoms.h
@@ -1,87 +1,95 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMMAND_CLASS
 
 CommandStyle(delete_atoms,DeleteAtoms)
 
 #else
 
 #ifndef LMP_DELETE_ATOMS_H
 #define LMP_DELETE_ATOMS_H
 
 #include "pointers.h"
+#include <map>
 
 namespace LAMMPS_NS {
 
 class DeleteAtoms : protected Pointers {
  public:
   DeleteAtoms(class LAMMPS *);
   void command(int, char **);
 
  private:
   int *dlist;
-  int compress_flag;
+  int compress_flag,mol_flag;
+  std::map<int,int> *hash;
 
   void delete_group(int, char **);
   void delete_region(int, char **);
   void delete_overlap(int, char **);
   void delete_porosity(int, char **);
   void options(int, char **);
 
   inline int sbmask(int j) {
     return j >> SBBITS & 3;
   }
+
+  // static variable for ring communication callback to access class data
+  // callback functions for ring communication
+
+  static DeleteAtoms *cptr;
+  static void molring(int, char *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Delete_atoms command before simulation box is defined
 
 The delete_atoms command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot use delete_atoms unless atoms have IDs
 
 Your atoms do not have IDs, so the delete_atoms command cannot be
 used.
 
 E: Could not find delete_atoms group ID
 
 Group ID used in the delete_atoms command does not exist.
 
 E: Could not find delete_atoms region ID
 
 Region ID used in the delete_atoms command does not exist.
 
 E: Delete_atoms requires a pair style be defined
 
 This is because atom deletion within a cutoff uses a pairwise
 neighbor list.
 
 E: Delete_atoms cutoff > neighbor cutoff
 
 Cannot delete atoms further away than a processor knows about.
 
 */