diff --git a/src/BODY/body_nparticle.cpp b/src/BODY/body_nparticle.cpp
index 9869220a9..a41823bb7 100644
--- a/src/BODY/body_nparticle.cpp
+++ b/src/BODY/body_nparticle.cpp
@@ -1,209 +1,221 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "stdlib.h"
 #include "body_nparticle.h"
 #include "math_extra.h"
 #include "atom_vec_body.h"
 #include "atom.h"
-#include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EPSILON 1.0e-7
 
 /* ---------------------------------------------------------------------- */
 
 BodyNparticle::BodyNparticle(LAMMPS *lmp, int narg, char **arg) : 
   Body(lmp, narg, arg)
 {
   if (narg != 3) error->all(FLERR,"Invalid body nparticle command");
 
   int nmin = atoi(arg[1]);
   int nmax = atoi(arg[2]);
   if (nmin <= 0 || nmin > nmax) 
     error->all(FLERR,"Invalid body nparticle command");
 
   size_forward = 0;
   size_border = 1 + 3*nmax;
+
+  // NOTE: need to set appropriate nnbin param for dcp
+
+  icp = new MyPool<int>(1,1);
+  dcp = new MyPool<double>(3*nmin,3*nmax);
+}
+
+/* ---------------------------------------------------------------------- */
+
+BodyNparticle::~BodyNparticle()
+{
+  delete icp;
+  delete dcp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int BodyNparticle::nsub(AtomVecBody::Bonus *bonus)
 {
   return bonus->ivalue[0];
 }
 
 /* ---------------------------------------------------------------------- */
 
 double *BodyNparticle::coords(AtomVecBody::Bonus *bonus)
 {
   return bonus->dvalue;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int BodyNparticle::pack_border_body(AtomVecBody::Bonus *bonus, double *buf)
 {
   int nsub = bonus->ivalue[0];
   buf[0] = nsub;
   memcpy(&buf[1],bonus->dvalue,3*nsub*sizeof(double));
   return 1+3*nsub;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int BodyNparticle::unpack_border_body(AtomVecBody::Bonus *bonus, double *buf)
 {
   int nsub = static_cast<int> (buf[0]);
   bonus->ivalue[0] = nsub;
   memcpy(bonus->dvalue,&buf[1],3*nsub*sizeof(double));
   return 1+3*nsub;
 }
 
 /* ----------------------------------------------------------------------
    populate bonus data structure with data file values
 ------------------------------------------------------------------------- */
 
 void BodyNparticle::data_body(int ibonus, int ninteger, int ndouble, 
                               char **ifile, char **dfile)
 {
   AtomVecBody::Bonus *bonus = &avec->bonus[ibonus];
 
   // error in data file if any values are NULL
 
   for (int i = 0; i < ninteger; i++)
-    if (ifile[0] == NULL) 
+    if (ifile[i] == NULL)
       error->one(FLERR,"Invalid format in Bodies section of data file");
   for (int i = 0; i < ndouble; i++)
-    if (dfile[0] == NULL)
+    if (dfile[i] == NULL)
       error->one(FLERR,"Invalid format in Bodies section of data file");
 
   // set ninteger, ndouble in bonus and allocate 2 vectors of ints, doubles  
 
   if (ninteger != 1) 
     error->one(FLERR,"Incorrect # of integer values in "
                "Bodies section of data file");
   int nsub = atoi(ifile[0]);
   if (nsub < 1)
     error->one(FLERR,"Incorrect integer value in "
                "Bodies section of data file");
   if (ndouble != 6 + 3*nsub) 
     error->one(FLERR,"Incorrect # of floating-point values in "
                "Bodies section of data file");
 
   bonus->ninteger = 1;
-  memory->create(bonus->ivalue,bonus->ninteger,"body:ivalue");
+  bonus->ivalue = icp->get(bonus->iindex);
   bonus->ivalue[0] = nsub;
   bonus->ndouble = 3*nsub;
-  memory->create(bonus->dvalue,3*nsub,"body:dvalue");
+  bonus->dvalue = dcp->get(bonus->ndouble,bonus->dindex);
 
   // diagonalize inertia tensor
 
   double tensor[3][3];
   tensor[0][0] = atof(dfile[0]);
   tensor[1][1] = atof(dfile[1]);
   tensor[2][2] = atof(dfile[2]);
   tensor[0][1] = tensor[1][0] = atof(dfile[3]);
   tensor[0][2] = tensor[2][0] = atof(dfile[4]);
   tensor[1][2] = tensor[2][1] = atof(dfile[5]);
 
   double *inertia = bonus->inertia;
   double evectors[3][3];
   int ierror = MathExtra::jacobi(tensor,inertia,evectors);
   if (ierror) error->one(FLERR,
                          "Insufficient Jacobi rotations for body nparticle");
 
   // if any principal moment < scaled EPSILON, set to 0.0
 
   double max;
   max = MAX(inertia[0],inertia[1]);
   max = MAX(max,inertia[2]);
   
   if (inertia[0] < EPSILON*max) inertia[0] = 0.0;
   if (inertia[1] < EPSILON*max) inertia[1] = 0.0;
   if (inertia[2] < EPSILON*max) inertia[2] = 0.0;
 
   // exyz_space = principal axes in space frame
 
   double ex_space[3],ey_space[3],ez_space[3];
 
   ex_space[0] = evectors[0][0];
   ex_space[1] = evectors[1][0];
   ex_space[2] = evectors[2][0];
   ey_space[0] = evectors[0][1];
   ey_space[1] = evectors[1][1];
   ey_space[2] = evectors[2][1];
   ez_space[0] = evectors[0][2];
   ez_space[1] = evectors[1][2];
   ez_space[2] = evectors[2][2];
 
   // enforce 3 evectors as a right-handed coordinate system
   // flip 3rd vector if needed
 
   double cross[3];
   MathExtra::cross3(ex_space,ey_space,cross);
   if (MathExtra::dot3(cross,ez_space) < 0.0) MathExtra::negate3(ez_space);
   
   // create initial quaternion
   
   MathExtra::exyz_to_q(ex_space,ey_space,ez_space,bonus->quat);
 
   // bonus->dvalue = sub-particle displacements in body frame
 
-  double delta[3],displace[3];
+  double delta[3];
 
   int j = 6;
   int k = 0;
   for (int i = 0; i < nsub; i++) {
     delta[0] = atof(dfile[j]);
     delta[1] = atof(dfile[j+1]);
     delta[2] = atof(dfile[j+2]);
     MathExtra::transpose_matvec(ex_space,ey_space,ez_space,
                                 delta,&bonus->dvalue[k]);
     j += 3;
     k += 3;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int BodyNparticle::noutcol()
 {
   return 3;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int BodyNparticle::noutrow(int ibonus)
 {
   return avec->bonus[ibonus].ivalue[0];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void BodyNparticle::output(int ibonus, int m, double *values)
 {
   AtomVecBody::Bonus *bonus = &avec->bonus[ibonus];
 
   double p[3][3];
   MathExtra::quat_to_mat(bonus->quat,p);
   MathExtra::matvec(p,&bonus->dvalue[3*m],values);
 
   double *x = atom->x[bonus->ilocal];
   values[0] += x[0];
   values[1] += x[1];
   values[2] += x[2];
 }
diff --git a/src/BODY/body_nparticle.h b/src/BODY/body_nparticle.h
index 84dec188a..89d08c86e 100644
--- a/src/BODY/body_nparticle.h
+++ b/src/BODY/body_nparticle.h
@@ -1,59 +1,59 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef BODY_CLASS
 
 BodyStyle(nparticle,BodyNparticle)
 
 #else
 
 #ifndef LMP_BODY_NPARTICLE_H
 #define LMP_BODY_NPARTICLE_H
 
 #include "body.h"
 #include "atom_vec_body.h"
 
 namespace LAMMPS_NS {
 
 class BodyNparticle : public Body {
  public:
   BodyNparticle(class LAMMPS *, int, char **);
-  ~BodyNparticle() {}
+  ~BodyNparticle();
   int nsub(class AtomVecBody::Bonus *);
   double *coords(class AtomVecBody::Bonus *);
 
   int pack_border_body(class AtomVecBody::Bonus *, double *);
   int unpack_border_body(class AtomVecBody::Bonus *, double *);
   void data_body(int, int, int, char **, char **);
 
   int noutrow(int);
   int noutcol();
   void output(int, int, double *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Invalid body nparticle command
 
 Arguments in atom-style command are not correct.
 
 E: Insufficient Jacobi rotations for body nparticle
 
 Eigensolve for rigid body was not sufficiently accurate.
 
 */
diff --git a/src/BODY/fix_nve_body.h b/src/BODY/fix_nve_body.h
index b75897362..213434b60 100644
--- a/src/BODY/fix_nve_body.h
+++ b/src/BODY/fix_nve_body.h
@@ -1,53 +1,53 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(nve/body,FixNVEBody)
 
 #else
 
 #ifndef LMP_FIX_NVE_BODY_H
 #define LMP_FIX_NVE_BODY_H
 
 #include "fix_nve.h"
 
 namespace LAMMPS_NS {
 
 class FixNVEBody : public FixNVE {
  public:
   FixNVEBody(class LAMMPS *, int, char **);
   void init();
   void initial_integrate(int);
   void final_integrate();
 
  private:
   double dtq;
   class AtomVecBody *avec;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Fix nve/body requires atom style body
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Fix nve/body requires bodies
 
-UNDOCUMENTED
+This fix can only be used for particles that are bodies.
 
 */
diff --git a/src/BODY/pair_body.h b/src/BODY/pair_body.h
index 25544c983..80d36be0d 100644
--- a/src/BODY/pair_body.h
+++ b/src/BODY/pair_body.h
@@ -1,80 +1,82 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(body,PairBody)
 
 #else
 
 #ifndef LMP_PAIR_BODY_H
 #define LMP_PAIR_BODY_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairBody : public Pair {
  public:
   PairBody(class LAMMPS *);
   ~PairBody();
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
 
  protected:
   double cut_global;
   double **cut;
   double **epsilon,**sigma;
   double **lj1,**lj2,**lj3,**lj4;
 
   class AtomVecBody *avec;
   class BodyNparticle *bptr;
 
   double **discrete;            // list of all sub-particles for all bodies
   int ndiscrete;                // number of discretes in list
   int dmax;                     // allocated size of discrete list
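-  int *dnum;                    // number of discretes per line, 0 if uninit
-  int *dfirst;                  // index of first discrete per each line
+  int *dnum;                    // number of discretes per body, 0 if uninit
+  int *dfirst;                  // index of first discrete for each body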
   int nmax;                     // allocated size of dnum,dfirst vectors
 
   void allocate();
   void body2space(int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script or data file.
 
 E: Pair body requires atom style body
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Pair body requires body style nparticle
 
-UNDOCUMENTED
+This pair style is specific to the nparticle body style.
 
 */
diff --git a/src/CLASS2/pair_lj_class2_coul_long.h b/src/CLASS2/pair_lj_class2_coul_long.h
index 870b15981..4d4978c4d 100644
--- a/src/CLASS2/pair_lj_class2_coul_long.h
+++ b/src/CLASS2/pair_lj_class2_coul_long.h
@@ -1,84 +1,79 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/class2/coul/long,PairLJClass2CoulLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_H
 #define LMP_PAIR_LJ_CLASS2_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairLJClass2CoulLong : public Pair {
  public:
   PairLJClass2CoulLong(class LAMMPS *);
   virtual ~PairLJClass2CoulLong();
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   double single(int, int, int, int, double, double, double, double &);
   void *extract(const char *, int &);
 
  protected:
   double cut_lj_global;
   double **cut_lj,**cut_ljsq;
   double cut_coul,cut_coulsq;
   double **epsilon,**sigma;
   double **lj1,**lj2,**lj3,**lj4,**offset;
   double g_ewald;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair style lj/class2/coul/long requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
-
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This pair style is designed for use with a KSpace style.
 
 */
diff --git a/src/FLD/pair_brownian.h b/src/FLD/pair_brownian.h
index 88798d90e..88ae714e6 100644
--- a/src/FLD/pair_brownian.h
+++ b/src/FLD/pair_brownian.h
@@ -1,102 +1,102 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(brownian,PairBrownian)
 
 #else
 
 #ifndef LMP_PAIR_BROWNIAN_H
 #define LMP_PAIR_BROWNIAN_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairBrownian : public Pair {
  public:
   PairBrownian(class LAMMPS *);
   virtual ~PairBrownian();
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   virtual double init_one(int, int);
   virtual void init_style();
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
 
  protected:
   double cut_inner_global,cut_global;
   double t_target,mu;
   int flaglog,flagfld;
   int flagHI, flagVF;
   int flagdeform, flagwall;
   double vol_P;
   double rad;
   class FixWall *wallfix;
 
   int seed;
   double **cut_inner,**cut;
   double R0,RT0;
 
   class RanMars *random;
 
   void set_3_orthogonal_vectors(double*,double*,double*);
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 W: Cannot include log terms without 1/r terms; setting flagHI to 1
 
 Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair brownian requires atom style sphere
 
 Self-explanatory.
 
 W: Pair brownian needs newton pair on for momentum conservation
 
 Self-explanatory.
 
 E: Pair brownian requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Pair brownian requires monodisperse particles
 
 All particles must be the same finite size.
 
 E: Cannot use multiple fix wall commands with pair brownian
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/FLD/pair_brownian_poly.h b/src/FLD/pair_brownian_poly.h
index 90602ac96..bbf0c93be 100644
--- a/src/FLD/pair_brownian_poly.h
+++ b/src/FLD/pair_brownian_poly.h
@@ -1,59 +1,59 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(brownian/poly,PairBrownianPoly)
 
 #else
 
 #ifndef LMP_PAIR_BROWNIAN_POLY_H
 #define LMP_PAIR_BROWNIAN_POLY_H
 
 #include "pair_brownian.h"
 
 namespace LAMMPS_NS {
 
 class PairBrownianPoly : public PairBrownian {
  public:
   PairBrownianPoly(class LAMMPS *);
   ~PairBrownianPoly() {}
   void compute(int, int);
   double init_one(int, int);
   void init_style();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Pair brownian/poly requires newton pair off
 
 Self-explanatory.
 
 E: Pair brownian/poly requires atom style sphere
 
 Self-explanatory.
 
 E: Pair brownian/poly requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Cannot use multiple fix wall commands with pair brownian
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/FLD/pair_lubricate.h b/src/FLD/pair_lubricate.h
index 4e3b5b298..e93aa55a2 100644
--- a/src/FLD/pair_lubricate.h
+++ b/src/FLD/pair_lubricate.h
@@ -1,103 +1,103 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lubricate,PairLubricate)
 
 #else
 
 #ifndef LMP_PAIR_LUBRICATE_H
 #define LMP_PAIR_LUBRICATE_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairLubricate : public Pair {
  public:
   PairLubricate(class LAMMPS *);
   virtual ~PairLubricate();
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   double init_one(int, int);
   virtual void init_style();
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   int pre_adapt(char *, int, int, int, int);
   void adapt(int, int, int, int, int, double);
 
   int pack_comm(int, int *, double *, int, int *);
   void unpack_comm(int, int, double *);
 
  protected:
   double mu,cut_inner_global,cut_global;
   double rad;
   int flaglog,flagfld,shearing;
   int flagdeform, flagwall;
   double vol_P;
   class FixWall *wallfix;
   int flagVF, flagHI;
 
   double Ef[3][3];
   double R0,RT0,RS0;
   double **cut_inner,**cut;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 W: Cannot include log terms without 1/r terms; setting flagHI to 1
 
 Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair lubricate requires atom style sphere
 
 Self-explanatory.
 
 E: Pair lubricate requires ghost atoms store velocity
 
 Use the communicate vel yes command to enable this.
 
 E: Pair lubricate requires monodisperse particles
 
 All particles must be the same finite size.
 
 E: Using pair lubricate with inconsistent fix deform remap option
 
 Must use remap v option with fix deform with this pair style.
 
 E: Cannot use multiple fix wall commands with pair lubricate
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/FLD/pair_lubricateU.h b/src/FLD/pair_lubricateU.h
index 77aa6bd80..1af1ec281 100644
--- a/src/FLD/pair_lubricateU.h
+++ b/src/FLD/pair_lubricateU.h
@@ -1,114 +1,114 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lubricateU,PairLubricateU)
 
 #else
 
 #ifndef LMP_PAIR_LUBRICATEU_H
 #define LMP_PAIR_LUBRICATEU_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairLubricateU : public Pair {
  public:
   PairLubricateU(class LAMMPS *);
   virtual ~PairLubricateU();
   virtual void compute(int, int);
   virtual void settings(int, char **);
   void coeff(int, char **);
   double init_one(int, int);
   virtual void init_style();
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   int pack_comm(int, int *, double *, int, int *);
   void unpack_comm(int, int, double *);
 
  protected:
   double cut_inner_global,cut_global;
   double mu;
   double rad;
   int flaglog;
   int flagdeform, flagwall;
   int flagVF, flagHI;
   double vol_P;
   class FixWall *wallfix;
 
   double gdot,Ef[3][3];
   double **cut_inner,**cut;
   void allocate();
   double R0,RT0,RS0;
 
   int nmax;
   double **fl,**Tl,**xl;
 
   int cgmax;
   double *bcg,*xcg,*rcg,*rcg1,*pcg,*RU;
 
   void compute_RE();
   virtual void compute_RE(double **);
   void compute_RU();
   virtual void compute_RU(double **);
   virtual void compute_Fh(double **);
   void stage_one();
   void intermediates(int, double **);
   void stage_two(double **);
   void copy_vec_uo(int, double *, double **, double **);
   void copy_uo_vec(int, double **, double **, double *);
   double dot_vec_vec(int , double *, double *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 W: Cannot include log terms without 1/r terms; setting flagHI to 1.
 
 Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair lubricateU requires atom style sphere
 
 Self-explanatory.
 
 E: Pair lubricateU requires ghost atoms store velocity
 
 Use the communicate vel yes command to enable this.
 
 E: Pair lubricateU requires monodisperse particles
 
 All particles must be the same finite size.
 
 E: Cannot use multiple fix wall commands with pair lubricateU
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/FLD/pair_lubricateU_poly.h b/src/FLD/pair_lubricateU_poly.h
index 10a98d4a4..d7562993a 100644
--- a/src/FLD/pair_lubricateU_poly.h
+++ b/src/FLD/pair_lubricateU_poly.h
@@ -1,83 +1,83 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lubricateU/poly,PairLubricateUPoly)
 
 #else
 
 #ifndef LMP_PAIR_LUBRICATEU_POLY_H
 #define LMP_PAIR_LUBRICATEU_POLY_H
 
 #include "pair_lubricateU.h"
 
 namespace LAMMPS_NS {
 
 class PairLubricateUPoly : public PairLubricateU {
  public:
   PairLubricateUPoly(class LAMMPS *);
   ~PairLubricateUPoly() {}
   void compute(int, int);
   void settings(int, char **);
   void init_style();
 
  private:
   double vol_P;
   int flagdeform, flagwall, flagVF, flagHI;
   class FixWall *wallfix;
 
   void iterate(double **, int);
   void compute_RE(double **);
   void compute_RU(double **);
   void compute_Fh(double **);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 W: Cannot include log terms without 1/r terms; setting flagHI to 1
 
 Self-explanatory.
 
 E: Pair lubricateU/poly requires newton pair off
 
 Self-explanatory.
 
 E: Pair lubricateU/poly requires ghost atoms store velocity
 
 Use the communicate vel yes command to enable this.
 
 E: Pair lubricate/poly requires atom style sphere
 
 Self-explanatory.
 
 E: Pair lubricate/poly requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Cannot use multiple fix wall commands with pair lubricateU
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/FLD/pair_lubricate_poly.h b/src/FLD/pair_lubricate_poly.h
index 83afa0493..274852823 100644
--- a/src/FLD/pair_lubricate_poly.h
+++ b/src/FLD/pair_lubricate_poly.h
@@ -1,70 +1,70 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lubricate/poly,PairLubricatePoly)
 
 #else
 
 #ifndef LMP_PAIR_LUBRICATE_POLY_H
 #define LMP_PAIR_LUBRICATE_POLY_H
 
 #include "pair_lubricate.h"
 
 namespace LAMMPS_NS {
 
 class PairLubricatePoly : public PairLubricate {
  public:
   PairLubricatePoly(class LAMMPS *);
   ~PairLubricatePoly() {}
   void compute(int, int);
   void init_style();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Pair lubricate/poly requires newton pair off
 
 Self-explanatory.
 
 E: Pair lubricate/poly requires ghost atoms store velocity
 
 Use the communicate vel yes command to enable this.
 
 E: Pair lubricate/poly requires atom style sphere
 
 Self-explanatory.
 
 E: Pair lubricate/poly requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Using pair lubricate with inconsistent fix deform remap option
 
 Must use remap v option with fix deform with this pair style.
 
 E: Cannot use multiple fix wall commands with pair lubricate/poly
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Using pair lubricate/poly with inconsistent fix deform remap option
 
 If fix deform is used, the remap v option is required.
 
 */
diff --git a/src/GPU/pair_born_coul_long_gpu.cpp b/src/GPU/pair_born_coul_long_gpu.cpp
index 8f08623ee..073d25ed2 100644
--- a/src/GPU/pair_born_coul_long_gpu.cpp
+++ b/src/GPU/pair_born_coul_long_gpu.cpp
@@ -1,298 +1,298 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 // External functions from cuda library for atom decomposition
 
 // Set-up call made from init_style(). gpu_mode is passed by non-const
 // reference, and the int result is handed to GPU_EXTRA::check_flag().
 int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                     double **host_born1, double **host_born2, 
                     double **host_born3, double **host_a, 
                     double **host_c, double **host_d, 
                     double **sigma, double **offset, double *special_lj,
                     const int inum, const int nall, const int max_nbors,
                     const int maxspecial, const double cell_size,
                     int &gpu_mode, FILE *screen, double **host_cut_ljsq,
                     double host_cut_coulsq, double *host_special_coul,
                     const double qqrd2e, const double g_ewald);
 // Called from the destructor.
 void borncl_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE. The return value is used
 // as firstneigh, and ilist/jnum receive the list pointers that compute()
 // forwards to cpu_compute(). host_start and success are outputs.
 int** borncl_gpu_compute_n(const int ago, const int inum_full, const int nall,
                            double **host_x, int *host_type, double *sublo,
                            double *subhi, int *tag, int **nspecial, 
                            int **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,  const double cpu_time,
                            bool &success, double *host_q, double *boxlo,
                            double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE. The caller supplies the
 // neighbor list; host_start and success are outputs.
 void borncl_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success, double *host_q,
                         const int nlocal, double *boxlo, double *prd);
 // Value added to Pair::memory_usage() in memory_usage().
 double borncl_gpu_bytes();
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the GPU variant of born/coul/long.
 //  - gpu_mode starts as GPU_FORCE; init_style() passes it by reference to
 //    borncl_gpu_init(), which may change it.
 //  - cpu_time (host-side loop timing reported to the library) starts at 0.
 //  - gpulist is set to NULL so the pointer never holds an indeterminate
 //    value; nothing else in this style assigns it.
 // NOTE(review): GPU_EXTRA::gpu_ready() presumably verifies that the GPU
 // package is active before the style is used - confirm in gpu_extra.h.
 PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) : 
   PairBornCoulLong(lmp), gpu_mode(GPU_FORCE), cpu_time(0.0), gpulist(NULL)
 {
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    destructor: delegates cleanup to borncl_gpu_clear() in the GPU library
    (presumably releases the library-side data of this style - confirm)
 ------------------------------------------------------------------------- */
 
 PairBornCoulLongGPU::~PairBornCoulLongGPU()
 {
   borncl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force entry point. Hands the pair computation to the GPU
 // library, then runs cpu_compute() for list entries [host_start, inum)
 // when the library reports host_start < inum.
 void PairBornCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode != GPU_FORCE) {
     // neighbor data (ilist, numneigh, firstneigh) comes back from the library
     inum = atom->nlocal;
     firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                       atom->type, domain->sublo, domain->subhi,
                                       atom->tag, atom->nspecial, atom->special,
                                       eflag, vflag, eflag_atom, vflag_atom,
                                       host_start, &ilist, &numneigh, cpu_time,
                                       success, atom->q, domain->boxlo,
                                       domain->prd);
   } else {
     // GPU_FORCE: the neighbor list held in 'list' is passed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success, atom->q,
                        atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
-    error->one(FLERR,"Out of memory on GPGPU");
+    error->one(FLERR,"Insufficient memory on accelerator");
 
   // time the host-side remainder; cpu_time is passed to the next library call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLongGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,
       "Pair style born/coul/long/gpu requires atom attribute q");
   if (force->newton_pair) 
     error->all(FLERR,
        "Cannot use newton pair with born/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = borncl_gpu_init(atom->ntypes+1, cutsq,  rhoinv, 
                                 born1, born2, born3, a, c, d, sigma,
                                 offset, force->special_lj, atom->nlocal,
                   	        atom->nlocal+atom->nghost, 300, maxspecial,
                    	        cell_size, gpu_mode, screen, cut_ljsq,
                                 cut_coulsq, force->special_coul, 
                                 force->qqrd2e, g_ewald);
 
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Bytes reported by the base Pair class plus the amount reported by the
 // GPU library for this style.
 double PairBornCoulLongGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = borncl_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side Born + real-space Ewald force loop for the neighbor-list
 // entries ii in [start, inum).
 //   start, inum  : index range into ilist handled on the host
 //   eflag        : nonzero -> evaluate evdwl / ecoul for each pair
 //   vflag        : not read directly here; tallying is gated by evflag
 //   ilist        : atom index for each list entry
 //   numneigh     : neighbor count, indexed by atom
 //   firstneigh   : neighbor array, indexed by atom
 // Only f[i] is updated for a pair and ev_tally_full() does the tallying,
 // which matches the full neighbor list requested in init_style().
 void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                       int vflag, int *ilist, int *numneigh,
                                       int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   int *jlist;
   double rsq;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond class is encoded in the neighbor entry: read the
       // scale factors first, then mask down to the plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
 
         if (rsq < cut_coulsq) {
           // erfc(g_ewald*r) from the polynomial in t with the
           // EWALD_P, A1..A5 constants defined at the top of the file
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           prefactor = qqrd2e * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           // special pairs: subtract the excluded fraction of the bare term
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
             + born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = prefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv 
               + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_coul_long_gpu.h b/src/GPU/pair_born_coul_long_gpu.h
index 78f08c4b7..5fc5a716f 100644
--- a/src/GPU/pair_born_coul_long_gpu.h
+++ b/src/GPU/pair_born_coul_long_gpu.h
@@ -1,66 +1,68 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(born/coul/long/gpu,PairBornCoulLongGPU)
 
 #else
 
 #ifndef LMP_PAIR_BORN_COUL_LONG_GPU_H
 #define LMP_PAIR_BORN_COUL_LONG_GPU_H
 
 #include "pair_born_coul_long.h"
 
 namespace LAMMPS_NS {
 
 // GPU-package variant of PairBornCoulLong, registered above under the
 // name "born/coul/long/gpu".
 class PairBornCoulLongGPU : public PairBornCoulLong {
  public:
   PairBornCoulLongGPU(LAMMPS *lmp);
   ~PairBornCoulLongGPU();
   // host-side force loop: (start, inum, eflag, vflag, ilist, numneigh,
   // firstneigh), as named in the .cpp definition
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  // values compared against / stored in gpu_mode
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;      // starts as GPU_FORCE; passed by reference to the library init
   double cpu_time;   // wall time of the last cpu_compute() call, fed to the library
   int *gpulist;      // NOTE(review): no use is visible in this style's force code - confirm it is still needed
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Out of memory on GPGPU
+E: Insufficient memory on accelerator
 
-UNDOCUMENTED
+There is insufficient memory on one of the devices specified for the gpu
+package.
 
 E: Pair style born/coul/long/gpu requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 E: Cannot use newton pair with born/coul/long/gpu pair style
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Pair style is incompatible with KSpace style
 
-UNDOCUMENTED
+If a pair style with a long-range Coulombic component is selected,
+then a kspace style must also be used.
 
 */
diff --git a/src/GPU/pair_born_coul_wolf_gpu.cpp b/src/GPU/pair_born_coul_wolf_gpu.cpp
index 6f8fb9734..aa6c989f8 100644
--- a/src/GPU/pair_born_coul_wolf_gpu.cpp
+++ b/src/GPU/pair_born_coul_wolf_gpu.cpp
@@ -1,292 +1,292 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_coul_wolf_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 // External functions from cuda library for atom decomposition
 
 // Set-up call made from init_style(). gpu_mode is passed by non-const
 // reference, and the int result is handed to GPU_EXTRA::check_flag().
 // alf, e_shift and f_shift are the damping and shift terms computed by
 // the caller.
 int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                     double **host_born1, double **host_born2, 
                     double **host_born3, double **host_a, double **host_c, 
                     double **host_d, double **sigma, double **offset, 
                     double *special_lj, const int inum,
                     const int nall, const int max_nbors, const int maxspecial,
                     const double cell_size, int &gpu_mode, FILE *screen,
                     double **host_cut_ljsq, double host_cut_coulsq,
                     double *host_special_coul, const double qqrd2e,
                     const double alf, const double e_shift, const double f_shift);
 // Called from the destructor.
 void borncw_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE. The return value is used
 // as firstneigh, and ilist/jnum receive the list pointers that compute()
 // forwards to cpu_compute(). host_start and success are outputs.
 int ** borncw_gpu_compute_n(const int ago, const int inum_full, const int nall,
                             double **host_x, int *host_type, double *sublo,
                             double *subhi, int *tag, int **nspecial, 
                             int **special, const bool eflag, const bool vflag,
                             const bool eatom, const bool vatom, int &host_start,
                             int **ilist, int **jnum, const double cpu_time,
                             bool &success, double *host_q, double *boxlo,
                             double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE. The caller supplies the
 // neighbor list; host_start and success are outputs.
 void borncw_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success, double *host_q,
                         const int nlocal, double *boxlo, double *prd);
 // Value added to Pair::memory_usage() in memory_usage().
 double borncw_gpu_bytes();
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the GPU variant of born/coul/wolf.
 //  - gpu_mode starts as GPU_FORCE; init_style() passes it by reference to
 //    borncw_gpu_init(), which may change it.
 //  - cpu_time (host-side loop timing reported to the library) starts at 0.
 //  - gpulist is set to NULL so the pointer never holds an indeterminate
 //    value; nothing else in this style assigns it.
 // NOTE(review): GPU_EXTRA::gpu_ready() presumably verifies that the GPU
 // package is active before the style is used - confirm in gpu_extra.h.
 PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp) :
   PairBornCoulWolf(lmp), gpu_mode(GPU_FORCE), cpu_time(0.0), gpulist(NULL)
 {
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    destructor: delegates cleanup to borncw_gpu_clear() in the GPU library
    (presumably releases the library-side data of this style - confirm)
 ------------------------------------------------------------------------- */
 
 PairBornCoulWolfGPU::~PairBornCoulWolfGPU()
 {
   borncw_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force entry point. Hands the pair computation to the GPU
 // library, then runs cpu_compute() for list entries [host_start, inum)
 // when the library reports host_start < inum.
 void PairBornCoulWolfGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // neighbor data (ilist, numneigh, firstneigh) comes back from the library
     inum = atom->nlocal;
     firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall,
                                       atom->x, atom->type, domain->sublo,
                                       domain->subhi, atom->tag, atom->nspecial,
                                       atom->special, eflag, vflag, eflag_atom,
                                       vflag_atom, host_start, 
                                       &ilist, &numneigh, cpu_time, success, 
                                       atom->q, domain->boxlo, domain->prd);
   } else {
     // GPU_FORCE: the neighbor list held in 'list' is passed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success, atom->q,
                        atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
-    error->one(FLERR,"Out of memory on GPGPU");
+    error->one(FLERR,"Insufficient memory on accelerator");
 
   // time the host-side remainder; cpu_time is passed to the next library call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolfGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,
       "Cannot use newton pair with born/coul/wolf/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   double e_shift = erfc(alf*cut_coul)/cut_coul;
   double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / 
     cut_coul; 
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = borncw_gpu_init(atom->ntypes+1, cutsq, rhoinv, 
                                 born1, born2, born3, a, c, d, sigma, offset, 
                                 force->special_lj, atom->nlocal,
                                 atom->nlocal+atom->nghost, 300, maxspecial,
                                 cell_size, gpu_mode, screen, cut_ljsq,
                                 cut_coulsq, force->special_coul, force->qqrd2e, 
                                 alf, e_shift, f_shift);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   } 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Bytes reported by the base Pair class plus the amount reported by the
 // GPU library for this style.
 double PairBornCoulWolfGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = borncw_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side Born + Wolf-summation force loop for the neighbor-list
 // entries ii in [start, inum).
 //   start, inum  : index range into ilist handled on the host
 //   eflag        : nonzero -> evaluate evdwl / ecoul for each pair
 //   vflag        : not read directly here; tallying is gated by evflag
 //   ilist        : atom index for each list entry
 //   numneigh     : neighbor count, indexed by atom
 //   firstneigh   : neighbor array, indexed by atom
 // Only f[i] is updated for a pair and ev_tally_full() does the tallying,
 // which matches the full neighbor list requested in init_style().
 //
 // Fix relative to the previous version: forcecoul already has the
 // special-bond correction (1-factor_coul)*prefactor subtracted, so it must
 // not be scaled by factor_coul a second time when forming fpair. The
 // energy tallied below uses the same single correction, and so does the
 // born/coul/long host loop.
 // NOTE(review): confirm the device kernel uses the same convention.
 void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                       int *ilist, int *numneigh,
                                       int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
   double prefactor;
   double r,rexp;
   int *jlist;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // same shift terms that init_style() passes to the GPU library
   double e_shift = erfc(alf*cut_coul)/cut_coul;
   double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / 
     cut_coul; 
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // per-atom self term, tallied as Coulomb energy with zero force
     qisq = qtmp*qtmp;
     e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
     if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond class is encoded in the neighbor entry: read the
       // scale factors first, then mask down to the plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
 
         if (rsq < cut_coulsq) {
           prefactor = qqrd2e*qtmp*q[j]/r;
           erfcc = erfc(alf*r); 
           erfcd = exp(-alf*alf*r*r);
           v_sh = (erfcc - e_shift*r) * prefactor; 
           dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
           forcecoul = dvdrr*rsq*prefactor;
           // special pairs: subtract the excluded fraction of the bare term
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + 
             born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         // forcecoul is already corrected for special bonds (see above)
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = v_sh;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
               d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_coul_wolf_gpu.h b/src/GPU/pair_born_coul_wolf_gpu.h
index 09b0777ad..290d7a6fb 100644
--- a/src/GPU/pair_born_coul_wolf_gpu.h
+++ b/src/GPU/pair_born_coul_wolf_gpu.h
@@ -1,62 +1,59 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(born/coul/wolf/gpu,PairBornCoulWolfGPU)
 
 #else
 
 #ifndef LMP_PAIR_BORN_COUL_WOLF_GPU_H
 #define LMP_PAIR_BORN_COUL_WOLF_GPU_H
 
 #include "pair_born_coul_wolf.h"
 
 namespace LAMMPS_NS {
 
 // GPU-package variant of PairBornCoulWolf, registered above under the
 // name "born/coul/wolf/gpu".
 class PairBornCoulWolfGPU : public PairBornCoulWolf {
  public:
   PairBornCoulWolfGPU(LAMMPS *lmp);
   ~PairBornCoulWolfGPU();
   // host-side force loop: (start, inum, eflag, vflag, ilist, numneigh,
   // firstneigh), as named in the .cpp definition
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  // values compared against / stored in gpu_mode
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;      // starts as GPU_FORCE; passed by reference to the library init
   double cpu_time;   // wall time of the last cpu_compute() call, fed to the library
   int *gpulist;      // NOTE(review): no use is visible in this style's force code - confirm it is still needed
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Out of memory on GPGPU
+E: Insufficient memory on accelerator
 
-UNDOCUMENTED
+There is insufficient memory on one of the devices specified for the gpu
+package.
 
 E: Cannot use newton pair with born/coul/wolf/gpu pair style
 
-UNDOCUMENTED
-
-U: Pair style born/coul/wolf/gpu requires atom attribute q
-
-The atom style defined does not have this attribute.
+Self-explanatory.
 
 */
diff --git a/src/GPU/pair_born_gpu.cpp b/src/GPU/pair_born_gpu.cpp
index f73c82cc9..edd84a6a1 100644
--- a/src/GPU/pair_born_gpu.cpp
+++ b/src/GPU/pair_born_gpu.cpp
@@ -1,233 +1,233 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 // External functions from cuda library for atom decomposition
 
 // Set-up call made from init_style(). gpu_mode is passed by non-const
 // reference, and the int result is handed to GPU_EXTRA::check_flag().
 int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                   double **host_born1, double **host_born2, double **host_born3, 
                   double **host_a, double **host_c, double **host_d, 
                   double **host_sigma, double **offset, double *special_lj, 
                   const int inum, const int nall, const int max_nbors,
                   const int maxspecial, const double cell_size, 
                   int &gpu_mode, FILE *screen);
 // Called from the destructor.
 void born_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE. The return value is used
 // as firstneigh, and ilist/jnum receive the list pointers that compute()
 // forwards to cpu_compute(). host_start and success are outputs.
 int ** born_gpu_compute_n(const int ago, const int inum_full, 
                           const int nall, double **host_x, int *host_type, 
                           double *sublo, double *subhi, int *tag, int **nspecial,
                           int **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success);
 // Used by compute() when gpu_mode == GPU_FORCE. The caller supplies the
 // neighbor list; host_start and success are outputs.
 void born_gpu_compute(const int ago, const int inum_full, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 // Value added to Pair::memory_usage() in memory_usage().
 double born_gpu_bytes();
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the GPU variant of the born pair style.
 //  - gpu_mode starts as GPU_FORCE; init_style() passes it by reference to
 //    born_gpu_init(), which may change it.
 //  - cpu_time (host-side loop timing reported to the library) starts at 0.
 //  - gpulist is set to NULL so the pointer never holds an indeterminate
 //    value; nothing else in this style assigns it.
 // NOTE(review): GPU_EXTRA::gpu_ready() presumably verifies that the GPU
 // package is active before the style is used - confirm in gpu_extra.h.
 PairBornGPU::PairBornGPU(LAMMPS *lmp) :
   PairBorn(lmp), gpu_mode(GPU_FORCE), cpu_time(0.0), gpulist(NULL)
 {
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    destructor: delegates cleanup to born_gpu_clear() in the GPU library
    (presumably releases the library-side data of this style - confirm)
 ------------------------------------------------------------------------- */
 
 PairBornGPU::~PairBornGPU()
 {
   born_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force entry point. Hands the pair computation to the GPU
 // library, then runs cpu_compute() for list entries [host_start, inum)
 // when the library reports host_start < inum.
 void PairBornGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // neighbor data (ilist, numneigh, firstneigh) comes back from the library
     inum = atom->nlocal;
     firstneigh = born_gpu_compute_n(neighbor->ago, inum, nall,
                                     atom->x, atom->type, domain->sublo,
                                     domain->subhi, atom->tag, atom->nspecial,
                                     atom->special, eflag, vflag, eflag_atom,
                                     vflag_atom, host_start, 
                                     &ilist, &numneigh, cpu_time, success);
   } else {
     // GPU_FORCE: the neighbor list held in 'list' is passed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
-    error->one(FLERR,"Out of memory on GPGPU");
+    error->one(FLERR,"Insufficient memory on accelerator");
 
   // time the host-side remainder; cpu_time is passed to the next library call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBornGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with born/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = born_gpu_init(atom->ntypes+1, cutsq, rhoinv, 
                               born1, born2, born3, a, c, d, sigma,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
 	      cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Bytes reported by the base Pair class plus the amount reported by the
 // GPU library for this style.
 double PairBornGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = born_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side Born force loop for the neighbor-list entries ii in
 // [start, inum).
 //   start, inum  : index range into ilist handled on the host
 //   eflag        : nonzero -> evaluate evdwl for each pair
 //   vflag        : not read directly here; tallying is gated by evflag
 //   ilist        : atom index for each list entry
 //   numneigh     : neighbor count, indexed by atom
 //   firstneigh   : neighbor array, indexed by atom
 // Only f[i] is updated for a pair and ev_tally_full() does the tallying,
 // which matches the full neighbor list requested in init_style().
 void PairBornGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                               int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forceborn,factor_lj;
   double r,rexp;
   int *jlist;
 
   // evdwl is handed to ev_tally_full() whenever evflag is set, including
   // the case where eflag is 0, so it must not be left uninitialized
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond class is encoded in the neighbor entry: read the
       // scale factor first, then mask down to the plain atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r = sqrt(rsq);
         rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
         forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + 
           born3[itype][jtype]*r2inv*r6inv;
         fpair = factor_lj*forceborn*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + 
             d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_gpu.h b/src/GPU/pair_born_gpu.h
index ef03ecffd..de1dd54d0 100644
--- a/src/GPU/pair_born_gpu.h
+++ b/src/GPU/pair_born_gpu.h
@@ -1,58 +1,59 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(born/gpu,PairBornGPU)
 
 #else
 
 #ifndef LMP_PAIR_BORN_GPU_H
 #define LMP_PAIR_BORN_GPU_H
 
 #include "pair_born.h"
 
 namespace LAMMPS_NS {
 
 class PairBornGPU : public PairBorn {
  public:
   PairBornGPU(LAMMPS *lmp);
   ~PairBornGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
   double cpu_time;
   int *gpulist;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Out of memory on GPGPU
+E: Insufficient memory on accelerator
 
-UNDOCUMENTED
+There is insufficient memory on one of the devices specified for the gpu
+package
 
 E: Cannot use newton pair with born/gpu pair style
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/GPU/pair_colloid_gpu.h b/src/GPU/pair_colloid_gpu.h
index c810f5d1b..861bc0a6b 100644
--- a/src/GPU/pair_colloid_gpu.h
+++ b/src/GPU/pair_colloid_gpu.h
@@ -1,67 +1,67 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(colloid/gpu,PairColloidGPU)
 
 #else
 
 #ifndef LMP_PAIR_COLLOID_GPU_H
 #define LMP_PAIR_COLLOID_GPU_H
 
 #include "pair_colloid.h"
 
 namespace LAMMPS_NS {
 
 class PairColloidGPU : public PairColloid {
  public:
   PairColloidGPU(LAMMPS *lmp);
   ~PairColloidGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
   double cpu_time;
   int *gpulist;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Insufficient memory on accelerator
 
 There is insufficient memory on one of the devices specified for the gpu
 package
 
 E: Cannot use newton pair with colloid/gpu pair style
 
 Self-explanatory.
 
 E: Overlapping small/large in pair colloid
 
-UNDOCUMENTED
+This potential is infinite when there is an overlap.
 
 E: Overlapping large/large in pair colloid
 
-UNDOCUMENTED
+This potential is infinite when there is an overlap.
 
 */
diff --git a/src/GPU/pair_dipole_cut_gpu.h b/src/GPU/pair_dipole_cut_gpu.h
index 30b1ecee8..9087a2345 100644
--- a/src/GPU/pair_dipole_cut_gpu.h
+++ b/src/GPU/pair_dipole_cut_gpu.h
@@ -1,67 +1,63 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(dipole/cut/gpu,PairDipoleCutGPU)
 
 #else
 
 #ifndef LMP_PAIR_DIPOLE_CUT_GPU_H
 #define LMP_PAIR_DIPOLE_CUT_GPU_H
 
 #include "pair_dipole_cut.h"
 
 namespace LAMMPS_NS {
 
 class PairDipoleCutGPU : public PairDipoleCut {
  public:
   PairDipoleCutGPU(LAMMPS *lmp);
   ~PairDipoleCutGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
   double cpu_time;
   int *gpulist;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Insufficient memory on accelerator
 
 There is insufficient memory on one of the devices specified for the gpu
 package
 
 E: Pair dipole/cut/gpu requires atom attributes q, mu, torque
 
-UNDOCUMENTED
+The atom style defined does not have these attributes.
 
 E: Cannot use newton pair with dipole/cut/gpu pair style
 
 Self-explanatory.
 
-U: Pair style dipole/cut/gpu requires atom attribute q
-
-The atom style defined does not have this attribute.
-
 */
diff --git a/src/GPU/pair_gauss_gpu.cpp b/src/GPU/pair_gauss_gpu.cpp
index fe90eaf5c..05c2aedc8 100644
--- a/src/GPU/pair_gauss_gpu.cpp
+++ b/src/GPU/pair_gauss_gpu.cpp
@@ -1,226 +1,226 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_gauss_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 // External functions from cuda library for atom decomposition
 
 int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
                    double **b, double **offset, double *special_lj, const int nlocal, 
                    const int nall, const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen);
 void gauss_gpu_clear();
 int ** gauss_gpu_compute_n(const int ago, const int inum,
                            const int nall, double **host_x, int *host_type, 
                            double *sublo, double *subhi, int *tag, int **nspecial,
                            int **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,
                            const double cpu_time, bool &success);
 void gauss_gpu_compute(const int ago, const int inum, const int nall, 
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 double gauss_gpu_bytes();
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairGaussGPU::PairGaussGPU(LAMMPS *lmp) : PairGauss(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairGaussGPU::~PairGaussGPU()
 {
   gauss_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGaussGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = gauss_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start, 
                                      &ilist, &numneigh, cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairGaussGPU::init_style()
 {
   if (force->newton_pair) 
-    error->all(FLERR,"Cannot use newton pair with lj/cut/gpu pair style");
+    error->all(FLERR,"Cannot use newton pair with gauss/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = gauss_gpu_init(atom->ntypes+1, cutsq, a, b,
                                offset, force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
     int irequest = neighbor->request(this);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairGaussGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + gauss_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double r,rsq,r2inv,forcelj,factor_lj;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
         forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq * 
           exp(-b[itype][jtype]*rsq); 
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) -
             offset[itype][jtype]);
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_gauss_gpu.h b/src/GPU/pair_gauss_gpu.h
index ba8c04350..6334ac985 100644
--- a/src/GPU/pair_gauss_gpu.h
+++ b/src/GPU/pair_gauss_gpu.h
@@ -1,63 +1,59 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(gauss/gpu,PairGaussGPU)
 
 #else
 
 #ifndef LMP_PAIR_GAUSS_GPU_H
 #define LMP_PAIR_GAUSS_GPU_H
 
 #include "pair_gauss.h"
 
 namespace LAMMPS_NS {
 
 class PairGaussGPU : public PairGauss {
  public:
   PairGaussGPU(LAMMPS *lmp);
   ~PairGaussGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
   double cpu_time;
   int *gpulist;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Insufficient memory on accelerator
 
 There is insufficient memory on one of the devices specified for the gpu
 package
 
-E: Cannot use newton pair with lj/cut/gpu pair style
-
-UNDOCUMENTED
-
-U: Cannot use newton pair with gauss/gpu pair style
+E: Cannot use newton pair with gauss/gpu pair style
 
 Self-explanatory.
 
 */
diff --git a/src/GPU/pair_yukawa_colloid_gpu.h b/src/GPU/pair_yukawa_colloid_gpu.h
index c84bcd904..c845a8d87 100644
--- a/src/GPU/pair_yukawa_colloid_gpu.h
+++ b/src/GPU/pair_yukawa_colloid_gpu.h
@@ -1,63 +1,63 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(yukawa/colloid/gpu,PairYukawaColloidGPU)
 
 #else
 
 #ifndef LMP_PAIR_YUKAWA_COLLOID_GPU_H
 #define LMP_PAIR_YUKAWA_COLLOID_GPU_H
 
 #include "pair_yukawa_colloid.h"
 
 namespace LAMMPS_NS {
 
 class PairYukawaColloidGPU : public PairYukawaColloid {
  public:
   PairYukawaColloidGPU(LAMMPS *lmp);
   ~PairYukawaColloidGPU();
   void cpu_compute(int, int, int, int, int *, int *, int **);
   void compute(int, int);
   void init_style();
   double memory_usage();
 
  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
 
  private:
   int gpu_mode;
   double cpu_time;
   int *gpulist;
 };
 
 }
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Insufficient memory on accelerator
 
 There is insufficient memory on one of the devices specified for the gpu
 package
 
 E: Pair yukawa/colloid/gpu requires atom style sphere
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Cannot use newton pair with yukawa/colloid/gpu pair style
 
 Self-explanatory.
 
 */
diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp
index af78ea4f2..bb475b0eb 100644
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -1,720 +1,720 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Mike Brown (ORNL), Axel Kohlmeyer (Temple)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "pppm_gpu.h"
 #include "atom.h"
 #include "comm.h"
 #include "commgrid.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "fft3d_wrap.h"
 #include "remap_wrap.h"
 #include "gpu_extra.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 #include "universe.h"
 #include "fix.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXORDER 7
 #define OFFSET 16384
 #define SMALL 0.00001
 #define LARGE 10000.0
 #define EPS_HOC 1.0e-7
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 // external functions from cuda library for atom decomposition
 
 #ifdef FFT_SINGLE
 #define PPPM_GPU_API(api)  pppm_gpu_ ## api ## _f
 #else
 #define PPPM_GPU_API(api)  pppm_gpu_ ## api ## _d
 #endif
 
 FFT_SCALAR* PPPM_GPU_API(init)(const int nlocal, const int nall, FILE *screen,
                                const int order, const int nxlo_out,
                                const int nylo_out, const int nzlo_out,
                                const int nxhi_out, const int nyhi_out,
                                const int nzhi_out, FFT_SCALAR **rho_coeff,
                                FFT_SCALAR **_vd_brick,
                                const double slab_volfactor,
                                const int nx_pppm, const int ny_pppm,
                                const int nz_pppm, const bool split,
                                int &success);
 void PPPM_GPU_API(clear)(const double poisson_time);
 int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall,
                       double **host_x, int *host_type, bool &success,
                       double *host_q, double *boxlo, const double delxinv,
                       const double delyinv, const double delzinv);
 void PPPM_GPU_API(interp)(const FFT_SCALAR qqrd2e_scale);
 double PPPM_GPU_API(bytes)();
 void PPPM_GPU_API(forces)(double **f);
 
 /* ---------------------------------------------------------------------- */
 
 PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command");
 
   density_brick_gpu = vd_brick = NULL;
   kspace_split = false;
   im_real_space = false;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all memory
 ------------------------------------------------------------------------- */
 
 PPPMGPU::~PPPMGPU()
 {
   PPPM_GPU_API(clear)(poisson_time);
 }
 
 /* ----------------------------------------------------------------------
    called once before run
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::init()
 {
   // PPPM init manages all arrays except density_brick_gpu and vd_brick
   //      thru its deallocate(), allocate()
   // NOTE: could free density_brick and vdxyz_brick after PPPM allocates them,
   //       before allocating db_gpu and vd_brick down below, if don't need,
   //       if do this, make sure to set them to NULL
 
   destroy_3d_offset(density_brick_gpu,nzlo_out,nylo_out);
   destroy_3d_offset(vd_brick,nzlo_out,nylo_out);
   density_brick_gpu = vd_brick = NULL;
 
   PPPM::init();
 
   // insure no conflict with fix balance
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"balance") == 0)
       error->all(FLERR,"Cannot currently use pppm/gpu with fix balance.");
 
   // unsupported option
 
   if (differentiation_flag == 1)
-    error->all(FLERR,"Cannot (yet) do analytic differentiation with pppm/gpu");
+    error->all(FLERR,"Cannot do analytic differentiation with pppm/gpu");
 
   if (strcmp(update->integrate_style,"verlet/split") == 0) {
     kspace_split=true;
     old_nlocal = 0;
   }
 
   if (kspace_split && universe->iworld == 0) {
     im_real_space = true;
     return;
   }
 
   // GPU precision specific init
 
   if (order>8)
     error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu.");
   PPPM_GPU_API(clear)(poisson_time);
 
   int success;
   FFT_SCALAR *data, *h_brick;
   h_brick = PPPM_GPU_API(init)(atom->nlocal, atom->nlocal+atom->nghost, screen,
                                order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
                                nyhi_out, nzhi_out, rho_coeff, &data,
                                slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
                                kspace_split,success);
 
   GPU_EXTRA::check_flag(success,error,world);
 
   // allocate density_brick_gpu and vd_brick
 
   density_brick_gpu =
     create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                      nxlo_out,nxhi_out,"pppm:density_brick_gpu",h_brick,1);
   vd_brick =
     create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                      nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
 
   poisson_time = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    compute the PPPMGPU long-range force, energy, virial
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::compute(int eflag, int vflag)
 {
   int i,j;
 
   int nago;
   if (kspace_split) {
     if (im_real_space) return;
     if (atom->nlocal > old_nlocal) {
       nago=0;
       old_nlocal = atom->nlocal;
     } else nago = 1;
   } else nago = neighbor->ago;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global = 
         eflag_atom = vflag_atom = 0;
 
   // If need per-atom energies/virials, also do particle map on host
   // concurrently with GPU calculations
   if (evflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     cg_peratom->ghost_notify();
     cg_peratom->setup();
     peratom_allocate_flag = 1;
   }
 
   bool success = true;
   int flag=PPPM_GPU_API(spread)(nago, atom->nlocal, atom->nlocal +
                              atom->nghost, atom->x, atom->type, success,
                              atom->q, domain->boxlo, delxinv, delyinv,
                              delzinv);
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
   if (flag != 0)
     error->one(FLERR,"Out of range atoms - cannot compute PPPM");
 
   // convert atoms from box to lamda coords
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
 
   // extend size of per-atom arrays if necessary
 
   if (evflag_atom && atom->nlocal > nmax) {
     memory->destroy(part2grid);
     nmax = atom->nmax;
     memory->create(part2grid,nmax,3,"pppm:part2grid");
     particle_map();
   }
 
   double t3 = MPI_Wtime();
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
   cg->reverse_comm(this,REVERSE_RHO);
   brick2fft();
 
   // compute potential gradient on my FFT grid and
   //   portion of e_long on this proc's FFT grid
   // return gradients (electric fields) in 3d brick decomposition
 
   poisson();
 
   // all procs communicate E-field values
   // to fill ghost cells surrounding their 3d bricks
 
   if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
   else cg->forward_comm(this,FORWARD_IK);
 
   // extra per-atom energy/virial communication
 
   if (evflag_atom) {
     if (differentiation_flag == 1 && vflag_atom) 
       cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
     else if (differentiation_flag == 0)
       cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
   }
 
   poisson_time += MPI_Wtime()-t3;
 
   // calculate the force on my particles
 
   FFT_SCALAR qscale = force->qqrd2e * scale;
   PPPM_GPU_API(interp)(qscale);
 
   // per-atom energy/virial
   // energy includes self-energy correction
 
   if (evflag_atom) fieldforce_peratom();
 
   // sum energy across procs and add in volume-dependent term
 
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy = energy_all;
 
     energy *= 0.5*volume;
     energy -= g_ewald*qsqsum/1.772453851 +
       MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
     energy *= qscale;
   }
 
   // sum virial across procs
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
   }
 
   // per-atom energy/virial
   // energy includes self-energy correction
 
   if (evflag_atom) {
     double *q = atom->q;
     int nlocal = atom->nlocal;
 
     if (eflag_atom) {
       for (i = 0; i < nlocal; i++) {
         eatom[i] *= 0.5;
         eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
           (g_ewald*g_ewald*volume);
         eatom[i] *= qscale;
       }
     }
 
     if (vflag_atom) {
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
     }
   }
 
   // 2d slab correction
 
   if (slabflag) slabcorr();
 
   // convert atoms back from lamda to box coords
 
   if (triclinic) domain->lamda2x(atom->nlocal);
 
   if (kspace_split) PPPM_GPU_API(forces)(atom->f);
 }
 
 /* ----------------------------------------------------------------------
    remap density from 3d brick decomposition to FFT decomposition
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::brick2fft()
 {
   int n,ix,iy,iz;
 
   // copy grabs inner portion of density from 3d brick
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   n = 0;
   for (iz = nzlo_in; iz <= nzhi_in; iz++)
     for (iy = nylo_in; iy <= nyhi_in; iy++)
       for (ix = nxlo_in; ix <= nxhi_in; ix++)
         density_fft[n++] = density_brick_gpu[iz][iy][ix];
 
   remap->perform(density_fft,density_fft,work1);
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::poisson_ik()
 {
   int i,j,k,n;
   double eng;
 
   // transform charge density (r -> k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] = density_fft[i];
     work1[n++] = ZEROF;
   }
 
   fft1->compute(work1,work1,1);
 
   // if requested, compute energy and virial contribution
 
   double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft; i++) {
         eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
         if (eflag_global) energy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nfft; i++) {
         energy +=
           s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] *= scaleinv * greensfn[i];
     work1[n++] *= scaleinv * greensfn[i];
   }
 
   // extra FFTs for per-atom energy/virial
 
   if (evflag_atom) poisson_peratom();
 
   // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of vdx,vdy,vdz arrays
 
   // x direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkx[i]*work1[n+1];
         work2[n+1] = -fkx[i]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   int x_hi = nxhi_in * 4 + 3;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in * 4; i < x_hi; i+=4) {
         vd_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // y direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fky[j]*work1[n+1];
         work2[n+1] = -fky[j]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in * 4 + 1; i < x_hi; i+=4) {
         vd_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // z direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkz[k]*work1[n+1];
         work2[n+1] = -fkz[k]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in * 4 + 2; i < x_hi; i+=4) {
         vd_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    pack own values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     int offset;
     FFT_SCALAR *src = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
     for (int i = 0; i < nlist; i++) {
       offset = 4*list[i];
       buf[n++] = src[offset++];
       buf[n++] = src[offset++];
       buf[n++] = src[offset];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     int offset;
     FFT_SCALAR *dest = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
     for (int i = 0; i < nlist; i++) {
       offset = 4*list[i];
       dest[offset++] = buf[n++];
       dest[offset++] = buf[n++];
       dest[offset] = buf[n++];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[i];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
   } 
 }
 
 /* ----------------------------------------------------------------------
    create array using offsets from pinned memory allocation
 ------------------------------------------------------------------------- */
 
 FFT_SCALAR ***PPPMGPU::create_3d_offset(int n1lo, int n1hi, int n2lo, int n2hi,
                                         int n3lo, int n3hi, const char *name,
                                         FFT_SCALAR *data, int vec_length)
 {
   // extent of each dimension (bounds are inclusive)
   const int n1 = n1hi - n1lo + 1;
   const int n2 = n2hi - n2lo + 1;
   const int n3 = n3hi - n3lo + 1;
 
   // only the two pointer tables are allocated here;
   // the values themselves live in the caller-supplied data array
   FFT_SCALAR **plane = (FFT_SCALAR **)
     memory->smalloc(n1*n2*sizeof(FFT_SCALAR *),name);
   FFT_SCALAR ***array = (FFT_SCALAR ***)
     memory->smalloc(n1*sizeof(FFT_SCALAR **),name);
 
   // each (i,j) row covers n3*vec_length consecutive values of data;
   // the row pointer is shifted so that index n3lo*vec_length hits its start
   const int rowlen = n3*vec_length;
   const int rowshift = n3lo*vec_length;
   FFT_SCALAR *cursor = data;
   for (int row = 0; row < n1*n2; row++) {
     plane[row] = cursor - rowshift;
     cursor += rowlen;
   }
 
   // slab pointers, shifted so the second index starts at n2lo
   for (int slab = 0; slab < n1; slab++)
     array[slab] = plane + slab*n2 - n2lo;
 
   // shift the top-level pointer so the first index starts at n1lo
   return array-n1lo;
 }
 
 /* ----------------------------------------------------------------------
    3d memory offsets
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::destroy_3d_offset(FFT_SCALAR ***array, int n1_offset,
                                  int n2_offset)
 {
   if (!array) return;
 
   // step back from the offset pointers to the addresses handed to sfree;
   // the value storage the rows point into is not freed here
   FFT_SCALAR ***slabs = array + n1_offset;
   FFT_SCALAR **rows = slabs[0] + n2_offset;
 
   memory->sfree(rows);
   memory->sfree(slabs);
 }
 
 
 /* ----------------------------------------------------------------------
    memory usage of local arrays
 ------------------------------------------------------------------------- */
 
 double PPPMGPU::memory_usage()
 {
   double bytes = PPPM::memory_usage();
 
   // NOTE: add tallying here for density_brick_gpu and vd_brick
   //       could subtract out density_brick and vdxyz_brick if freed them above
   //       it the net efffect is zero, do nothing
 
   return bytes + PPPM_GPU_API(bytes)();
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 1d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPMGPU::timing_1d(int n, double &time1d)
 {
   // when im_real_space is set no FFTs are timed and a fixed
   // value of 1.0 is reported; otherwise defer to the base class
   if (im_real_space) time1d = 1.0;
   else PPPM::timing_1d(n,time1d);
 
   // the return value is 4 in either case
   return 4;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 3d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPMGPU::timing_3d(int n, double &time3d)
 {
   // when im_real_space is set no FFTs are timed and a fixed
   // value of 1.0 is reported; otherwise defer to the base class
   if (im_real_space) time3d = 1.0;
   else PPPM::timing_3d(n,time3d);
 
   // the return value is 4 in either case
   return 4;
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed
 ------------------------------------------------------------------------- */
 
 void PPPMGPU::setup()
 {
   // the base-class setup is skipped entirely when im_real_space is set
   if (!im_real_space) PPPM::setup();
 }
diff --git a/src/GPU/pppm_gpu.h b/src/GPU/pppm_gpu.h
index 0ac2afe89..c6a3f45e7 100644
--- a/src/GPU/pppm_gpu.h
+++ b/src/GPU/pppm_gpu.h
@@ -1,107 +1,103 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(pppm/gpu,PPPMGPU)
 
 #else
 
 #ifndef LMP_PPPM_GPU_H
 #define LMP_PPPM_GPU_H
 
 #include "pppm.h"
 
 namespace LAMMPS_NS {
 
 // PPPM variant registered above as kspace style "pppm/gpu".
 // Overrides the grid communication pack/unpack hooks and manages
 // 3d pointer arrays laid over an externally supplied data buffer.
 class PPPMGPU : public PPPM {
  public:
   PPPMGPU(class LAMMPS *, int, char **);
   virtual ~PPPMGPU();
   void init();
   // forwards to PPPM::setup() unless im_real_space is set
   void setup();
   void compute(int, int);
   // both timing calls return 4; when im_real_space is set they
   // report a time of 1.0 instead of calling the PPPM version
   int timing_1d(int, double &);
   int timing_3d(int, double &);
   // PPPM::memory_usage() plus the value reported by PPPM_GPU_API(bytes)()
   double memory_usage();
 
  protected:
   // density_brick_gpu: grid exchanged by pack_reverse/unpack_reverse
   //                    for REVERSE_RHO
   // vd_brick: grid with 4 slots per point along x; slots 0,1,2 are
   //           exchanged by pack_forward/unpack_forward for FORWARD_IK
   FFT_SCALAR ***density_brick_gpu, ***vd_brick;
   bool kspace_split, im_real_space;
   int old_nlocal;
   double poisson_time;
 
   void brick2fft();
   virtual void poisson_ik();
 
   // grid communication hooks: (flag, buf, nlist, list)
   void pack_forward(int, FFT_SCALAR *, int, int *);
   void unpack_forward(int, FFT_SCALAR *, int, int *);
   void pack_reverse(int, FFT_SCALAR *, int, int *);
   void unpack_reverse(int, FFT_SCALAR *, int, int *);
 
   // build a 3d pointer array with index ranges (n1lo:n1hi, n2lo:n2hi,
   // n3lo:n3hi) over the given data buffer, vec_length values per point;
   // only the pointer tables are allocated, not the data
   FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
                                  FFT_SCALAR *, int);
   // free the pointer tables of such an array (array, n1_offset, n2_offset)
   void destroy_3d_offset(FFT_SCALAR ***, int, int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot currently use pppm/gpu with fix balance.
 
 Self-explanatory.
 
 E: Cannot (yet) do analytic differentiation with pppm/gpu
 
-UNDOCUMENTED
+This is a current restriction of this command.
 
 E: Cannot use order greater than 8 with pppm/gpu.
 
 Self-explanatory.
 
 E: Insufficient memory on accelerator
 
 There is insufficient memory on one of the devices specified for the gpu
 package.
 
 E: Out of range atoms - cannot compute PPPM
 
 One or more atoms are attempting to map their charge to a PPPM grid
 point that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
-U: Cannot (yet) do analytic differentiation with pppm/gpu.
-
-Self-explanatory.
-
 */
diff --git a/src/KIM/pair_kim.cpp b/src/KIM/pair_kim.cpp
index e14c0b41e..58c63f2dc 100644
--- a/src/KIM/pair_kim.cpp
+++ b/src/KIM/pair_kim.cpp
@@ -1,1139 +1,1139 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Ryan S. Elliott,
                          Valeriu Smirichinski,
                          Ellad Tadmor (U Minn)
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Designed for use with the openkim-api-v1.1.0 (and newer) package
 ------------------------------------------------------------------------- */
 
 #include <cstring>
 #include <cstdlib>
 
 // includes from LAMMPS
 #include "pair_kim.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "memory.h"
 #include "domain.h"
 #include "error.h"
 
 // includes from KIM
 #include "KIM_API.h"
 #include "KIM_API_status.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairKIM::PairKIM(LAMMPS *lmp) :
    Pair(lmp),
    kim_modelname(NULL),
    lmps_map_types_to_unique(NULL),
    lmps_unique_elements(NULL),
    lmps_num_unique_elements(0),
    lmps_units(METAL),
    pkim(NULL),
    kim_ind_coordinates(-1),
    kim_ind_numberOfParticles(-1),
    kim_ind_numberContributingParticles(-1),
    kim_ind_numberParticleTypes(-1),
    kim_ind_particleTypes(-1),
    kim_ind_get_neigh(-1),
    kim_ind_neighObject(-1),
    kim_ind_cutoff(-1),
    kim_ind_energy(-1),
    kim_ind_particleEnergy(-1),
    kim_ind_forces(-1),
    kim_ind_virial(-1),
    kim_ind_particleVirial(-1),
    kim_particle_codes(NULL),
    lmps_local_tot_num_atoms(0),
    kim_global_cutoff(0.0),
    lmps_maxalloc(0),
    kim_particleTypes(NULL),
    lmps_force_tmp(NULL),
    lmps_stripped_neigh_list(NULL),
    kim_iterator_position(0)
 {
    // KIM state flags (consulted by kim_free(), etc.): nothing set up yet
    kim_init_ok = false;
    kim_model_init_ok = false;
 
    // Pair base-class settings
    one_coeff = 1;      // only a single "coeff * *" call is allowed
    restartinfo = 0;    // no restart info is written
    single_enable = 0;  // the Single() function is not provided
 
    // size Rij from neighbor->oneatom so it is large enough to be safe
    memory->create(Rij,3*(neighbor->oneatom),"pair:Rij");
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairKIM::~PairKIM()
 {
    // model name string (delete [] on a null pointer is a no-op)
    delete [] kim_modelname;
 
    // unique element names: each string first, then the table holding them
    if (lmps_unique_elements) {
       for (int k = 0; k < lmps_num_unique_elements; k++)
          delete [] lmps_unique_elements[k];
    }
    delete [] lmps_unique_elements;
 
    // local arrays that support the KIM interface
    memory->destroy(kim_particleTypes);
    memory->destroy(lmps_force_tmp);
    memory->destroy(lmps_stripped_neigh_list);
 
    // standard Pair class arrays, plus the type-to-element map
    // that allocate() creates alongside them
    if (allocated) {
       memory->destroy(setflag);
       memory->destroy(cutsq);
       delete [] lmps_map_types_to_unique;
    }
 
    // Rij array created in the constructor
    memory->destroy(Rij);
 
    // finally release the KIM interface (if necessary)
    kim_free();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Run the KIM model for the current configuration.
 //   eflag, vflag: energy/virial flags, forwarded to ev_setup()
 // Steps: refresh the per-atom KIM type array, hand the current pointers to
 // KIM, select the optional quantities, call the model, then post-process
 // the forces and virials.
 void PairKIM::compute(int eflag , int vflag)
 {
    int kimerror;
 
    if (eflag || vflag)
       ev_setup(eflag,vflag);
    else
       ev_unset();
 
    // grow kim_particleTypes array if necessary
    // needs to be atom->nmax in length
    // (lmps_force_tmp is re-created with the same length; old contents
    //  of both arrays are discarded)
    if (atom->nmax > lmps_maxalloc) {
       memory->destroy(kim_particleTypes);
       memory->destroy(lmps_force_tmp);
       
       lmps_maxalloc = atom->nmax;
       memory->create(kim_particleTypes,lmps_maxalloc,"pair:kim_particleTypes");
       memory->create(lmps_force_tmp,lmps_maxalloc,3,"pair:lmps_force_tmp");
    }
 
    // kim_particleTypes = KIM atom type for each LAMMPS atom
    // set ielement to valid 0 if lmps_map_types_to_unique[] stores an un-used -1
 
    int *type = atom->type;
    int nall = atom->nlocal + atom->nghost;
    int ielement;
 
    for (int i = 0; i < nall; i++) {
       ielement = lmps_map_types_to_unique[type[i]];
       ielement = MAX(ielement,0);
       // @@ this (above line) provides bogus info 
       // (when lmps_map_types_to_unique[type[i]]==-1) to KIM, but I guess
       // @@ this only happens when lmps_hybrid==true, 
       // and we are sure that iterator mode will
       // @@ not use these atoms.... (?)
       kim_particleTypes[i] = kim_particle_codes[ielement];
    }
 
    // pass current atom pointers to KIM
    set_volatiles();
 
    // three (index, requested flag, model-supports flag) triples are passed
    // NOTE(review): presumably these switch computation of each optional
    // quantity on or off -- confirm against the KIM API documentation
    pkim->setm_compute_by_index(&kimerror,3*3,
                                kim_ind_particleEnergy, eflag_atom,
                                (int) kim_model_has_particleEnergy,
                                kim_ind_particleVirial, vflag_atom,
                                (int) kim_model_has_particleVirial,
                                kim_ind_virial, vflag_global!=0, 
                                no_virial_fdotr_compute);
    kim_error(__LINE__,"setm_compute_by_index",kimerror);
 
    // compute via KIM model
    kimerror = pkim->model_compute();
    kim_error(__LINE__,"PairKIM::pkim->model_compute() error",kimerror);
    // assemble force and particleVirial if needed
    if (!lmps_using_newton) comm->reverse_comm_pair(this);
 
    // sum lmps_force_tmp to f if running in hybrid mode
    if (lmps_hybrid) {
       double **f = atom->f;
       for (int i = 0; i < nall; i++) {
          f[i][0] += lmps_force_tmp[i][0];
          f[i][1] += lmps_force_tmp[i][1];
          f[i][2] += lmps_force_tmp[i][2];
       }
    }
 
    if ((no_virial_fdotr_compute == 1) && (vflag_global))
    {  // flip sign and order of virial if KIM is computing it
       // components 0-2 are negated in place; 3 and 5 are swapped and
       // negated; 4 is negated
       for (int i = 0; i < 3; ++i) virial[i] = -1.0*virial[i];
       double tmp = virial[3];
       virial[3] = -virial[5];
       virial[4] = -virial[4];
       virial[5] = -tmp;
    }
    else
    {  // compute virial via LAMMPS fdotr mechanism
       if (vflag_fdotr) virial_fdotr_compute();
    }
 
    if ((kim_model_has_particleVirial) && (vflag_atom))
    {  // flip sign and order of virial if KIM is computing it
       // same transformation as for the global virial, applied per atom
       double tmp;
       for (int i = 0; i < nall; ++i)
       {
          for (int j = 0; j < 3; ++j) vatom[i][j] = -1.0*vatom[i][j];
          tmp = vatom[i][3];
          vatom[i][3] = -vatom[i][5];
          vatom[i][4] = -vatom[i][4];
          vatom[i][5] = -tmp;
       }
    }
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairKIM::allocate()
 {
    // arrays are indexed by atom type starting at 1, hence ntypes+1 entries
    const int nslots = atom->ntypes + 1;
 
    // standard Pair class arrays
    memory->create(setflag,nslots,nslots,"pair:setflag");
    memory->create(cutsq,nslots,nslots,"pair:cutsq");
 
    // atom type -> unique element index map
    lmps_map_types_to_unique = new int[nslots];
 
    allocated = 1;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairKIM::settings(int narg, char **arg)
 {
    // Invoked when "pair_style kim ..." is read from input; may run repeatedly.
    //   arg[0] = virial handling option: "LAMMPSvirial" or "KIMvirial"
    //   arg[1] = KIM Model name
    if (narg != 2) error->all(FLERR,"Illegal pair_style command");
 
    // return KIM to a clean state (needed on repeated calls;
    // does nothing the first time)
    kim_free();
 
    if (allocated != 1) allocate();
 
    // reset setflag so that coeff() has to be called after settings()
    const int ntypes = atom->ntypes;
    for (int itype = 1; itype <= ntypes; itype++)
       for (int jtype = itype; jtype <= ntypes; jtype++)
          setflag[itype][jtype] = 0;
 
    // virial handling option
    const bool use_lammps_virial = (strcmp(arg[0],"LAMMPSvirial") == 0);
    const bool use_kim_virial = (strcmp(arg[0],"KIMvirial") == 0);
    if (use_lammps_virial)
       no_virial_fdotr_compute = 0;
    else if (use_kim_virial)
       no_virial_fdotr_compute = 1;
    else
       error->all(FLERR,"Unrecognized virial argument in pair_style command");
 
    // replace any previously stored model name with a copy of arg[1]
    const int name_len = strlen(arg[1]);
    if (kim_modelname != 0) {
       delete [] kim_modelname;
       kim_modelname = 0;
    }
    kim_modelname = new char[name_len+1];
    strcpy(kim_modelname, arg[1]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 // Process "pair_coeff * * elem_1 ... elem_ntypes".
 //   narg: must equal 2 + atom->ntypes
 //   arg[0], arg[1]: must both be "*"
 //   arg[2..]: element name (or "NULL") for atom types 1..ntypes, in order
 // Rebuilds lmps_unique_elements / lmps_map_types_to_unique and sets
 // setflag[i][j] for every type pair whose two types map to an element.
 void PairKIM::coeff(int narg, char **arg)
 {
    // This is called when "pair_coeff ..." is read from input
    // may be called multiple times
    
    int i,j,n;
 
    if (!allocated) allocate();
 
    if (narg != 2 + atom->ntypes)
       error->all(FLERR,"Incorrect args for pair coefficients");
 
    // ensure I,J args are * *
 
    if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
       error->all(FLERR,"Incorrect args for pair coefficients");
 
    // read args that map atom types to KIM elements
    // lmps_map_types_to_unique[i] = 
    // which element the Ith atom type is, -1 if NULL
    // lmps_num_unique_elements = # of unique elements
    // lmps_unique_elements = list of element names
 
    // if called multiple times: update lmps_unique_elements
    if (lmps_unique_elements) {
       for (i = 0; i < lmps_num_unique_elements; i++) 
         delete [] lmps_unique_elements[i];
       delete [] lmps_unique_elements;
    }
    lmps_unique_elements = new char*[atom->ntypes];
    for (i = 0; i < atom->ntypes; i++) lmps_unique_elements[i] = 0;
 
    lmps_num_unique_elements = 0;
    // arg[i] describes atom type i-1 (arg[2] -> type 1, and so on)
    for (i = 2; i < narg; i++) {
       if (strcmp(arg[i],"NULL") == 0) {
          // NOTE(review): lmps_hybrid is not assigned anywhere in this
          // function; confirm it already holds a valid value when coeff()
          // runs
          if (!lmps_hybrid) 
            error->all(FLERR,"Invalid args for non-hybrid pair coefficients");
          lmps_map_types_to_unique[i-1] = -1;
          continue;
       }
       // linear search for an element name that was already recorded;
       // j == lmps_num_unique_elements afterwards means "not found"
       for (j = 0; j < lmps_num_unique_elements; j++)
          if (strcmp(arg[i],lmps_unique_elements[j]) == 0) break;
       lmps_map_types_to_unique[i-1] = j;
       if (j == lmps_num_unique_elements) {
          // first occurrence: store a private copy of the name
          n = strlen(arg[i]) + 1;
          lmps_unique_elements[j] = new char[n];
          strcpy(lmps_unique_elements[j],arg[i]);
          lmps_num_unique_elements++;
       }
    }
 
    // clear setflag since coeff() called once with I,J = * *
    n = atom->ntypes;
    for (int i = 1; i <= n; i++)
       for (int j = i; j <= n; j++)
          setflag[i][j] = 0;
 
    // set setflag i,j for type pairs where both are mapped to elements
    int count = 0;
    for (int i = 1; i <= n; i++)
       for (int j = i; j <= n; j++)
          if (lmps_map_types_to_unique[i] >= 0 && 
              lmps_map_types_to_unique[j] >= 0) {
             setflag[i][j] = 1;
             count++;
          }
    // no type pair mapped to elements at all is treated as an input error
    if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Per-run initialization: checks the dimension, refreshes the lmps_* flags,
 // initializes KIM and the model if that has not been done yet, and files a
 // neighbor list request whose half/full settings follow the kim_model_*
 // flags.
 void PairKIM::init_style()
 {
    // This is called for each "run ...", "minimize ...", etc. read from input
 
    if (domain->dimension != 3)
       error->all(FLERR,"PairKIM only works with 3D problems.");
 
    // set lmps_* bool flags
    set_lmps_flags();
    
    int kimerror;
    // KIM and Model initialization (only once)
    // also sets kim_ind_* and kim_* bool flags
    if (!kim_init_ok)
    {
       kim_init();
       kimerror = pkim->model_init();
       // kim_model_init_ok is only set when model_init() reports success
       if (kimerror != KIM_STATUS_OK)
          kim_error(__LINE__, "KIM API:model_init() failed", kimerror);
       else
          kim_model_init_ok = true;
    }
 
    // request none, half, or full neighbor list
    // depending on KIM model requirement
 
    int irequest = neighbor->request(this);
    if (kim_model_using_cluster)
    {
       // cluster mode: neither a half nor a full list is requested
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->full = 0;
    }
    else
    {
       // make sure comm_reverse expects (at most) 9 values when newton is off
       // (9 is the largest per-atom count returned by pack_reverse_comm)
       if (!lmps_using_newton) comm_reverse_off = 9;
 
       if (kim_model_using_half)
       {
          neighbor->requests[irequest]->half = 1;
          neighbor->requests[irequest]->full = 0;
          // make sure half lists also include local-ghost pairs
          if (lmps_using_newton) neighbor->requests[irequest]->newton = 2;
       }
       else
       {
          neighbor->requests[irequest]->half = 0;
          neighbor->requests[irequest]->full = 1;
          // make sure full lists also include local-ghost pairs
          if (lmps_using_newton) neighbor->requests[irequest]->newton = 0;
       }
    }
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairKIM::init_one(int i, int j)
 {
    // This is called once of each (unordered) i,j pair for each
    // "run ...", "minimize ...", etc. read from input
 
    if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
    return kim_global_cutoff;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairKIM::pack_reverse_comm(int n, int first, double *buf)
 {
    // forces come from the temporary array in hybrid mode, else from atom->f
    double *force = lmps_hybrid ? &(lmps_force_tmp[0][0]) : &(atom->f[0][0]);
 
    // the three packing layouts:
    //   forces only         -> 3 values per atom
    //   forces + per-atom virial -> 9 values per atom
    //   per-atom virial only     -> 6 values per atom
    // any other combination packs nothing
    const bool forces_only = (kim_model_has_forces) &&
       ((vflag_atom == 0) || (!kim_model_has_particleVirial));
    const bool with_virial = (vflag_atom == 1) && (kim_model_has_particleVirial);
    if (!forces_only && !with_virial) return 0;
 
    // forces_only implies the model has forces, so this covers both force cases
    const bool with_forces = kim_model_has_forces;
 
    // vatom is only touched when the virial is actually packed
    double *pvirial = 0;
    if (with_virial) pvirial = &(vatom[0][0]);
 
    int count = 0;
    const int last = first + n;
    for (int iatom = first; iatom < last; iatom++) {
       if (with_forces)
          for (int c = 0; c < 3; c++) buf[count++] = force[3*iatom+c];
       if (with_virial)
          for (int c = 0; c < 6; c++) buf[count++] = pvirial[6*iatom+c];
    }
 
    // number of values packed per atom
    return (with_forces ? 3 : 0) + (with_virial ? 6 : 0);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::unpack_reverse_comm(int n, int *list, double *buf)
 {
    // forces go to the temporary array in hybrid mode, else to atom->f
    double *force = lmps_hybrid ? &(lmps_force_tmp[0][0]) : &(atom->f[0][0]);
 
    // same three layouts as pack_reverse_comm:
    //   forces only, forces + per-atom virial, per-atom virial only;
    // any other combination unpacks nothing
    const bool forces_only = (kim_model_has_forces) &&
       ((vflag_atom == 0) || (!kim_model_has_particleVirial));
    const bool with_virial = (vflag_atom == 1) && (kim_model_has_particleVirial);
    if (!forces_only && !with_virial) return;
 
    // forces_only implies the model has forces, so this covers both force cases
    const bool with_forces = kim_model_has_forces;
 
    // vatom is only touched when the virial is actually unpacked
    double *pvirial = 0;
    if (with_virial) pvirial = &(vatom[0][0]);
 
    // received values are accumulated into the listed atoms
    int count = 0;
    for (int k = 0; k < n; k++) {
       const int iatom = list[k];
       if (with_forces)
          for (int c = 0; c < 3; c++) force[3*iatom+c] += buf[count++];
       if (with_virial)
          for (int c = 0; c < 6; c++) pvirial[iatom*6+c] += buf[count++];
    }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairKIM::memory_usage()
 {
    // Both per-atom arrays are (re)allocated together in compute() with
    // lmps_maxalloc entries, so both are tallied here:
    //   kim_particleTypes : lmps_maxalloc ints
    //   lmps_force_tmp    : lmps_maxalloc x 3 doubles
    // Arithmetic is done in double so large atom counts cannot overflow.
    double bytes = (double) lmps_maxalloc * sizeof(int);
    bytes += 3.0 * (double) lmps_maxalloc * sizeof(double);
    return bytes;
 }
 
 /* ----------------------------------------------------------------------
    KIM-specific interface
 ------------------------------------------------------------------------- */
 
 void PairKIM::kim_error(int ln, const char* msg, int errcode)
 {
    // anything other than KIM_STATUS_OK is reported through the KIM API
    // and then raised as a LAMMPS error tagged with the caller's line number
    if (errcode != KIM_STATUS_OK) {
       KIM_API_model::report_error(ln,(char *) __FILE__, (char *) msg,errcode);
       error->all(__FILE__,ln,"Internal KIM error");
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Neighbor-list callback handed to the KIM API (static member; its address
 // is stored in the KIM object by set_statics()).
 //
 //   kimmdl   - address of the KIM_API_model pointer; the owning PairKIM is
 //              recovered from the object's test buffer
 //   mode     - 0: iterator access, 1: locator access
 //   request  - iterator mode: 0 restarts the iterator, 1 advances it
 //              locator mode: index of the atom whose neighbors are wanted
 //   atom     - out: index of the atom the returned neighbors belong to
 //   numnei   - out: number of neighbors returned
 //   nei1atom - out: pointer to the neighbor indices
 //   pRij     - out: always set to self->Rij; the array is filled with
 //              x[j]-x[i] only when kim_model_using_Rij is true
 //
 // Returns a KIM_STATUS_* code, or -16 when no branch produced a status
 // (e.g. iterator mode with a request other than 0 or 1).
 int PairKIM::get_neigh(void **kimmdl,int *mode,int *request,
                        int *atom, int *numnei, int **nei1atom, double **pRij)
 {
    KIM_API_model *pkim = (KIM_API_model *) *kimmdl;
 
    int kimerror;
    PairKIM *self = (PairKIM *) pkim->get_test_buffer(&kimerror);
 
    *pRij = &(self->Rij[0]);
 
    // subvert KIM api by using direct access to self->list
    //
    // get neighObj from KIM API obj
    // NeighList * neiobj = (NeighList * ) 
    // (*pkim).get_data_by_index(self->kim_ind_neighObject, &kimerror);
    NeighList * neiobj = self->list;
 
    // subvert KIM api by using direct access to self->lmps_local_tot_num_atoms
    //
    //int * pnAtoms = (int *)
    // (*pkim).get_data_by_index(self->kim_ind_numberOfParticles, &kimerror);
    //int nAtoms = *pnAtoms;
    int nAtoms = self->lmps_local_tot_num_atoms;
 
    int j, jj, inum, *ilist, *numneigh, **firstneigh;
    inum = neiobj->inum;             //# of I atoms neighbors are stored for
    ilist = neiobj->ilist;           //local indices of I atoms
    numneigh = neiobj->numneigh;     // # of J neighbors for each I atom
    firstneigh = neiobj->firstneigh; // ptr to 1st J int value of each I atom
 
    if (*mode==0){ //iterator mode
       if (*request==1) { //increment iterator
          if (self->kim_iterator_position < inum) {
             *atom = ilist[self->kim_iterator_position];
             *numnei = numneigh[*atom];
 
             // strip off neighbor mask for molecular systems
             if (!self->lmps_using_molecular)
                *nei1atom = firstneigh[*atom];
             else
             {
                int n = *numnei;
                int *ptr = firstneigh[*atom];
                int *lmps_stripped_neigh_list = self->lmps_stripped_neigh_list;
                for (int i = 0; i < n; i++)
                   lmps_stripped_neigh_list[i] = *(ptr++) & NEIGHMASK;
                *nei1atom = lmps_stripped_neigh_list;
             }
 
             // set Rij if needed
             if (self->kim_model_using_Rij) {
                double* x = (double *) 
                  (*pkim).get_data_by_index(self->kim_ind_coordinates, 
                                            &kimerror);
                for (jj=0; jj < *numnei; jj++) {
                   int i = *atom;
                   j = (*nei1atom)[jj];
                   self->Rij[jj*3 +0] = -x[i*3+0] + x[j*3+0];
                   self->Rij[jj*3 +1] = -x[i*3+1] + x[j*3+1];
                   self->Rij[jj*3 +2] = -x[i*3+2] + x[j*3+2];
                }
             }
 
             // increment iterator
             self->kim_iterator_position++;
 
             return KIM_STATUS_OK; //successful increment
          } else if (self->kim_iterator_position == inum) {
             *numnei = 0;
             return KIM_STATUS_NEIGH_ITER_PAST_END; //reached end by iterator
          } else if (self->kim_iterator_position > inum || inum < 0){
             self->error->one(FLERR, "KIM neighbor iterator exceeded range");
          }
       } else if (*request == 0){ //restart iterator
          self->kim_iterator_position = 0;
          *numnei = 0;
          return KIM_STATUS_NEIGH_ITER_INIT_OK; //successful restart
       }
    } else if (*mode == 1){//locator mode
       // a negative index would pass the "< inum" test below and then read
       // before the start of numneigh/firstneigh, so reject it up front
       if (*request < 0)
          return KIM_STATUS_NEIGH_INVALID_REQUEST;
       else if (*request < inum) {
          *atom = *request;
          *numnei = numneigh[*atom];
 
          // strip off neighbor mask for molecular systems
          if (!self->lmps_using_molecular)
             *nei1atom = firstneigh[*atom];
          else
          {
             int n = *numnei;
             int *ptr = firstneigh[*atom];
             int *lmps_stripped_neigh_list = self->lmps_stripped_neigh_list;
             for (int i = 0; i < n; i++)
                lmps_stripped_neigh_list[i] = *(ptr++) & NEIGHMASK;
             *nei1atom = lmps_stripped_neigh_list;
          }
 
          // set Rij if needed
          if (self->kim_model_using_Rij){
             double* x = (double *) 
               (*pkim).get_data_by_index(self->kim_ind_coordinates, &kimerror);
             for(int jj=0; jj < *numnei; jj++){
                int i = *atom;
                int j = (*nei1atom)[jj];
                self->Rij[jj*3 +0] = -x[i*3+0] + x[j*3+0];
                self->Rij[jj*3 +1] = -x[i*3+1] + x[j*3+1];
                self->Rij[jj*3 +2] = -x[i*3+2] + x[j*3+2];
             }
          }
          return KIM_STATUS_OK; //successful end
       }
       else if (*request >= nAtoms || inum < 0)
          return KIM_STATUS_NEIGH_INVALID_REQUEST;
       else if (*request >= inum) {
          // atom exists (index < nAtoms) but has no stored neighbor list
          *atom = *request;
          *numnei = 0;
          return KIM_STATUS_OK; //successful but no neighbors in the list
       }
    } else return KIM_STATUS_NEIGH_INVALID_MODE; //invalid mode
 
    return -16; //should not get here: unspecified error
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Tear down the KIM state held by this pair style.  Every step is guarded
 // by its own flag or pointer check and resets that guard afterwards.
 void PairKIM::kim_free()
 {
    int status;
 
    // the model is destroyed before the API object it lives in
    if (kim_model_init_ok) {
       status = pkim->model_destroy();
       kim_model_init_ok = false;
    }
 
    // release what string_init() set up inside the API object
    if (kim_init_ok) {
       pkim->free(&status);
       kim_init_ok = false;
    }
 
    // the API object itself
    if (pkim != 0) {
       delete pkim;
       pkim = 0;
    }
 
    // LAMMPS element -> KIM particle type code table
    if (kim_particle_codes_ok) {
       delete [] kim_particle_codes;
       kim_particle_codes = 0;
       kim_particle_codes_ok = false;
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::kim_init()
 {
    int kimerror;
 
    // determine KIM Model capabilities (used in this function below)
    set_kim_model_has_flags();
 
    // create appropriate KIM descriptor file
    char* test_descriptor_string = 0;
    // allocate memory for test_descriptor_string and write descriptor file
    write_descriptor(&test_descriptor_string);
 
    // initialize KIM model
    pkim = new KIM_API_model();
    kimerror = pkim->string_init(test_descriptor_string, kim_modelname);
    if (kimerror != KIM_STATUS_OK)
       kim_error(__LINE__,"KIM initialization failed", kimerror);
    else
    {
       kim_init_ok = true;
       delete [] test_descriptor_string;
       test_descriptor_string = 0;
    }
 
    // determine kim_model_using_* true/false values
    //
    // check for half or full list
    kim_model_using_half = (pkim->is_half_neighbors(&kimerror));
    //
    char * NBC_method =(char *) pkim->get_NBC_method(&kimerror);
    kim_error(__LINE__,"NBC method not set",kimerror);
    // check for CLUSTER mode
    kim_model_using_cluster = (strcmp(NBC_method,"CLUSTER")==0);
    // check if Rij needed for get_neigh
    kim_model_using_Rij = (strcmp(NBC_method,"NEIGH_RVEC_F")==0);
    free((void*)NBC_method);
 
    // get correct index of each variable in kim_api object
    pkim->getm_index(&kimerror, 3*13,
                     "coordinates",                 &kim_ind_coordinates,                 1,
                     "cutoff",                      &kim_ind_cutoff,                      1,
                     "numberOfParticles",           &kim_ind_numberOfParticles,           1,
                     "numberParticleTypes",         &kim_ind_numberParticleTypes,         1,
                     "particleTypes",               &kim_ind_particleTypes,               1,
                     "numberContributingParticles", &kim_ind_numberContributingParticles, kim_model_using_half,
                     "particleEnergy",              &kim_ind_particleEnergy,              (int) kim_model_has_particleEnergy,
                     "energy",                      &kim_ind_energy,                      (int) kim_model_has_energy,
                     "forces",                      &kim_ind_forces,                      (int) kim_model_has_forces,
                     "neighObject",                 &kim_ind_neighObject,                 (int) !kim_model_using_cluster,
                     "get_neigh",                   &kim_ind_get_neigh,                   (int) !kim_model_using_cluster,
                     "particleVirial",              &kim_ind_particleVirial,              (int) kim_model_has_particleVirial,
                     "virial",                      &kim_ind_virial,                      no_virial_fdotr_compute);
    kim_error(__LINE__,"getm_index",kimerror);
 
    // setup mapping between LAMMPS unique elements and KIM particle type codes
    kim_particle_codes = new int[lmps_num_unique_elements];
    kim_particle_codes_ok = true;
    for(int i = 0; i < lmps_num_unique_elements; i++){
       int kimerror;
       kim_particle_codes[i] = pkim->get_partcl_type_code(lmps_unique_elements[i], &kimerror);
       kim_error(__LINE__, "create_kim_particle_codes: symbol not found ", kimerror);
    }
 
    // set pointer values in KIM API object that will not change during run
    set_statics();
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Register with the KIM API object the data pointers that keep the same
 // address for the whole run, and store `this` as the test buffer so that
 // the static get_neigh() callback can find its PairKIM instance.
 void PairKIM::set_statics()
 {
    int kimerror;
 
    // owned + ghost atoms
    lmps_local_tot_num_atoms = (int) (atom->nghost + atom->nlocal);
 
    // 7 entries of 4 arguments each
    // (presumably: index, size, data pointer, use-flag -- confirm against
    //  the KIM API reference)
    pkim->setm_data_by_index(
       &kimerror, 4*7,
       kim_ind_numberParticleTypes,         1, (void *) &(atom->ntypes),
                                            1,
       kim_ind_cutoff,                      1, (void *) &(kim_global_cutoff),
                                            1,
       kim_ind_numberOfParticles,           1, (void *) &lmps_local_tot_num_atoms,
                                            1,
       kim_ind_numberContributingParticles, 1, (void *) &(atom->nlocal),
                                            (int) kim_model_using_half,
       kim_ind_energy,                      1, (void *) &(eng_vdwl),
                                            (int) kim_model_has_energy,
       kim_ind_get_neigh,                   1, (void *) &get_neigh,
                                            (int) !kim_model_using_cluster,
       kim_ind_virial,                      1, (void *) &(virial[0]),
                                            no_virial_fdotr_compute);
    kim_error(__LINE__, "setm_data_by_index", kimerror);
 
    // lets get_neigh() recover this object via get_test_buffer()
    pkim->set_test_buffer((void *)this, &kimerror);
    kim_error(__LINE__, "set_test_buffer", kimerror);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Hand the KIM API object the pointers and sizes that may differ from one
 // force evaluation to the next: coordinates, particle types, per-atom
 // energy/virial arrays and the force array, then switch the virial
 // outputs on or off according to the current vflag settings.
 void PairKIM::set_volatiles()
 {
    int kimerror;
    // owned + ghost atoms; also read by get_neigh() for range checking
    lmps_local_tot_num_atoms = (int) (atom->nghost + atom->nlocal);
    intptr_t nall = (intptr_t) lmps_local_tot_num_atoms;
 
    // 2 entries of 4 arguments each
    pkim->setm_data_by_index(&kimerror, 4*2,
                             kim_ind_coordinates,    3*nall, (void*) &(atom->x[0][0]),  1,
                             kim_ind_particleTypes,  nall,   (void*) kim_particleTypes, 1);
    kim_error(__LINE__, "setm_data_by_index", kimerror);
 
    if (kim_model_has_particleEnergy && (eflag_atom == 1))
    {
       kimerror = pkim->set_data_by_index(kim_ind_particleEnergy, nall, (void*) eatom);
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    if (kim_model_has_particleVirial && (vflag_atom == 1))
    {
       kimerror = pkim->set_data_by_index(kim_ind_particleVirial, 6*nall, (void*) &(vatom[0][0]));
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    if (kim_model_has_forces)
    {
       // in hybrid mode the model writes into a scratch array instead of
       // atom->f
       if (lmps_hybrid)
          kimerror = pkim->set_data_by_index(kim_ind_forces, nall*3, (void*) &(lmps_force_tmp[0][0]));
       else
          kimerror = pkim->set_data_by_index(kim_ind_forces, nall*3, (void*) &(atom->f[0][0]));
       // tag names the call that was actually made (set_data_by_index)
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    // subvert the KIM api by direct access to this->list in get_neigh
    //
    //if (!kim_model_using_cluster)
    //   kimerror = pkim->set_data_by_index(kim_ind_neighObject, 1, (void*) this->list);
 
    // per-atom virial is computed only when requested for this step
    if (kim_model_has_particleVirial)
    {
       if(vflag_atom != 1) {
          pkim->set_compute_by_index(kim_ind_particleVirial, KIM_COMPUTE_FALSE, &kimerror);
       } else {
          pkim->set_compute_by_index(kim_ind_particleVirial, KIM_COMPUTE_TRUE, &kimerror);
       }
    }
 
    // global virial from the model, only when the model (not fdotr) supplies it
    if (no_virial_fdotr_compute == 1)
    {
       pkim->set_compute_by_index(kim_ind_virial,
                                  ((vflag_global != 1) ? KIM_COMPUTE_FALSE : KIM_COMPUTE_TRUE),
                                  &kimerror);
    }
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Capture the LAMMPS-side settings the KIM interface depends on: newton
 // flag, molecular mode (plus its scratch neighbor list), pair hybrid,
 // whether CLUSTER mode can be offered, and the unit system.
 void PairKIM::set_lmps_flags()
 {
    // is newton_pair on or off
    lmps_using_newton = (force->newton_pair == 1);
 
    // molecular systems need a scratch list holding the neighbors of one
    // atom with the neighbor mask stripped off
    lmps_using_molecular = (atom->molecular == 1);
    if (lmps_using_molecular) {
       memory->destroy(lmps_stripped_neigh_list);
       memory->create(lmps_stripped_neigh_list,neighbor->oneatom,
                      "pair:lmps_stripped_neigh_list");
    }
 
    // running under pair hybrid?
    lmps_hybrid = (force->pair_match("hybrid",0));
 
    // cluster mode requires a fully non-periodic box on a single processor
    const bool non_periodic = (domain->xperiodic == 0 &&
                               domain->yperiodic == 0 &&
                               domain->zperiodic == 0);
    lmps_support_cluster = (non_periodic && (comm->nprocs == 1));
 
    // translate the unit_style name into the lmps_units enum
    const char *style = update->unit_style;
    if (strcmp(style,"real") == 0) {
       lmps_units = REAL;
    } else if (strcmp(style,"metal") == 0) {
       lmps_units = METAL;
    } else if (strcmp(style,"si") == 0) {
       lmps_units = SI;
    } else if (strcmp(style,"cgs") == 0) {
       lmps_units = CGS;
    } else if (strcmp(style,"electron") == 0) {
       lmps_units = ELECTRON;
    } else if (strcmp(style,"lj") == 0) {
       error->all(FLERR,"LAMMPS unit_style lj not supported by KIM models");
    } else {
       error->all(FLERR,"Unknown unit_style");
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::set_kim_model_has_flags()
 {
    KIM_API_model mdl;
 
    int kimerror;
 
    // get KIM API object representing the KIM Model only
    kimerror = mdl.model_info(kim_modelname);
    kim_error(__LINE__,"KIM initialization failed.", kimerror);
 
    // determine if the KIM Model can compute the total energy
    mdl.get_index((char*) "energy", &kimerror);
    kim_model_has_energy = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_energy) 
      error->warning(FLERR,"KIM Model does not provide `energy'; "
                     "Potential energy will be zero");
 
    // determine if the KIM Model can compute the forces
    mdl.get_index((char*) "forces", &kimerror);
    kim_model_has_forces = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_forces) 
      error->warning(FLERR,"KIM Model does not provide `forces'; "
                     "Forces will be zero");
 
    // determine if the KIM Model can compute the particleEnergy
    mdl.get_index((char*) "particleEnergy", &kimerror);
    kim_model_has_particleEnergy = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_particleEnergy) 
      error->warning(FLERR,"KIM Model does not provide `particleEnergy'; "
                     "energy per atom will be zero");
 
    // determine if the KIM Model can compute the particleVerial
    mdl.get_index((char*) "particleVirial", &kimerror);
    kim_model_has_particleVirial = (kimerror == KIM_STATUS_OK);
    mdl.get_index((char*) "process_dEdr", &kimerror);
    kim_model_has_particleVirial = kim_model_has_particleVirial || 
      (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_particleVirial) 
      error->warning(FLERR,"KIM Model does not provide `particleVirial'; "
                     "virial per atom will be zero");
 
    // tear down KIM API object
    mdl.free(&kimerror);
    // now destructor will do the remaining tear down for mdl
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Build the KIM "test" descriptor text that kim_init() passes to
 // string_init().
 //
 //   test_descriptor_string - in: must point to a null pointer
 //                            out: points to a new[]-allocated,
 //                            NUL-terminated buffer; the caller releases it
 //                            with delete []
 //
 // The text is assembled section by section with strcat(): header and base
 // units (from lmps_units), supported particle types (one line per entry of
 // lmps_unique_elements), conventions (neighbor access and NBC methods),
 // model input, and model output (depending on the kim_model_has_* flags
 // and no_virial_fdotr_compute).
 //
 // NOTE(review): the buffer is a fixed 100*75 bytes and neither strcat()
 // nor the sprintf() into the 100-byte tmp_line is bounds-checked.
 void PairKIM::write_descriptor(char** test_descriptor_string)
 {
    // allocate memory
    if (*test_descriptor_string != 0) 
-     error->all(FLERR, "test_descriptor_string already allocated.");
+     error->all(FLERR, "Test_descriptor_string already allocated");
    // assuming 75 lines at 100 characters each (should be plenty)
    *test_descriptor_string = new char[100*75]; 
    // initialize
    strcpy(*test_descriptor_string, "");
 
    // Write Test name and units
    strcat(*test_descriptor_string,
       "# This file is automatically generated from LAMMPS pair_style PairKIM command\n"
       "TEST_NAME        := test_LAMMPS\n\n"
       "\n"
       "# Base units\n");
    // lmps_units is set in set_lmps_flags(); no default case is provided
    switch (lmps_units)
    {
       case REAL:
          strcat(*test_descriptor_string,
       "Unit_length      := A\n"
       "Unit_energy      := kcal/mol\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := fs\n\n");
       break;
       case METAL:
          strcat(*test_descriptor_string,
       "Unit_length      := A\n"
       "Unit_energy      := eV\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := ps\n\n");
       break;
       case SI:
          strcat(*test_descriptor_string,
       "Unit_length      := m\n"
       "Unit_energy      := J\n"
       "Unit_charge      := C\n"
       "Unit_temperature := K\n"
       "Unit_time        := s\n\n");
       break;
       case CGS:
          strcat(*test_descriptor_string,
       "Unit_length      := cm\n"
       "Unit_energy      := erg\n"
       "Unit_charge      := statC\n"
       "Unit_temperature := K\n"
       "Unit_time        := s\n\n");
       break;
       case ELECTRON:
          strcat(*test_descriptor_string,
       "Unit_length      := Bohr\n"
       "Unit_energy      := Hartree\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := fs\n\n");
       break;
    }
 
    // Write Supported types section
    strcat(*test_descriptor_string,
       "\n"
       "SUPPORTED_ATOM/PARTICLES_TYPES:\n"
       "# Symbol/name           Type            code\n\n");
    // codes are handed out sequentially starting at 1, in the order of
    // lmps_unique_elements
    int code=1;
    char* tmp_line = 0;
    tmp_line = new char[100];
    for (int i=0; i < lmps_num_unique_elements; i++){
       sprintf(tmp_line, "%-24s%-16s%-3i\n", lmps_unique_elements[i], 
               "spec", code++);
       strcat(*test_descriptor_string, tmp_line);
    }
    delete [] tmp_line;
    tmp_line = 0;
    strcat(*test_descriptor_string, "\n");
 
    // Write conventions section
    strcat(*test_descriptor_string,
       "\n"
       "CONVENTIONS:\n"
       "# Name                  Type\n\n"
       "ZeroBasedLists          flag\n");
    // can use iterator or locator neighbor mode, unless in hybrid mode
    if (lmps_hybrid)
       strcat(*test_descriptor_string,
       "Neigh_IterAccess        flag\n");
    else
       strcat(*test_descriptor_string,
       "Neigh_BothAccess        flag\n\n");
 
    // neighbor-list conventions always offered
    strcat(*test_descriptor_string,
       "NEIGH_PURE_H            flag\n"
       "NEIGH_PURE_F            flag\n"
       "NEIGH_RVEC_F            flag\n");
    // @@ add code for MI_OPBC_? support ????
    // CLUSTER is offered only when set_lmps_flags() found it usable
    if (lmps_support_cluster)
    {
       strcat(*test_descriptor_string,
       "CLUSTER                 flag\n\n");
    }
    else
    {
       strcat(*test_descriptor_string, "\n");
    }
 
    // Write input section
    strcat(*test_descriptor_string,
       "\n"
       "MODEL_INPUT:\n"
       "# Name                         Type         Unit    Shape\n\n"
       "numberOfParticles              integer      none    []\n\n"
       "numberContributingParticles    integer      none    []\n\n"
       "numberParticleTypes            integer      none    []\n\n"
       "particleTypes                  integer      none    [numberOfParticles]\n\n"
       "coordinates                    real*8       length  [numberOfParticles,3]\n\n"
       "neighObject                    pointer      none    []\n\n"
       "get_neigh                      method       none    []\n\n");
 
    // Write output section
    // NOTE(review): the section header below is spelled "MODEL_OUPUT";
    // confirm against the KIM descriptor format before changing it
    strcat(*test_descriptor_string,
       "\n"
       "MODEL_OUPUT:\n"
       "# Name                         Type         Unit    Shape\n\n"
       "compute                        method       none    []\n\n"
       "destroy                        method       none    []\n\n"
       "cutoff                         real*8       length  []\n\n");
    // optional outputs, listed only when available/requested
    if (kim_model_has_energy) strcat(*test_descriptor_string,
       "energy                         real*8       energy  []\n\n");
    if (kim_model_has_forces) strcat(*test_descriptor_string,
       "forces                         real*8       force   [numberOfParticles,3]\n\n");
    if (kim_model_has_particleEnergy) strcat(*test_descriptor_string,
       "particleEnergy                 real*8       energy  [numberOfParticles]\n\n");
    if (no_virial_fdotr_compute == 1) strcat(*test_descriptor_string,
       "virial                         real*8       energy  [6] \n\n");
    if (kim_model_has_particleVirial) strcat(*test_descriptor_string,
       "particleVirial                 real*8       energy  [numberOfParticles,6] \n\n");
 
    return;
 }
diff --git a/src/KIM/pair_kim.h b/src/KIM/pair_kim.h
index d2b099fb4..fcdf34500 100644
--- a/src/KIM/pair_kim.h
+++ b/src/KIM/pair_kim.h
@@ -1,222 +1,210 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Ryan S. Elliott,
                          Valeriu Smirichinski,
                          Ellad Tadmor (U Minn)
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Designed for use with the openkim-api-v1.1.0 (and newer) package
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(kim,PairKIM)
 
 #else
 
 #ifndef LMP_PAIR_KIM_H
 #define LMP_PAIR_KIM_H
 
 // includes from KIM & LAMMPS
+
 class KIM_API_model;
 #include "pair.h"
 
-
 namespace LAMMPS_NS {
 
    // Pair style "kim": forwards force/energy evaluation to a KIM Model
    // through a KIM_API_model object (pkim).  Data members are grouped below
    // by the member function that assigns them.
    class PairKIM : public Pair {
    public:
       PairKIM(class LAMMPS*);
       ~PairKIM();
 
       // LAMMPS Pair class virtual function prototypes
       virtual void compute(int, int);
       virtual void settings(int, char**);
       virtual void coeff(int, char**);
       virtual void init_style();
       virtual double init_one(int, int);
       virtual int pack_reverse_comm(int, int, double*);
       virtual void unpack_reverse_comm(int, int*, double*);
       virtual double memory_usage();
 
    private:
       // (nearly) all bool flags are not initialized in constructor, but set
       // explicitly in the indicated function.  All other data members are
       // initialized in constructor
 
       // values set in settings()
       char* kim_modelname;          // name of the KIM Model to load
 
       // values set in coeff()
 
       // values set in allocate(), called by coeff()
       void allocate();
       int* lmps_map_types_to_unique;
 
       // values set in coeff(), after calling allocate()
       char** lmps_unique_elements;  // names of unique elements given in pair_coeff command
       int lmps_num_unique_elements; // number of entries in lmps_unique_elements
 
       // values set in set_lmps_flags(), called from init_style()
       bool lmps_using_newton;       // true if newton_pair is on
       bool lmps_using_molecular;    // true if atom->molecular == 1
       bool lmps_hybrid;             // true if running with pair hybrid
       bool lmps_support_cluster;    // true if running in mode compat. with CLUSTER
       enum unit_sys {REAL, METAL, SI, CGS, ELECTRON};
       unit_sys lmps_units;          // unit system matching update->unit_style
 
       // values set in set_kim_model_has_flags(), called by kim_init()
       // (pkim itself is allocated in kim_init() and released in kim_free())
       KIM_API_model* pkim;
       bool kim_model_has_energy;
       bool kim_model_has_forces;
       bool kim_model_has_particleEnergy;
       bool kim_model_has_particleVirial; // also true if the model lists process_dEdr
 
       // values set in kim_init(), after call to string_init(_)
       bool kim_init_ok;             // true once string_init() succeeded
       bool kim_model_using_half;
       bool kim_model_using_cluster; // NBC method is "CLUSTER"
       bool kim_model_using_Rij;     // NBC method is "NEIGH_RVEC_F"
       // indices of the KIM arguments, filled by getm_index()
       int kim_ind_coordinates;
       int kim_ind_numberOfParticles;
       int kim_ind_numberContributingParticles;
       int kim_ind_numberParticleTypes;
       int kim_ind_particleTypes;
       int kim_ind_get_neigh;
       int kim_ind_neighObject;
       int kim_ind_cutoff;
       int kim_ind_energy;
       int kim_ind_particleEnergy;
       int kim_ind_forces;
       int kim_ind_virial;
       int kim_ind_particleVirial;
 
       // kim_model_init_ok: set in init_style(), after calling pkim->model_init()
       // kim_particle_codes(_ok): allocated and filled in kim_init(),
       //   released in kim_free()
       bool kim_model_init_ok;
       bool kim_particle_codes_ok;
       int *kim_particle_codes;      // KIM type code for each unique element
 
       // values set in set_statics(), called at end of kim_init(),
       //   then again in set_volatiles(), called in compute()
       int lmps_local_tot_num_atoms; // atom->nlocal + atom->nghost
       double kim_global_cutoff;     // KIM Model cutoff value
 
       // values set in compute()
       int lmps_maxalloc;            // max allocated memory value
       int* kim_particleTypes;       // array of KIM particle types
       double** lmps_force_tmp;      // temp storage for f, when running in hybrid mode
                                     // needed to avoid resetting f to zero in each object
       int* lmps_stripped_neigh_list;// neighbors of one atom, used when LAMMPS is in
                                     // molecular mode
                                     // (allocated in set_lmps_flags())
 
       // values used in get_neigh()
       int kim_iterator_position;    //get_neigh iterator current position
       double *Rij;                  // x[j]-x[i] for the neighbors of one atom
 
       // KIM specific helper functions
       void kim_error(int, const char *, int);
       void kim_init();
       void kim_free();
       void set_statics();
       void set_volatiles();
       void set_lmps_flags();
       void set_kim_model_has_flags();
       void write_descriptor(char** test_descriptor_string);
       // static methods used as callbacks from KIM
       static int get_neigh(void** kimmdl, int* mode, int* request,
                            int* atom, int* numnei, int** nei1atom, double** pRij);
    };
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Unrecognized virial argument in pair_style command
 
 Only two options are supported: LAMMPSvirial and KIMvirial
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Invalid args for non-hybrid pair coefficients
 
 "NULL" is only supported in pair_coeff calls when using pair hybrid
 
 E: PairKIM only works with 3D problems.
 
-UNDOCUMENTED
+This is a current restriction of this pair style.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: KIM neighbor iterator exceeded range
 
 This should not happen.  It likely indicates a bug
 in the KIM implementation of the interatomic potential
 where it is requesting neighbors incorrectly.
 
 E: LAMMPS unit_style lj not supported by KIM models
 
 Self-explanatory. Check the input script or data file.
 
 E: Unknown unit_style
 
 Self-explanatory. Check the input script or data file.
 
 W: KIM Model does not provide `energy'; Potential energy will be zero
 
 Self-explanatory.
 
 W: KIM Model does not provide `forces'; Forces will be zero
 
 Self-explanatory.
 
 W: KIM Model does not provide `particleEnergy'; energy per atom will be zero
 
 Self-explanatory.
 
 W: KIM Model does not provide `particleVirial'; virial per atom will be zero
 
 Self-explanatory.
 
-E: test_descriptor_string already allocated.
-
-UNDOCUMENTED
-
-U: PairKIM only works with 3D problems
-
-The KIM API does not explicitly support anything other than 3D problems
-
-U: Internal KIM error
-
-Self-explanatory. Check the output and kim.log file for more details.
-
-U: test_descriptor_string already allocated
+E: Test_descriptor_string already allocated
 
 This should not happen. It likely indicates a bug in the pair_kim implementation.
 
 */
diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp
index b5ef8a9e1..8fbc2da6f 100644
--- a/src/KSPACE/ewald_disp.cpp
+++ b/src/KSPACE/ewald_disp.cpp
@@ -1,1215 +1,1214 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter in 't Veld (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "ewald_disp.h"
 #include "math_vector.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "pair.h"
 #include "domain.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 #define SMALL 0.00001
 
 
 enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // same as in pair.h
 
 //#define DEBUG
 
 /* ---------------------------------------------------------------------- */
 
 EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command");
   accuracy_relative = fabs(atof(arg[0]));
   memset(function, 0, EWALD_NORDER*sizeof(int));
   kenergy = kvirial = NULL;
   cek_local = cek_global = NULL;
   ekr_local = NULL;
   hvec = NULL;
   kvec = NULL;
   B = NULL;
   first_output = 0;
   energy_self_peratom = NULL;
   virial_self_peratom = NULL;
   nmax = 0;
   q2 = 0;
   b2 = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 EwaldDisp::~EwaldDisp()
 {
   deallocate();
   deallocate_peratom();
   delete [] ekr_local;
   delete [] B;
 }
 
 /* --------------------------------------------------------------------- */
 
 void EwaldDisp::init()
 {
   nkvec = nkvec_max = nevec = nevec_max = 0;
   nfunctions = nsums = sums = 0;
   nbox = -1;
   bytes = 0.0;
 
   if (!comm->me) {
     if (screen) fprintf(screen,"EwaldDisp initialization ...\n");
     if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n");
   }
 
   if (domain->dimension == 2)
     error->all(FLERR,"Cannot use EwaldDisp with 2d simulation");
   if (slabflag == 0 && domain->nonperiodic > 0)
     error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp");
   if (slabflag == 1) {
     if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
         domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
       error->all(FLERR,"Incorrect boundaries with slab EwaldDisp");
   }
 
   scale = 1.0;
   //mumurd2e = force->mumurd2e;
   //dielectric = force->dielectric;
   mumurd2e = dielectric = 1.0;
 
   pair_check();
 
   int tmp;
   Pair *pair = force->pair;
   int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
   double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
   if (!(ptr||cutoff))
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   int ewald_order = ptr ? *((int *) ptr) : 1<<1;
   int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
   memset(function, 0, EWALD_NFUNCS*sizeof(int));
   for (int i=0; i<=EWALD_NORDER; ++i)                        // transcribe order
     if (ewald_order&(1<<i)) {                                // from pair_style
       int n[] = EWALD_NSUMS, k = 0;
       char str[128];
       switch (i) {
         case 1:
           k = 0; break;
         case 3:
           k = 3; break;
         case 6:
           if (ewald_mix==GEOMETRIC) { k = 1; break; }
           else if (ewald_mix==ARITHMETIC) { k = 2; break; }
           error->all(FLERR,
                      "Unsupported mixing rule in kspace_style ewald/disp");
         default:
           error->all(FLERR,"Unsupported order in kspace_style ewald/disp");
       }
       nfunctions += function[k] = 1;
       nsums += n[k];
     }
 
   g_ewald = 0;
   pair->init();  // so B is defined
   init_coeffs();
   init_coeff_sums();
 
   double qsum, qsqsum, bsbsum;
   qsum = qsqsum = bsbsum = 0.0;
   if (function[0]) {
     qsum = sum[0].x;
     qsqsum = sum[0].x2;
   }
   if (function[1]) bsbsum = sum[1].x2;
   if (function[2]) bsbsum = sum[2].x2;
 
   if (qsqsum == 0.0 && bsbsum == 0.0)
       error->all(FLERR,"Cannot use Ewald/disp solver "
                  "on system with no charge or LJ particles");
   if (fabs(qsum) > SMALL && comm->me == 0) {
       char str[128];
       sprintf(str,"System is not charge neutral, net charge = %g",qsum);
       error->warning(FLERR,str);
   }
 
-  //set accuracy (force units) from accuracy_relative or accuracy_absolute
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
 
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   // setup K-space resolution
 
   q2 = qsqsum * force->qqrd2e / force->dielectric;
   b2 = bsbsum; //Are these units right?
   bigint natoms = atom->natoms;
 
   if (function[0]) {
     g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
     if (g_ewald >= 1.0)
         error->all(FLERR,"KSpace accuracy too large to estimate G vector");
     g_ewald = sqrt(-log(g_ewald)) / *cutoff;
   }
   else if (function[1] || function[2]) {
     double *cutoffLJ = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
     //Try Newton Solver
     //Use old method to get guess
     g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoffLJ;
 
     double g_ewald_new = 
       NewtonSolve(g_ewald,(*cutoffLJ),natoms,shape_det(domain->h),b2);
     if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
     else error->warning(FLERR,"Ewald/disp Newton solver failed, "
                         "using old method to estimate g_ewald");
     if (g_ewald >= 1.0)
         error->all(FLERR,"KSpace accuracy too large to estimate G vector");
   }
 
   if (!comm->me) {
       if (screen) fprintf(screen, "  G vector = %g\n", g_ewald);
       if (logfile) fprintf(logfile, "  G vector = %g\n", g_ewald);
   }
 
   g_ewald_6 = g_ewald;
   deallocate_peratom();
   peratom_allocate_flag = 0;
 }
 
-
 /* ----------------------------------------------------------------------
    adjust EwaldDisp coeffs, called initially and whenever volume has changed
 ------------------------------------------------------------------------- */
 
 void EwaldDisp::setup()
 {
   volume = shape_det(domain->h)*slab_volfactor;
   memcpy(unit, domain->h_inv, sizeof(shape));
   shape_scalar_mult(unit, 2.0*MY_PI);
   unit[2] /= slab_volfactor;
 
-  //int nbox_old = nbox, nkvec_old = nkvec;
+  // int nbox_old = nbox, nkvec_old = nkvec;
 
-  if (accuracy>=1) {
+  if (accuracy >= 1) {
     nbox = 0;
     error->all(FLERR,"KSpace accuracy too low");
   }
 
   bigint natoms = atom->natoms;
   double err;
   int kxmax = 1;
   int kymax = 1;
   int kzmax = 1;
   err = rms(kxmax,domain->h[0],natoms,q2,b2);
   while (err > accuracy) {
     kxmax++;
     err = rms(kxmax,domain->h[0],natoms,q2,b2);
   }
   err = rms(kymax,domain->h[1],natoms,q2,b2);
   while (err > accuracy) {
     kymax++;
     err = rms(kymax,domain->h[1],natoms,q2,b2);
   }
   err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2);
   while (err > accuracy) {
     kzmax++;
     err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2);
   }
   nbox = MAX(kxmax,kymax);
   nbox = MAX(nbox,kzmax);
   double gsqxmx = unit[0]*unit[0]*kxmax*kxmax;
   double gsqymx = unit[1]*unit[1]*kymax*kymax;
   double gsqzmx = unit[2]*unit[2]*kzmax*kzmax;
   gsqmx = MAX(gsqxmx,gsqymx);
   gsqmx = MAX(gsqmx,gsqzmx);
   gsqmx *= 1.00001;
 
   reallocate();
   coefficients();
   init_coeffs();
   init_coeff_sums();
   init_self();
 
   if (!(first_output||comm->me)) {
     first_output = 1;
     if (screen) fprintf(screen,
                "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
     if (logfile) fprintf(logfile,
         "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
   }
 }
 
 /* ----------------------------------------------------------------------
    compute RMS accuracy for a dimension
 ------------------------------------------------------------------------- */
 
 double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2)
 {
   double value = 0.0;
 
   // Coulombic
 
   double g2 = g_ewald*g_ewald;
 
   value += 2.0*q2*g_ewald/prd *
     sqrt(1.0/(MY_PI*km*natoms)) *
     exp(-MY_PI*MY_PI*km*km/(g2*prd*prd));
 
   // Lennard-Jones
 
   double g7 = g2*g2*g2*g_ewald;
 
   value += 4.0*b2*g7/3.0 *
     sqrt(1.0/(MY_PI*natoms)) *
     (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) *
     (MY_PI*km/(g_ewald*prd) + 1));
 
   return value;
 }
 
 void EwaldDisp::reallocate()
 {
   int ix, iy, iz;
   int nkvec_max = nkvec;
   vector h;
 
   nkvec = 0;
   int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)];
   int *flag = kflag;
 
   for (ix=0; ix<=nbox; ++ix)
     for (iy=-nbox; iy<=nbox; ++iy)
       for (iz=-nbox; iz<=nbox; ++iz)
         if (!(ix||iy||iz)) *(flag++) = 0;
         else if ((!ix)&&(iy<0)) *(flag++) = 0;
         else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0;        // use symmetry
         else {
           h[0] = unit[0]*ix;
           h[1] = unit[5]*ix+unit[1]*iy;
           h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz;
           if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec;
         }
 
   if (nkvec>nkvec_max) {
     deallocate();                                        // free memory
     hvec = new hvector[nkvec];                                // hvec
     bytes += (nkvec-nkvec_max)*sizeof(hvector);
     kvec = new kvector[nkvec];                                // kvec
     bytes += (nkvec-nkvec_max)*sizeof(kvector);
     kenergy = new double[nkvec*nfunctions];                // kenergy
     bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double);
     kvirial = new double[6*nkvec*nfunctions];                // kvirial
     bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double);
     cek_local = new complex[nkvec*nsums];                // cek_local
     bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
     cek_global = new complex[nkvec*nsums];                // cek_global
     bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
     nkvec_max = nkvec;
   }
 
   flag = kflag;                                           // create index and
   kvector *k = kvec;                                      // wave vectors
   hvector *hi = hvec;
   for (ix=0; ix<=nbox; ++ix)
     for (iy=-nbox; iy<=nbox; ++iy)
       for (iz=-nbox; iz<=nbox; ++iz)
         if (*(flag++)) {
           hi->x = unit[0]*ix;
           hi->y = unit[5]*ix+unit[1]*iy;
           (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz;
           k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; }
 
   delete [] kflag;
 }
 
 
 void EwaldDisp::reallocate_atoms()
 {
   if (eflag_atom || vflag_atom)
     if (atom->nlocal > nmax) {
       deallocate_peratom();
       allocate_peratom();
       nmax = atom->nmax;
     }
 
   if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return;
   delete [] ekr_local;
   ekr_local = new cvector[nevec];
   bytes += (nevec-nevec_max)*sizeof(cvector);
   nevec_max = nevec;
 }
 
 
 void EwaldDisp::allocate_peratom()
 {
   memory->create(energy_self_peratom,
       atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom");
   memory->create(virial_self_peratom,
       atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom");
 }
 
 
 void EwaldDisp::deallocate_peratom()                        // free memory
 {
   memory->destroy(energy_self_peratom);
   memory->destroy(virial_self_peratom);
 }
 
 
 void EwaldDisp::deallocate()                                // free memory
 {
   delete [] hvec;                hvec = NULL;
   delete [] kvec;                kvec = NULL;
   delete [] kenergy;                kenergy = NULL;
   delete [] kvirial;                kvirial = NULL;
   delete [] cek_local;                cek_local = NULL;
   delete [] cek_global;                cek_global = NULL;
 }
 
 
 void EwaldDisp::coefficients()
 {
   vector h;
   hvector *hi = hvec, *nh;
   double eta2 = 0.25/(g_ewald*g_ewald);
   double b1, b2, expb2, h1, h2, c1, c2;
   double *ke = kenergy, *kv = kvirial;
   int func0 = function[0], func12 = function[1]||function[2],
       func3 = function[3];
 
   for (nh = (hi = hvec)+nkvec; hi<nh; ++hi) {                // wave vectors
     memcpy(h, hi, sizeof(vector));
     expb2 = exp(-(b2 = (h2 = vec_dot(h, h))*eta2));
     if (func0) {                                        // qi*qj/r coeffs
       *(ke++) = c1 = expb2/h2;
       *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
       *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
       *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
       *(kv++) = -c2*h[1]*h[0];
       *(kv++) = -c2*h[2]*h[0];
       *(kv++) = -c2*h[2]*h[1];
     }
     if (func12) {                                        // -Bij/r^6 coeffs
       b1 = sqrt(b2);                                        // minus sign folded
       h1 = sqrt(h2);                                        // into constants
       *(ke++) = c1 = -h1*h2*((c2=MY_PIS*erfc(b1))+(0.5/b2-1.0)*expb2/b1);
       *(kv++) = c1-(c2 = 3.0*h1*(c2-expb2/b1))*h[0]*h[0];
       *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
       *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
       *(kv++) = -c2*h[1]*h[0];
       *(kv++) = -c2*h[2]*h[0];
       *(kv++) = -c2*h[2]*h[1];
     }
     if (func3) {                                        // dipole coeffs
       *(ke++) = c1 = expb2/h2;
       *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
       *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
       *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
       *(kv++) = -c2*h[1]*h[0];
       *(kv++) = -c2*h[2]*h[0];
       *(kv++) = -c2*h[2]*h[1];
     }
   }
 }
 
 void EwaldDisp::init_coeffs()
 {
   int tmp;
   int n = atom->ntypes;
 
   if (function[1]) {                                        // geometric 1/r^6
     double **b = (double **) force->pair->extract("B",tmp);
     delete [] B;
     B = new double[n+1];
     bytes += (n+1)*sizeof(double);
     for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
   }
   if (function[2]) {                                        // arithmetic 1/r^6
     double **epsilon = (double **) force->pair->extract("epsilon",tmp);
     double **sigma = (double **) force->pair->extract("sigma",tmp);
     double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
     double c[7] = {
       1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
 
     if (!(epsilon&&sigma))
       error->all(
-          FLERR,"epsilon or sigma reference not set by pair style in ewald/n");
+          FLERR,"Epsilon or sigma reference not set by pair style in ewald/n");
     for (int i=0; i<=n; ++i) {
       eps_i = sqrt(epsilon[i][i]);
       sigma_i = sigma[i][i];
       sigma_n = 1.0;
       for (int j=0; j<7; ++j) {
         *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i;
       }
     }
   }
 }
 
 void EwaldDisp::init_coeff_sums()
 {
   if (sums) return;                            // calculated only once
   sums = 1;
 
   Sum sum_local[EWALD_MAX_NSUMS];
 
   memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum));
   if (function[0]) {                                        // 1/r
     double *q = atom->q, *qn = q+atom->nlocal;
     for (double *i=q; i<qn; ++i) {
       sum_local[0].x += i[0]; sum_local[0].x2 += i[0]*i[0]; }
   }
   if (function[1]) {                                        // geometric 1/r^6
     int *type = atom->type, *ntype = type+atom->nlocal;
     for (int *i=type; i<ntype; ++i) {
       sum_local[1].x += B[i[0]]; sum_local[1].x2 += B[i[0]]*B[i[0]]; }
   }
   if (function[2]) {                                        // arithmetic 1/r^6
     double *bi;
     int *type = atom->type, *ntype = type+atom->nlocal;
     for (int *i=type; i<ntype; ++i) {
       bi = B+7*i[0];
       sum_local[2].x2 += bi[0]*bi[6];
       for (int k=2; k<9; ++k) sum_local[k].x += *(bi++);
     }
   }
   if (function[3]&&atom->mu) {                                // dipole
     double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal;
     for (double *i = mu; i < nmu; i += 4)
       sum_local[9].x2 += i[3]*i[3];
   }
   MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world);
 }
 
 
 void EwaldDisp::init_self()
 {
   double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
   const double qscale = force->qqrd2e * scale;
 
   memset(energy_self, 0, EWALD_NFUNCS*sizeof(double));        // self energy
   memset(virial_self, 0, EWALD_NFUNCS*sizeof(double));
 
   if (function[0]) {                                        // 1/r
     virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x;
     energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0];
   }
   if (function[1]) {                                        // geometric 1/r^6
     virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x;
     energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1];
   }
   if (function[2]) {                                        // arithmetic 1/r^6
     virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+
         sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x);
     energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2];
   }
   if (function[3]) {                                        // dipole
     virial_self[3] = 0;                                        // in surface
     energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3];
   }
 }
 
 
 void EwaldDisp::init_self_peratom()
 {
   if (!(vflag_atom || eflag_atom)) return;
 
   double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
   const double qscale = force->qqrd2e * scale;
   double *energy = energy_self_peratom[0];
   double *virial = virial_self_peratom[0];
   int nlocal = atom->nlocal;
 
   memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double));
   memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double));
 
   if (function[0]) {                                        // 1/r
     double *ei = energy;
     double *vi = virial;
     double ce = qscale*g1/MY_PIS;
     double cv = -0.5*MY_PI*qscale/(g2*volume);
     double *qi = atom->q, *qn = qi + nlocal;
     for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
       double q = *qi;
       *vi = cv*q*sum[0].x;
       *ei = ce*q*q-vi[0];
     }
   }
   if (function[1]) {                                        // geometric 1/r^6
     double *ei = energy+1;
     double *vi = virial+1;
     double ce = -g3*g3/12.0;
     double cv = MY_PI*MY_PIS*g3/(6.0*volume);
     int *typei = atom->type, *typen = typei + atom->nlocal;
     for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
       double b = B[*typei];
       *vi = cv*b*sum[1].x;
       *ei = ce*b*b+vi[0];
     }
   }
   if (function[2]) {                                        // arithmetic 1/r^6
     double *bi;
     double *ei = energy+2;
     double *vi = virial+2;
     double ce = -g3*g3/3.0;
     double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume);
     int *typei = atom->type, *typen = typei + atom->nlocal;
     for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
       bi = B+7*typei[0]+7;
       for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0];
 
       /* PJV 20120225:
          should this be this instead?  above implies an inverse dependence
          seems to be the above way in original;  i recall having tested
          arithmetic mixing in the conception phase, but an extra test would
          be prudent (pattern repeats in multiple functions below)
 
       bi = B+7*typei[0];
       for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0];
 
       */
 
       *ei = ce*bi[0]*bi[6]+vi[0];
     }
   }
   if (function[3]&&atom->mu) {                                // dipole
     double *ei = energy+3;
     double *vi = virial+3;
     double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal;
     double ce = mumurd2e*2.0*g3/3.0/MY_PIS;
     for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
       *vi = 0;                                                // in surface
       *ei = ce*imu[3]*imu[3]-vi[0];
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    compute the EwaldDisp long-range force, energy, virial
 ------------------------------------------------------------------------- */
 
 void EwaldDisp::compute(int eflag, int vflag)
 {
   if (!nbox) return;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
 
   if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
       allocate_peratom();
       peratom_allocate_flag = 1;
       nmax = atom->nmax;
   }
 
   reallocate_atoms();
   init_self_peratom();
   compute_ek();
   compute_force();
   compute_surface();
   compute_energy();
   compute_energy_peratom();
   compute_virial();
   compute_virial_peratom();
 }
 
 
 void EwaldDisp::compute_ek()
 {
   cvector *ekr = ekr_local;
   int lbytes = (2*nbox+1)*sizeof(cvector);
   hvector *h = NULL;
   kvector *k, *nk = kvec+nkvec;
   cvector *z = new cvector[2*nbox+1];
   cvector z1, *zx, *zy, *zz, *zn = z+2*nbox;
   complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL;
   vector mui;
   double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0;
   double bi = 0.0, ci[7];
   double *mu = atom->mu ? atom->mu[0] : NULL;
   int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic;
   int func[EWALD_NFUNCS];
 
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   memset(cek_local, 0, n*sizeof(complex));                // reset sums
   while (x<xn) {
     zx = (zy = (zz = z+nbox)+1)-2;
     C_SET(zz->x, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0);        // z[0]
     if (tri) {                                                // triclinic z[1]
       C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]);
       C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]);
       C_ANGLE(z1.z, x[2]*unit[2]); x += 3;
     }
     else {                                                // orthogonal z[1]
       C_ANGLE(z1.x, *(x++)*unit[0]);
       C_ANGLE(z1.y, *(x++)*unit[1]);
       C_ANGLE(z1.z, *(x++)*unit[2]);
     }
     for (; zz<zn; --zx, ++zy, ++zz) {                  // set up z[k]=e^(ik.r)
       C_RMULT(zy->x, zz->x, z1.x);                        // 3D k-vector
       C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y);
       C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z);
     }
     kx = ky = -1;
     cek = cek_local;
     if (func[0]) qi = *(q++);
     if (func[1]) bi = B[*type];
     if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double));
     if (func[3]) {
       memcpy(mui, mu, sizeof(vector));
       vec_scalar_mult(mui, mu[3]);
       mu += 4;
       h = hvec;
     }
     for (k=kvec; k<nk; ++k) {                                // compute rho(k)
       if (ky!=k->y) {                                   // based on order in
         if (kx!=k->x) cx = z[kx = k->x].x;                // reallocate
         C_RMULT(zxy, z[ky = k->y].y, cx);
       }
       C_RMULT(zxyz, z[k->z].z, zxy);
       if (func[0]) {
                cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi;
       }
       if (func[1]) {
                cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi;
       }
       if (func[2]) for (i=0; i<7; ++i) {
         cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i];
       }
       if (func[3]) {
         register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h;
         cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk;
       }
     }
     ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes);
     ++type;
   }
   MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world);
 
   delete [] z;
 }
 
 
 void EwaldDisp::compute_force()
 {
   kvector *k;
   hvector *h, *nh;
   cvector *z = ekr_local;
   vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL;
   complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
   double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL;
   double *mu = atom->mu ? atom->mu[0] : NULL;
   const double qscale = force->qqrd2e * scale;
   double *ke, c[EWALD_NFUNCS] = {
     8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
     2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
   double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
   int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
   int func[EWALD_NFUNCS];
 
   if (atom->torque) t = atom->torque[0];
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));        // fj = -dE/dr =
   for (; f<fn; f+=3) {                                    //      -i*qj*fac*
     k = kvec;                                         //       Sum[conj(d)-d]
     kx = ky = -1;                                        // d = k*conj(ekj)*ek
     ke = kenergy;
     cek = cek_global;
     memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));
     if (func[3]) {
       register double di = mu[3] * c[3];
       mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
       mu++;
     }
     for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
       if (ky!=k->y) {                                   // based on order in
         if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
         C_RMULT(zxy, z[ky = k->y].y, zx);
       }
       C_CRMULT(zc, z[k->z].z, zxy);
       if (func[0]) {                                        // 1/r
         register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
         sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im;
       }
       if (func[1]) {                                        // geometric 1/r^6
         register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
         sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im;
       }
       if (func[2]) {                                        // arithmetic 1/r^6
         register double im, c = *(ke++);
         for (i=2; i<9; ++i) {
           im = c*(zc.im*cek->re+cek->im*zc.re); ++cek;
           sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im;
         }
       }
       if (func[3]) {                                        // dipole
         register double im = *(ke++)*(zc.im*cek->re+
             cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); ++cek;
         sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
       }
     }
     if (func[0]) {                                        // 1/r
       register double qi = *(q++)*c[0];
       f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi;
     }
     if (func[1]) {                                        // geometric 1/r^6
       register double bi = B[*type]*c[1];
       f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi;
     }
     if (func[2]) {                                        // arithmetic 1/r^6
       register double *bi = B+7*type[0]+7;
       for (i=2; i<9; ++i) {
         register double c2 = (--bi)[0]*c[2];
         f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2;
       }
     }
     if (func[3]) {                                        // dipole
       f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2];
       *(t++) -= mui[1]*sum[0][2]+mui[2]*sum[0][1]-mui[0]*kt;        // torque
       *(t++) -= mui[2]*sum[0][0]+mui[0]*sum[0][2]-mui[1]*kt;
       *(t++) -= mui[0]*sum[0][1]+mui[1]*sum[0][0]-mui[2]*kt;
     }
     z = (cvector *) ((char *) z+lbytes);
     ++type;
   }
 }
 
 
 void EwaldDisp::compute_surface()
 {
   if (!function[3]) return;
   if (!atom->mu) return;
 
   vector sum_local = VECTOR_NULL, sum_total;
   memset(sum_local, 0, sizeof(vector));
   double *i, *n, *mu = atom->mu[0];
 
   for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) {
     register double di = i[3];
     sum_local[0] += di*(i++)[0];
     sum_local[1] += di*(i++)[0];
     sum_local[2] += di*(i++)[0];
   }
   MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world);
 
   energy_self[3] += virial_self[3];
   virial_self[3] =
     mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume);
   energy_self[3] -= virial_self[3];
 
   if (!(vflag_atom || eflag_atom)) return;
 
   double *ei = energy_self_peratom[0]+3;
   double *vi = virial_self_peratom[0]+3;
   double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume;
 
   for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) {
     *ei += *vi;
     *vi = cv*i[3]*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]);
     *ei -= *vi;
   }
 }
 
 
 void EwaldDisp::compute_energy()
 {
   energy = 0.0;
   if (!eflag_global) return;
 
   complex *cek = cek_global;
   double *ke = kenergy;
   const double qscale = force->qqrd2e * scale;
   double c[EWALD_NFUNCS] = {
     4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
     2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
   double sum[EWALD_NFUNCS];
   int func[EWALD_NFUNCS];
 
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   memset(sum, 0, EWALD_NFUNCS*sizeof(double));                // reset sums
   for (int k=0; k<nkvec; ++k) {                       // sum over k vectors
     if (func[0]) {                                        // 1/r
       sum[0] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
     if (func[1]) {                                        // geometric 1/r^6
       sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
     if (func[2]) {                                        // arithmetic 1/r^6
       register double r =
             (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
             (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
             (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
         0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
       sum[2] += *(ke++)*r;
     }
     if (func[3]) {                                        // dipole
       sum[3] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
   }
   for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];
   if (slabflag) compute_slabcorr();
 }
 
 
 /* ----------------------------------------------------------------------
    add the k-space energy of every local atom to eatom[]
    does nothing unless eflag_atom is set
    for each atom the full k-vector list is walked once; every active
    interaction type func[0..3] consumes its own entries of kenergy and
    cek_global, so the ke and cek cursors advance in lock-step with func[]
 ------------------------------------------------------------------------- */
 
 void EwaldDisp::compute_energy_peratom()
 {
   if (!eflag_atom) return;
 
   kvector *k;
   hvector *h, *nh;
   cvector *z = ekr_local;                 // per-atom table, advanced by lbytes per atom
   vector  mui = VECTOR_NULL;
   double sum[EWALD_MAX_NSUMS];            // [0]=1/r, [1]=geometric, [2..8]=arithmetic, [9]=dipole
   complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
   double *q = atom->q;
   double *eatomj = eatom;
   double *mu = atom->mu ? atom->mu[0] : NULL;   // flat view, 4 doubles consumed per atom
   const double qscale = force->qqrd2e * scale;
   double *ke = kenergy;
   // one prefactor per interaction type, same order as func[]
   double c[EWALD_NFUNCS] = {
       4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
       2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
   int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
   int func[EWALD_NFUNCS];
 
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   for (int j = 0; j < atom->nlocal; j++, ++eatomj) {
     // rewind the k-space cursors and the cached x/y indices for this atom
     k = kvec;
     kx = ky = -1;
     ke = kenergy;
     cek = cek_global;
     memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double));
     if (func[3]) {
       // di is read before mu is advanced; the three (mu++)[0] reads pick up
       // elements 0,1,2 of this atom and the final mu++ skips element 3
       register double di = mu[3] * c[3];
       mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
       mu++;
     }
     for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
       // zx and zxy are only rebuilt when the x or y index of k changes
       if (ky!=k->y) {                              // based on order in
         if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
         C_RMULT(zxy, z[ky = k->y].y, zx);
       }
       // NOTE(review): C_RMULT/C_CRMULT are macros defined elsewhere; they
       // presumably form complex products of the per-axis factors - confirm
       C_CRMULT(zc, z[k->z].z, zxy);
       if (func[0]) {                                        // 1/r
         sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
       if (func[1]) {                                        // geometric 1/r^6
         sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
       if (func[2]) {                                        // arithmetic 1/r^6
         // one kenergy entry is shared by seven consecutive cek entries;
         // the local c shadows the prefactor array inside this scope
         register double im, c = *(ke++);
         for (i=2; i<9; ++i) {
           im = c*(cek->re*zc.re - cek->im*zc.im); ++cek;
           sum[i] += im;
         }
       }
       if (func[3]) {                                        // dipole
         sum[9] += *(ke++)*(cek->re*zc.re +
                   cek->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); ++cek;
       }
     }
 
     // fold the accumulated sums into this atom's energy and remove the
     // per-atom self term of each active interaction type
     if (func[0]) {                                        // 1/r
       register double qj = *(q++)*c[0];
       *eatomj += sum[0]*qj - energy_self_peratom[j][0];
     }
     if (func[1]) {                                        // geometric 1/r^6
       register double bj = B[*type]*c[1];
       *eatomj += sum[1]*bj - energy_self_peratom[j][1];
     }
     if (func[2]) {                                        // arithmetic 1/r^6
       // bj starts one past the 7 coefficients of this type and walks
       // backwards, so sum[2] pairs with B[7*type+6] and sum[8] with B[7*type]
       register double *bj = B+7*type[0]+7;
       for (i=2; i<9; ++i) {
         register double c2 = (--bj)[0]*c[2];
         *eatomj += 0.5*sum[i]*c2;
       }
       *eatomj -= energy_self_peratom[j][2];
     }
     if (func[3]) {                                        // dipole
       *eatomj += sum[9] - energy_self_peratom[j][3];
     }
     // step to the next atom's block of (2*nbox+1) cvectors
     z = (cvector *) ((char *) z+lbytes);
     ++type;
   }
 }
 
 
 // exchange two double lvalues through a temporary; expands to a braced
 // block and names each argument twice, so pass side-effect-free lvalues
 #define swap(a, b) { register double t = a; a= b; b = t; }
 
 void EwaldDisp::compute_virial()
 {
   memset(virial, 0, sizeof(shape));
   if (!vflag_global) return;
 
   complex *cek = cek_global;
   double *kv = kvirial;
   const double qscale = force->qqrd2e * scale;
   double c[EWALD_NFUNCS] = {
     4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
     2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
   shape sum[EWALD_NFUNCS];
   int func[EWALD_NFUNCS];
 
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   memset(sum, 0, EWALD_NFUNCS*sizeof(shape));
   for (int k=0; k<nkvec; ++k) {                      // sum over k vectors
     if (func[0]) {                                         // 1/r
       register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
       sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r;
       sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r;
     }
     if (func[1]) {                                        // geometric 1/r^6
       register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
       sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r;
       sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r;
     }
     if (func[2]) {                                        // arithmetic 1/r^6
       register double r =
             (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
             (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
             (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
         0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
       sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r;
       sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r;
     }
     if (func[3]) {
       register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
       sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
       sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
     }
   }
   for (int k=0; k<EWALD_NFUNCS; ++k)
     if (func[k]) {
       shape self = {virial_self[k], virial_self[k], virial_self[k], 0, 0, 0};
       shape_scalar_mult(sum[k], c[k]);
       shape_add(virial, sum[k]);
       shape_subtr(virial, self);
     }
 }
 
 /* ----------------------------------------------------------------------
    add the k-space virial of every local atom to vatom[][6]
    does nothing unless vflag_atom is set
    for each atom the full k-vector list is walked once; every active
    interaction type func[0..3] consumes its own entries of kvirial
    (6 per k-vector) and cek_global, so both cursors advance with func[]
 ------------------------------------------------------------------------- */
 
 void EwaldDisp::compute_virial_peratom()
 {
   if (!vflag_atom) return;
 
   kvector *k;
   hvector *h, *nh;
   cvector *z = ekr_local;                 // per-atom table, advanced by lbytes per atom
   vector  mui = VECTOR_NULL;
   complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
   double *kv;
   double *q = atom->q;
   double *vatomj = vatom[0];
   double *mu = atom->mu ? atom->mu[0] : NULL;   // flat view, 4 doubles per atom
   const double qscale = force->qqrd2e * scale;
   // one prefactor per interaction type, same order as func[]
   double c[EWALD_NFUNCS] = {
     4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
     2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
   shape sum[EWALD_MAX_NSUMS];             // [0]=1/r, [1]=geometric, [2..8]=arithmetic, [9]=dipole
   int func[EWALD_NFUNCS];
 
   memcpy(func, function, EWALD_NFUNCS*sizeof(int));
   int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
   for (int j = 0; j < atom->nlocal; j++, vatomj += 6) {
     // rewind the k-space cursors and the cached x/y indices for this atom
     k = kvec;
     kx = ky = -1;
     kv = kvirial;
     cek = cek_global;
     memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape));
     if (func[3]) {
       // read elements 0,1,2 of this atom's mu row and then skip the whole
       // 4-wide row; mixing post-increment with indices 1 and 2 would read
       // elements 0,2,4 instead (the last one belongs to the next atom)
       register double di = mu[3] * c[3];
       mui[0] = di*mu[0]; mui[1] = di*mu[1]; mui[2] = di*mu[2];
       mu += 4;
     }
     for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
       // zx and zxy are only rebuilt when the x or y index of k changes
       if (ky!=k->y) {                                // based on order in
         if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
         C_RMULT(zxy, z[ky = k->y].y, zx);
       }
       C_CRMULT(zc, z[k->z].z, zxy);
       if (func[0]) {                                        // 1/r
         register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
         sum[0][0] += *(kv++)*r;
         sum[0][1] += *(kv++)*r;
         sum[0][2] += *(kv++)*r;
         sum[0][3] += *(kv++)*r;
         sum[0][4] += *(kv++)*r;
         sum[0][5] += *(kv++)*r;
       }
       if (func[1]) {                                        // geometric 1/r^6
         register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
         sum[1][0] += *(kv++)*r;
         sum[1][1] += *(kv++)*r;
         sum[1][2] += *(kv++)*r;
         sum[1][3] += *(kv++)*r;
         sum[1][4] += *(kv++)*r;
         sum[1][5] += *(kv++)*r;
       }
       if (func[2]) {                                        // arithmetic 1/r^6
         // the same 6 kvirial entries are reused for all 7 cek entries:
         // kv is rewound after each pass and stepped past them once at the end
         register double r;
         for (i=2; i<9; ++i) {
           r = cek->re*zc.re - cek->im*zc.im; ++cek;
           sum[i][0] += *(kv++)*r;
           sum[i][1] += *(kv++)*r;
           sum[i][2] += *(kv++)*r;
           sum[i][3] += *(kv++)*r;
           sum[i][4] += *(kv++)*r;
           sum[i][5] += *(kv++)*r;
           kv -= 6;
         }
         kv += 6;
       }
       if (func[3]) {                                        // dipole
         // NOTE(review): compute_energy_peratom() combines the dipole term as
         // re*re + im*im while this uses re*re - im*im - confirm which is meant
         register double
           r = (cek->re*zc.re - cek->im*zc.im)
              *(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); ++cek;
         sum[9][0] += *(kv++)*r;
         sum[9][1] += *(kv++)*r;
         sum[9][2] += *(kv++)*r;
         sum[9][3] += *(kv++)*r;
         sum[9][4] += *(kv++)*r;
         sum[9][5] += *(kv++)*r;
       }
     }
 
     // fold the accumulated sums into this atom's 6-component virial
     if (func[0]) {                                        // 1/r
       register double qi = *(q++)*c[0];
       for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi;
     }
     if (func[1]) {                                        // geometric 1/r^6
       register double bi = B[*type]*c[1];
       for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi;
     }
     if (func[2]) {                                        // arithmetic 1/r^6
       // bj walks the 7 coefficients of this type backwards, so sum[2]
       // pairs with B[7*type+6] and sum[8] with B[7*type]
       register double *bj = B+7*type[0]+7;
       for (i=2; i<9; ++i) {
         register double c2 = (--bj)[0]*c[2];
         for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2;
       }
     }
     if (func[3]) {                                        // dipole
       for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n];
     }
 
     // self terms act on the three diagonal components only
     for (int k=0; k<EWALD_NFUNCS; ++k) {
       if (func[k]) {
         for (int n = 0; n < 3; n++) vatomj[n] -= virial_self_peratom[j][k];
       }
     }
 
     // step to the next atom's block of (2*nbox+1) cvectors
     z = (cvector *) ((char *) z+lbytes);
     ++type;
   }
 }
 
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction (J. Chem. Phys. 111, 3155) that damps the
    interaction between periodically repeated slabs parallel to the xy
    plane.  Approximates 2-D EwaldDisp well when enough empty space
    separates the slabs.  Corrects the global energy (if eflag_global),
    the per-atom energy (if eflag_atom) and, always, the z forces.
 ------------------------------------------------------------------------- */
 
 void EwaldDisp::compute_slabcorr()
 {
   const int nlocal = atom->nlocal;
   double *charge = atom->q;
   double **pos = atom->x;
 
   // this proc's share of the z dipole moment, then the global sum
 
   double zdipole_local = 0.0;
   for (int n = 0; n < nlocal; ++n) zdipole_local += charge[n]*pos[n][2];
 
   double zdipole;
   MPI_Allreduce(&zdipole_local,&zdipole,1,MPI_DOUBLE,MPI_SUM,world);
 
   const double e_slabcorr = 2.0*MY_PI*zdipole*zdipole/volume;
   const double qscale = force->qqrd2e * scale;
 
   // global energy
 
   if (eflag_global) energy += qscale * e_slabcorr;
 
   // per-atom energy
 
   if (eflag_atom) {
     const double efact = 2.0*MY_PI*zdipole/volume;
     for (int n = 0; n < nlocal; ++n)
       eatom[n] += qscale * charge[n]*pos[n][2]*efact;
   }
 
   // z-force correction, applied unconditionally
 
   const double ffact = -4.0*MY_PI*zdipole/volume;
   double **fatom = atom->f;
   for (int n = 0; n < nlocal; ++n) fatom[n][2] += qscale * charge[n]*ffact;
 }
 
 /* ----------------------------------------------------------------------
    Newton iteration on f() used to find g_ewald for LJ systems
    x is the starting guess; returns the converged root, or -1 if the
    step size has not dropped below the tolerance after 10000 iterations
  ------------------------------------------------------------------------- */
 
 double EwaldDisp::NewtonSolve(double x, double Rc, 
                               bigint natoms, double vol, double b2)
 {
   const int maxit = 10000;      // iteration cap
   const double tol = 0.00001;   // convergence threshold on |step|
 
   for (int iter = 0; iter < maxit; ++iter) {
     const double step = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2);
     x -= step;
     if (fabs(step) < tol) return x;
   }
 
   return -1;
 }
 
 /* ----------------------------------------------------------------------
    f(x): target function whose root NewtonSolve() searches for
    evaluates to (error-style expression in x, Rc, natoms, vol, b2) minus
    the requested accuracy
  ------------------------------------------------------------------------- */
 
 double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
 {
   const double a = Rc*x;
   const double prefactor =
     4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a);
   const double series = 6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a;
   return prefactor * series - accuracy;
 }
 
 /* ----------------------------------------------------------------------
    f'(x) by a one-sided (forward) finite difference with fixed step 1e-6
  ------------------------------------------------------------------------- */
 
 double EwaldDisp::derivf(double x, double Rc, 
                          bigint natoms, double vol, double b2)
 {
   const double step = 0.000001;
   const double f_ahead = f(x + step,Rc,natoms,vol,b2);
   const double f_here = f(x,Rc,natoms,vol,b2);
   return (f_ahead - f_here) / step;
 }
diff --git a/src/KSPACE/ewald_disp.h b/src/KSPACE/ewald_disp.h
index 0c102da5c..b076abb16 100644
--- a/src/KSPACE/ewald_disp.h
+++ b/src/KSPACE/ewald_disp.h
@@ -1,154 +1,160 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(ewald/disp,EwaldDisp)
 
 #else
 
 #ifndef LMP_EWALD_DISP_H
 #define LMP_EWALD_DISP_H
 
 #include "kspace.h"
 #include "math_complex.h"
 
 namespace LAMMPS_NS {
 
 // EWALD_NFUNCS: number of interaction types handled (indexed 0..3 in the
 //   implementation as 1/r, geometric 1/r^6, arithmetic 1/r^6, dipole)
 // EWALD_NSUMS: number of partial sums each interaction type needs
 // EWALD_MAX_NSUMS: total of EWALD_NSUMS (1+1+7+1), size of per-sum arrays
 #define EWALD_NORDER        6
 #define EWALD_NFUNCS        4
 #define EWALD_MAX_NSUMS     10
 #define EWALD_NSUMS        {1, 1, 7, 1}
 
 // cvector: one complex factor per Cartesian axis
 // hvector: real-valued 3-vector, one per k-vector
 // kvector: integer 3-vector, used to index per-axis cvector tables
 typedef struct cvector { complex x, y, z; } cvector;
 typedef struct hvector { double x, y, z; } hvector;
 typedef struct kvector { long x, y, z; } kvector;
 
 // KSpace style "ewald/disp": Ewald summation covering Coulomb (1/r),
 // dispersion (1/r^6, geometric or arithmetic mixing) and dipole terms.
 class EwaldDisp : public KSpace {
  public:
   EwaldDisp(class LAMMPS *, int, char **);
   ~EwaldDisp();
   void init();
   void setup();
   void compute(int, int);
   double memory_usage() {return bytes;}   // reports the tracked byte count
 
  private:
   double unit[6];
   // function[k] != 0 enables interaction type k (see EWALD_NFUNCS)
   int function[EWALD_NFUNCS], first_output;
 
   // nkvec: number of k-vectors in kvec/hvec
   // nbox: each atom owns (2*nbox+1) cvectors in ekr_local
   int nkvec, nkvec_max, nevec, nevec_max,
       nbox, nfunctions, nsums, sums;
   int peratom_allocate_flag;
   int nmax;
   double bytes;
   double gsqmx,q2,b2;
   // per-k-vector coefficient tables and per-type self terms
   double *kenergy, energy_self[EWALD_NFUNCS];
   double *kvirial, virial_self[EWALD_NFUNCS];
   // per-atom self terms, indexed [atom][interaction type]
   double **energy_self_peratom;
   double **virial_self_peratom;
   cvector *ekr_local;
   hvector *hvec;
   kvector *kvec;
 
   // B: dispersion coefficients indexed by atom type
   //    (1 per type for geometric, 7 per type for arithmetic mixing)
   double mumurd2e, dielectric, *B, volume;
   struct Sum { double x, x2; } sum[EWALD_MAX_NSUMS];
   complex *cek_local, *cek_global;
 
   double rms(int, double, bigint, double, double);
   void reallocate();
   void allocate_peratom();
   void reallocate_atoms();
   void deallocate();
   void deallocate_peratom();
   void coefficients();
   void init_coeffs();
   void init_coeff_sums();
   void init_self();
   void init_self_peratom();
   void compute_ek();
   void compute_force();
   void compute_surface();
   void compute_energy();
   void compute_energy_peratom();
   void compute_virial();
   void compute_virial_peratom();
   void compute_slabcorr();
   // Newton root finder for g_ewald, with its target function f() and the
   // finite-difference derivative derivf()
   double NewtonSolve(double, double, bigint, double, double);
   double f(double, double, bigint, double, double);
   double derivf(double, double, bigint, double, double);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot use EwaldDisp with 2d simulation
 
 This is a current restriction of this command.
 
 E: Cannot use nonperiodic boundaries with EwaldDisp
 
-UNDOCUMENTED
+For kspace style ewald/disp, all 3 dimensions must have periodic
+boundaries unless you use the kspace_modify command to define a 2d
+slab with a non-periodic z dimension.
 
 E: Incorrect boundaries with slab EwaldDisp
 
-UNDOCUMENTED
+Must have periodic x,y dimensions and non-periodic z dimension to use
+2d slab option with Ewald.
 
 E: KSpace style is incompatible with Pair style
 
-UNDOCUMENTED
+Setting a kspace style requires that a pair style with a long-range
+Coulombic and Dispersion component be selected.
 
 E: Unsupported mixing rule in kspace_style ewald/disp
 
-UNDOCUMENTED
+Only geometric mixing is supported.
 
 E: Unsupported order in kspace_style ewald/disp
 
-UNDOCUMENTED
+Only 1/r^6 dispersion terms are supported.
 
 E: Cannot use Ewald/disp solver on system with no charge or LJ particles
 
-UNDOCUMENTED
+No atoms in system have a non-zero charge or are LJ particles.  Change
+charges or change options of the kspace solver/pair style.
 
 W: System is not charge neutral, net charge = %g
 
 The total charge on all atoms on the system is not 0.0, which
 is not valid for Ewald or PPPM.
 
 E: KSpace accuracy too large to estimate G vector
 
-UNDOCUMENTED
+Reduce the accuracy request or specify gewald explicitly
+via the kspace_modify command.
 
 W: Ewald/disp Newton solver failed, using old method to estimate g_ewald
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: KSpace accuracy too low
 
-UNDOCUMENTED
+Requested accuracy must be less than 1.0.
 
-E: epsilon or sigma reference not set by pair style in ewald/n
+E: Epsilon or sigma reference not set by pair style in ewald/n
 
-UNDOCUMENTED
+The pair style is not providing the needed epsilon or sigma values.
 
 */
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index c8076e2a1..2c32c50dd 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -1,2433 +1,2433 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "msm.h"
 #include "atom.h"
 #include "comm.h"
 #include "commgrid.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "memory.h"
 #include "error.h"
 
 #include "math_const.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // SMALL is the tolerance on |net charge| used by the neutrality check
 // in MSM::init(); the other constants are used outside this part of the file
 #define MAX_LEVELS 10
 #define OFFSET 16384
 #define SMALL 0.00001
 #define LARGE 10000.0
 
 // flags handed to CommGrid::reverse_comm() / forward_comm() to select
 // which grid quantity is being communicated
 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    parse the relative accuracy (first argument) and put every pointer
    owned by this class into a known NULL state; nothing grid-dependent
    is allocated here
 ------------------------------------------------------------------------- */
 
 MSM::MSM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   if (narg < 1) error->all(FLERR,"Illegal kspace_style msm command");
 
   msmflag = 1;
   accuracy_relative = atof(arg[0]);
 
   // single allowed grid factor: 2
 
   nfactors = 1;
   factors = new int[nfactors];
   factors[0] = 2;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // interpolation weights and particle -> grid map
 
   phi1d = NULL;
   dphi1d = NULL;
   nmax = 0;
   part2grid = NULL;
 
   // direct-sum tables (regular levels and top level)
 
   g_direct = NULL;
   g_direct_top = NULL;
 
   v0_direct = NULL;
   v1_direct = NULL;
   v2_direct = NULL;
   v3_direct = NULL;
   v4_direct = NULL;
   v5_direct = NULL;
 
   v0_direct_top = NULL;
   v1_direct_top = NULL;
   v2_direct_top = NULL;
   v3_direct_top = NULL;
   v4_direct_top = NULL;
   v5_direct_top = NULL;
 
   // grid communicators, one per level once allocated
 
   cg = NULL;
   cg_peratom = NULL;
 
   levels = 0;
   peratom_allocate_flag = 0;
 
   // default interpolation order
 
   order = 8;
 }
 
 /* ----------------------------------------------------------------------
    release everything this solver allocated
 ------------------------------------------------------------------------- */
 
 MSM::~MSM()
 {
   delete [] factors;
 
   // grid-dependent storage, global and per-atom
 
   deallocate();
   deallocate_peratom();
 
   memory->destroy(part2grid);
 
   // direct-sum energy tables
 
   memory->destroy(g_direct);
   memory->destroy(g_direct_top);
 
   // direct-sum virial tables, regular levels
 
   memory->destroy(v0_direct);
   memory->destroy(v1_direct);
   memory->destroy(v2_direct);
   memory->destroy(v3_direct);
   memory->destroy(v4_direct);
   memory->destroy(v5_direct);
 
   // direct-sum virial tables, top level
 
   memory->destroy(v0_direct_top);
   memory->destroy(v1_direct_top);
   memory->destroy(v2_direct_top);
   memory->destroy(v3_direct_top);
   memory->destroy(v4_direct_top);
   memory->destroy(v5_direct_top);
 
   // per-level bookkeeping arrays go last
 
   deallocate_levels();
 }
 
 /* ----------------------------------------------------------------------
    called once before run
 ------------------------------------------------------------------------- */
 
 void MSM::init()
 {
   if (me == 0) {
     if (screen) fprintf(screen,"MSM initialization ...\n");
     if (logfile) fprintf(logfile,"MSM initialization ...\n");
   }
 
   // error check
 
   if (domain->triclinic)
     error->all(FLERR,"Cannot (yet) use MSM with triclinic box");
 
   if (domain->dimension == 2)
     error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
 
   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
 
   if (slabflag == 1)
       error->all(FLERR,"Cannot use slab correction with MSM");
 
   if (order < 4 || order > 10) {
     char str[128];
     sprintf(str,"MSM order must be 4, 6, 8, or 10");
     error->all(FLERR,str);
   }
 
   if (order%2 != 0) error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
 
   if (sizeof(FFT_SCALAR) != 8) error->all(FLERR,"Cannot (yet) use single precision with MSM (remove -DFFT_SINGLE from Makefile and recompile)");
 
   // extract short-range Coulombic cutoff from pair style
 
   qqrd2e = force->qqrd2e;
   scale = 1.0;
 
   pair_check();
 
   int itmp;
   double *p_cutoff = (double *) force->pair->extract("cut_msm",itmp);
   if (p_cutoff == NULL)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   cutoff = *p_cutoff;
 
   // compute qsum & qsqsum and give error if not charge-neutral
 
   qsum = qsqsum = 0.0;
   for (int i = 0; i < atom->nlocal; i++) {
     qsum += atom->q[i];
     qsqsum += atom->q[i]*atom->q[i];
   }
 
   double tmp;
   MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum = tmp;
   MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsqsum = tmp;
   q2 = qsqsum * force->qqrd2e / force->dielectric;
 
   if (qsqsum == 0.0)
     error->all(FLERR,"Cannot use kspace solver on system with no charge");
 
   // not yet sure of the correction needed for non-neutral systems
 
   if (fabs(qsum) > SMALL) {
     char str[128];
     sprintf(str,"System is not charge neutral, net charge = %g",qsum);
     error->all(FLERR,str);
   }
 
   // set accuracy (force units) from accuracy_relative or accuracy_absolute
 
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   // setup MSM grid resolution
 
   set_grid_global();
   setup();
 
   double estimated_error = estimate_total_error();
 
   // output grid stats
 
   int ngrid_max;
   MPI_Allreduce(&ngrid[0],&ngrid_max,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  3d grid size/proc = %d\n",
                         ngrid_max);
       fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
               estimated_error);
       fprintf(screen,"  estimated relative force accuracy = %g\n",
               estimated_error/two_charge_force);
     }
     if (logfile) {
       fprintf(logfile,"  3d grid size/proc = %d\n",
                          ngrid_max);
       fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
               estimated_error);
       fprintf(logfile,"  estimated relative force accuracy = %g\n",
               estimated_error/two_charge_force);
     }
   }
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
       fprintf(screen,"  order = %d\n",order);
     }
     if (logfile) {
       fprintf(logfile,"  grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
       fprintf(logfile,"  order = %d\n",order);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    cutoff needed to reach the target accuracy for grid spacing h along a
    box edge of length prd
 ------------------------------------------------------------------------- */
 
 double MSM::estimate_cutoff(double h, double prd)
 {
   const int p = order - 1;
 
   // Mp: Table 5.1 of Hardy's thesis
   // cprime: equation 4.17 of Hardy's thesis
   // error_scaling: empirical fit that converts to rms force errors
 
   double Mp = 1;
   double cprime = 1;
   double error_scaling = 1;
 
   switch (p) {
   case 3:
     Mp = 9;
     cprime = 1.0/6.0;
     error_scaling = 0.39189561;
     break;
   case 5:
     Mp = 825;
     cprime = 1.0/30.0;
     error_scaling = 0.150829428;
     break;
   case 7:
     Mp = 130095;
     cprime = 1.0/140.0;
     error_scaling = 0.049632967;
     break;
   case 9:
     Mp = 34096545;
     cprime = 1.0/630.0;
     error_scaling = 0.013520855;
     break;
   default:
     error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
     break;
   }
 
   // equation 4.1 of Hardy's thesis, then the empirical rms conversion
 
   double C_p = 4.0*cprime*Mp/3.0;
   C_p *= error_scaling;
 
   // equation 3.200 of Hardy's thesis
 
   double cut = C_p*pow(h,(p-1))/accuracy;
 
   // dependence of the error on charge, box length and atom count
 
   cut *= q2/(prd*sqrt(atom->natoms));
 
   return pow(cut,1.0/double(p));
 }
 
 /* ----------------------------------------------------------------------
    RMS force error estimate for grid spacing h along a box edge of
    length prd, using the current cutoff
 ------------------------------------------------------------------------- */
 
 double MSM::estimate_1d_error(double h, double prd)
 {
   const double a = cutoff;
   const int p = order - 1;
 
   // Mp: Table 5.1 of Hardy's thesis
   // cprime: equation 4.17 of Hardy's thesis
   // error_scaling: empirical fit that converts to rms force errors
 
   double Mp = 1;
   double cprime = 1;
   double error_scaling = 1;
 
   switch (p) {
   case 3:
     Mp = 9;
     cprime = 1.0/6.0;
     error_scaling = 0.39189561;
     break;
   case 5:
     Mp = 825;
     cprime = 1.0/30.0;
     error_scaling = 0.150829428;
     break;
   case 7:
     Mp = 130095;
     cprime = 1.0/140.0;
     error_scaling = 0.049632967;
     break;
   case 9:
     Mp = 34096545;
     cprime = 1.0/630.0;
     error_scaling = 0.013520855;
     break;
   default:
     error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
     break;
   }
 
   // equation 4.1 of Hardy's thesis, then the empirical rms conversion
 
   double C_p = 4.0*cprime*Mp/3.0;
   C_p *= error_scaling;
 
   // equation 3.197 of Hardy's thesis
 
   double err = C_p*pow(h,(p-1))/pow(a,(p+1));
 
   // dependence of the error on charge, cutoff, box length and atom count
 
   err *= q2*a/(prd*sqrt(atom->natoms));
 
   return err;
 }
 
 /* ----------------------------------------------------------------------
    3d grid RMS force error: root-mean-square of the three 1d estimates
    evaluated on the finest grid level
 ------------------------------------------------------------------------- */
 
 double MSM::estimate_3d_error()
 {
   const double lx = domain->xprd;
   const double ly = domain->yprd;
   const double lz = domain->zprd;
 
   const double ex = estimate_1d_error(lx/nx_msm[0],lx);
   const double ey = estimate_1d_error(ly/ny_msm[0],ly);
   const double ez = estimate_1d_error(lz/nz_msm[0],lz);
 
   return sqrt(ex*ex + ey*ey + ez*ez) / sqrt(3.0);
 }
 
 /* ----------------------------------------------------------------------
    estimate total RMS force error
 ------------------------------------------------------------------------- */
 
 double MSM::estimate_total_error()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   bigint natoms = atom->natoms;
 
   double grid_error = estimate_3d_error();
   double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
   double short_range_error = 0.0;
   double table_error =
    estimate_table_accuracy(q2_over_sqrt,short_range_error);
   double estimated_total_error = sqrt(grid_error*grid_error +
    short_range_error*short_range_error + table_error*table_error);
 
   return estimated_total_error;
 }
 
 /* ----------------------------------------------------------------------
    adjust MSM coeffs, called initially and whenever volume has changed
    recomputes the box volume and per-level inverse grid spacings, sizes
    the direct-sum stencil from the cutoff, frees and rebuilds the
    direct-sum tables and grids, and re-initializes the grid communicators
 ------------------------------------------------------------------------- */
 
 void MSM::setup()
 {
   double *prd;
 
   double a = cutoff;
 
   // volume-dependent factors
 
   prd = domain->prd;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   volume = xprd * yprd * zprd;
 
   // loop over grid levels
   // inverse grid spacing per dimension and inverse cell volume
 
   for (int n=0; n<levels; n++) {
 
     delxinv[n] = nx_msm[n]/xprd;
     delyinv[n] = ny_msm[n]/yprd;
     delzinv[n] = nz_msm[n]/zprd;
 
     delvolinv[n] = delxinv[n]*delyinv[n]*delzinv[n];
   }
 
   // half-width of the direct-sum stencil in grid points: 2*cutoff
   // measured in finest-level spacings, truncated to an integer
 
   nxhi_direct = static_cast<int> (2.0*a*delxinv[0]);
   nxlo_direct = -nxhi_direct;
   nyhi_direct = static_cast<int> (2.0*a*delyinv[0]);
   nylo_direct = -nyhi_direct;
   nzhi_direct = static_cast<int> (2.0*a*delzinv[0]);
   nzlo_direct = -nzhi_direct;
 
   nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);
 
   // release grid storage from any previous setup before rebuilding
 
   deallocate();
   deallocate_peratom();
 
   // when per-atom grids have not been allocated, build every direct-sum
   // table; otherwise rebuild the virial tables only if a virial is needed
 
   if (!peratom_allocate_flag) { // Timestep 0
     get_g_direct();
     get_virial_direct();
     if (domain->nonperiodic) {
       get_g_direct_top(levels-1);
       get_virial_direct_top(levels-1);
     }
   } else {
     get_g_direct();
     if (domain->nonperiodic) get_g_direct_top(levels-1);
     if (vflag_either) {
       get_virial_direct();
       if (domain->nonperiodic) get_virial_direct_top(levels-1);
     }
   }
 
   boxlo = domain->boxlo;
 
   set_grid_local();
 
   // allocate K-space dependent memory
   // don't invoke allocate_peratom(), compute() will allocate when needed
 
   allocate();
   peratom_allocate_flag = 0;
 
   // set up ghost-cell communication on every level
 
   for (int n=0; n<levels; n++) {
     cg[n]->ghost_notify();
     cg[n]->setup();
   }
 
 }
 
 /* ----------------------------------------------------------------------
    compute the MSM long-range force, energy, virial
    eflag/vflag select which energy and virial tallies are requested
    sequence: map charges to the finest grid, sweep up the grid levels
    (direct sum + restriction), solve the top level, sweep back down
    (prolongation), then interpolate the result onto the particles
 ------------------------------------------------------------------------- */
 
 void MSM::compute(int eflag, int vflag)
 {
   int i,j;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
     eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
 
   // per-atom grids and their communicators are created lazily, the
   // first time a per-atom virial is requested after setup()
 
   if (vflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     for (int n=0; n<levels; n++) {
       cg_peratom[n]->ghost_notify();
       cg_peratom[n]->setup();
     }
     peratom_allocate_flag = 1;
   }
 
   // extend size of per-atom arrays if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(part2grid);
     nmax = atom->nmax;
     memory->create(part2grid,nmax,3,"msm:part2grid");
   }
 
   // find grid points for all my particles
   // map my particle charge onto my local 3d density grid (anterpolation)
 
   particle_map();
   make_rho();
 
   // current_level tells the communication callbacks which grid to use
 
   current_level = 0;
   cg[0]->reverse_comm(this,REVERSE_RHO);
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // every level below the top: direct sum, then restrict to next level
 
   for (int n=0; n<=levels-2; n++) {
     current_level = n;
     cg[n]->forward_comm(this,FORWARD_RHO);
 
     direct(n);
     restriction(n);
   }
   
   // top grid level
 
   current_level = levels-1;
   cg[levels-1]->forward_comm(this,FORWARD_RHO);
   direct_top(levels-1);
 
   // walk back down from the level below the top to the finest level
 
   for (int n=levels-2; n>=0; n--) {
 
     prolongation(n);
 
     current_level = n;
     cg[n]->reverse_comm(this,REVERSE_AD);
 
     // extra per-atom virial communication
 
     if (vflag_atom)
       cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
   }
 
   // all procs communicate E-field values
   // to fill ghost cells surrounding their 3d bricks
 
   current_level = 0;
 
   cg[0]->forward_comm(this,FORWARD_AD);
 
   // extra per-atom energy/virial communication
 
   if (vflag_atom)
     cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM);
 
   // calculate the force on my particles (interpolation)
 
   fieldforce();
 
   // calculate the per-atom energy for my particles
 
   if (evflag_atom) fieldforce_peratom();
 
   const double qscale = force->qqrd2e * scale;
 
   // Total long-range energy
   // summed over procs, self-energy removed, then scaled by 0.5*qscale
     
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy = energy_all;
 
     double e_self = qsqsum*gamma(0.0)/cutoff;  // Self-energy term
     energy -= e_self;
     energy *= 0.5*qscale;
   }
 
   // Total long-range virial
   // summed over procs, then scaled by 0.5*qscale
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
   }
 
   // per-atom energy/virial
   // energy includes self-energy correction
 
   if (evflag_atom) {
     double *q = atom->q;
     int nlocal = atom->nlocal;
 
     if (eflag_atom) {
       for (i = 0; i < nlocal; i++) {
         eatom[i] -= q[i]*q[i]*gamma(0.0)/cutoff;
         eatom[i] *= 0.5*qscale;
       }
     }
 
     if (vflag_atom) {
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
     }
   }
 
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of grid points
 ------------------------------------------------------------------------- */
 
 void MSM::allocate()
 {
   // 1d stencil weights (phi1d) and their derivatives (dphi1d):
   // 3 rows each, second index running over [-order,order]

   const int stencil_lo = -order;
   const int stencil_hi = order;

   memory->create2d_offset(phi1d,3,stencil_lo,stencil_hi,"msm:phi1d");
   memory->create2d_offset(dphi1d,3,stencil_lo,stencil_hi,"msm:dphi1d");

   // per-level charge grid, potential grid and ghost-communication object

   for (int lev = 0; lev < levels; lev++) {

     // both grids span the "out" extents of this level

     memory->create3d_offset(qgrid[lev],
                             nzlo_out[lev],nzhi_out[lev],
                             nylo_out[lev],nyhi_out[lev],
                             nxlo_out[lev],nxhi_out[lev],"msm:qgrid");

     memory->create3d_offset(egrid[lev],
                             nzlo_out[lev],nzhi_out[lev],
                             nylo_out[lev],nyhi_out[lev],
                             nxlo_out[lev],nxhi_out[lev],"msm:egrid");

     // CommGrid is built from the owned ("in") and allocated ("out")
     // extents plus the six neighbor procs taken from comm->procneigh

     int (*neigh)[2] = comm->procneigh;

     cg[lev] =
       new CommGrid(lmp,world,1,1,
                    nxlo_in[lev],nxhi_in[lev],
                    nylo_in[lev],nyhi_in[lev],
                    nzlo_in[lev],nzhi_in[lev],
                    nxlo_out[lev],nxhi_out[lev],
                    nylo_out[lev],nyhi_out[lev],
                    nzlo_out[lev],nzhi_out[lev],
                    neigh[0][0],neigh[0][1],
                    neigh[1][0],neigh[1][1],
                    neigh[2][0],neigh[2][1]);
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate per-atom memory that depends on # of grid points
 ------------------------------------------------------------------------- */
 
 void MSM::allocate_peratom()
 {
   // allocate grid levels
 
   for (int n=0; n<levels; n++) {
     memory->create3d_offset(v0grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v0grid");
     memory->create3d_offset(v1grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v1grid");
     memory->create3d_offset(v2grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v2grid");
     memory->create3d_offset(v3grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v3grid");
     memory->create3d_offset(v4grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v4grid");
     memory->create3d_offset(v5grid[n],nzlo_out[n],nzhi_out[n],
             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v5grid");
 
     // create ghost grid object for per-atom energy/virial
 
     int (*procneigh)[2] = comm->procneigh;
 
     cg_peratom[n] =
       new CommGrid(lmp,world,6,6,
                    nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
                    nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
                    procneigh[0][0],procneigh[0][1],procneigh[1][0],
                    procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 }
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of grid points
 ------------------------------------------------------------------------- */
 
 void MSM::deallocate()
 {
   // stencil weight tables were created with lower offset -order

   memory->destroy2d_offset(phi1d,-order);
   memory->destroy2d_offset(dphi1d,-order);

   // per-level grids and ghost-communication objects

   for (int lev = 0; lev < levels; lev++) {
     if (qgrid[lev] != NULL)
       memory->destroy3d_offset(qgrid[lev],
                                nzlo_out[lev],nylo_out[lev],nxlo_out[lev]);

     if (egrid[lev] != NULL)
       memory->destroy3d_offset(egrid[lev],
                                nzlo_out[lev],nylo_out[lev],nxlo_out[lev]);

     // cg itself may not exist yet; only touch its entries when it does

     if (cg != NULL && cg[lev] != NULL) delete cg[lev];
   }
 }
 
 /* ----------------------------------------------------------------------
    deallocate per-atom memory that depends on # of grid points
 ------------------------------------------------------------------------- */
 
 void MSM::deallocate_peratom()
 {
   for (int n=0; n<levels; n++) {
     if (v0grid[n])
       memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
     if (v1grid[n])
       memory->destroy3d_offset(v1grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
     if (v2grid[n])
       memory->destroy3d_offset(v2grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
     if (v3grid[n])
       memory->destroy3d_offset(v3grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
     if (v4grid[n])
       memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
     if (v5grid[n])
       memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
 
     if (cg_peratom)
       if (cg_peratom[n]) delete cg_peratom[n];
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of grid levels
 ------------------------------------------------------------------------- */
 
 void MSM::allocate_levels()
 {
   // every array below has one slot per grid level

   const int nlev = levels;

   // number of grid points (incl. ghosts) per level

   ngrid = new int[nlev];

   // nonperiodic grid bounds

   alpha = new int[nlev];
   betax = new int[nlev];
   betay = new int[nlev];
   betaz = new int[nlev];

   // global grid size per level

   nx_msm = new int[nlev];
   ny_msm = new int[nlev];
   nz_msm = new int[nlev];

   // owned sub-brick limits

   nxlo_in = new int[nlev];
   nylo_in = new int[nlev];
   nzlo_in = new int[nlev];
   nxhi_in = new int[nlev];
   nyhi_in = new int[nlev];
   nzhi_in = new int[nlev];

   // sub-brick limits including ghost cells

   nxlo_out = new int[nlev];
   nylo_out = new int[nlev];
   nzlo_out = new int[nlev];
   nxhi_out = new int[nlev];
   nyhi_out = new int[nlev];
   nzhi_out = new int[nlev];

   // inverse grid spacings and inverse cell volume

   delxinv = new double[nlev];
   delyinv = new double[nlev];
   delzinv = new double[nlev];
   delvolinv = new double[nlev];

   // pointer tables: the trailing () value-initializes every entry to a
   // null pointer, so deallocate()/deallocate_peratom() can test them safely

   cg = new CommGrid*[nlev]();
   cg_peratom = new CommGrid*[nlev]();

   qgrid = new double***[nlev]();
   egrid = new double***[nlev]();

   v0grid = new double***[nlev]();
   v1grid = new double***[nlev]();
   v2grid = new double***[nlev]();
   v3grid = new double***[nlev]();
   v4grid = new double***[nlev]();
   v5grid = new double***[nlev]();
 }
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of grid levels
 ------------------------------------------------------------------------- */
 
 void MSM::deallocate_levels()
 {
   // releases only the per-level tables themselves; the grids and CommGrid
   // objects they point to are released by deallocate()/deallocate_peratom()

   // pointer tables

   delete [] cg;
   delete [] cg_peratom;

   delete [] qgrid;
   delete [] egrid;

   delete [] v0grid;
   delete [] v1grid;
   delete [] v2grid;
   delete [] v3grid;
   delete [] v4grid;
   delete [] v5grid;

   // integer tables: grid counts, nonperiodic bounds, global sizes

   delete [] ngrid;

   delete [] alpha;
   delete [] betax;
   delete [] betay;
   delete [] betaz;

   delete [] nx_msm;
   delete [] ny_msm;
   delete [] nz_msm;

   // owned and ghost-inclusive sub-brick limits

   delete [] nxlo_in;
   delete [] nxhi_in;
   delete [] nylo_in;
   delete [] nyhi_in;
   delete [] nzlo_in;
   delete [] nzhi_in;

   delete [] nxlo_out;
   delete [] nxhi_out;
   delete [] nylo_out;
   delete [] nyhi_out;
   delete [] nzlo_out;
   delete [] nzhi_out;

   // inverse spacings / volume

   delete [] delxinv;
   delete [] delyinv;
   delete [] delzinv;
   delete [] delvolinv;
 }
 
 /* ----------------------------------------------------------------------
    set size of MSM grids
 ------------------------------------------------------------------------- */
 
 void MSM::set_grid_global()
 {
   // Overview:
   //  1. pick the finest-level global grid size (nx_max,ny_max,nz_max)
   //  2. bump each size until factorable() accepts it and derive # of levels
   //  3. optionally recompute the Coulombic cutoff and push it to the pair style
   //  4. allocate the per-level tables and fill nx/ny/nz_msm for each level
   //  5. for nonperiodic systems fill alpha/beta for each level

   if (accuracy_relative <= 0.0)
     error->all(FLERR,"KSpace accuracy must be > 0");
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
 
   int nx_max,ny_max,nz_max;
   double hx,hy,hz;
 
   if (adjust_cutoff_flag && !gridflag) {
     // grid spacing from a closed-form expression in order and natoms;
     // hmin is a fraction of the box length, applied to each dimension
     int p = order - 1;
     double hmin = 3072.0*(p+1)/(p-1)/
       (448.0*MY_PI + 56.0*MY_PI*order/2 + 1701.0);
     hmin = pow(hmin,1.0/6.0)/pow(atom->natoms,1.0/3.0);
     hx = hmin*xprd;
     hy = hmin*yprd;
     hz = hmin*zprd;
 
     nx_max = static_cast<int>(xprd/hx);
     ny_max = static_cast<int>(yprd/hy);
     nz_max = static_cast<int>(zprd/hz);
   } else if (!gridflag) {
     // cutoff is kept fixed: start from 2 points and keep doubling each
     // dimension until estimate_1d_error() drops to the requested accuracy
     // (first doubling always happens since errors start at 2*accuracy)
     nx_max = ny_max = nz_max = 2;
     hx = xprd/nx_max;
     hy = yprd/ny_max;
     hz = zprd/nz_max;
 
     double x_error = 2.0*accuracy;
     double y_error = 2.0*accuracy;
     double z_error = 2.0*accuracy;
 
     while (x_error > accuracy) {
       nx_max *= 2;
       hx = xprd/nx_max;
       x_error = estimate_1d_error(hx,xprd);
     }
 
     while (y_error > accuracy) {
       ny_max *= 2;
       hy = yprd/ny_max;
       y_error = estimate_1d_error(hy,yprd);
     }
 
     while (z_error > accuracy) {
       nz_max *= 2;
       hz = zprd/nz_max;
       z_error = estimate_1d_error(hz,zprd);
     }
   } else {
     // gridflag set: take the grid size from nx/ny/nz_msm_max
     // (presumably the user-specified mesh; set outside this function)
     nx_max = nx_msm_max;
     ny_max = ny_msm_max;
     nz_max = nz_msm_max;
   }
 
   // boost grid size until it is factorable
   // factorable() sets flag to 1 whenever it rejects a size and returns
   // the level count for the accepted size in x/y/zlevels
 
   int flag = 0;
   int xlevels,ylevels,zlevels;
 
   while (!factorable(nx_max,flag,xlevels)) nx_max++;
   while (!factorable(ny_max,flag,ylevels)) ny_max++;
   while (!factorable(nz_max,flag,zlevels)) nz_max++;
 
   if (flag && gridflag && me == 0)
     error->warning(FLERR,"Number of MSM mesh points increased to be a multiple of 2");
 
   // Find maximum number of levels
 
   levels = MAX(xlevels,ylevels);
   levels = MAX(levels,zlevels);
 
-  if (levels > MAX_LEVELS)
-    error->all(FLERR,"Too many MSM grid levels");
+  if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels");
 
 // Need at least 2 MSM levels for periodic systems
 // NOTE(review): this branch is taken regardless of periodicity, and the
 //   warning is not restricted to me == 0 like the one above - confirm intent
 
   if (levels <= 1) {
     levels = xlevels = ylevels = zlevels = 2;
     nx_max = ny_max = nz_max = 2;
     if (gridflag) 
-      error->warning(FLERR,"MSM mesh too small, increasing to 2 points in each direction)");
+      error->warning(FLERR,
+		     "MSM mesh too small, increasing to 2 points in each direction");
   }
 
   if (adjust_cutoff_flag) {
     // recompute spacings for the final grid, estimate a cutoff per
     // dimension and combine them as a root-mean-square value
     hx = xprd/nx_max;
     hy = yprd/ny_max;
     hz = zprd/nz_max;
 
     double ax,ay,az;
     ax = estimate_cutoff(hx,xprd);
     ay = estimate_cutoff(hy,yprd);
     az = estimate_cutoff(hz,zprd);
 
     cutoff = sqrt(ax*ax + ay*ay + az*az)/sqrt(3.0);

     // write the new cutoff straight into the pair style's "cut_msm" value
     // NOTE(review): result of extract() is dereferenced without a NULL
     //   check - confirm every compatible pair style exports "cut_msm"
     int itmp;
     double *p_cutoff = (double *) force->pair->extract("cut_msm",itmp);
     *p_cutoff = cutoff;
 
     char str[128];
     sprintf(str,"Adjusting Coulombic cutoff for MSM, new cutoff = %g",cutoff);
     if (me == 0) error->warning(FLERR,str);
   }
 
   // per-level tables are sized by the level count fixed above

   allocate_levels();
 
   // level n has 2^(dlevels-n-1) points in a dimension, bottoming out at 1

   for (int n = 0; n < levels; n++) {
 
     if (xlevels-n-1 > 0)
       nx_msm[n] = static_cast<int> (pow(2.0,xlevels-n-1));
     else
       nx_msm[n] = 1;
 
     if (ylevels-n-1 > 0)
       ny_msm[n] = static_cast<int> (pow(2.0,ylevels-n-1));
     else
       ny_msm[n] = 1;
 
     if (zlevels-n-1 > 0)
       nz_msm[n] = static_cast<int> (pow(2.0,zlevels-n-1));
     else
       nz_msm[n] = 1;
   }
 
   // finest grid must stay below OFFSET, which is used as an index shift
   // in the particle <-> grid mapping

   if (nx_msm[0] >= OFFSET || ny_msm[0] >= OFFSET || nz_msm[0] >= OFFSET)
     error->all(FLERR,"MSM grid is too large");
     
   // nonperiodic systems: alpha[n] (shared by all dims) and betax/y/z[n]
   // are the lowest/highest grid indices on level n; level 0 extends the
   // grid by order/2-1 points on each side, coarser levels are derived
   // from the next finer level by halving and extending again

   if (domain->nonperiodic) {
     alpha[0] = -(order/2 - 1);
     betax[0] = nx_msm[0] + (order/2 - 1);
     betay[0] = ny_msm[0] + (order/2 - 1);
     betaz[0] = nz_msm[0] + (order/2 - 1);
     for (int n = 1; n < levels; n++) {
       alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
       betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
       betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
       betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
     }
   }
 
 }
 
 /* ----------------------------------------------------------------------
    set local subset of MSM grid that I own
    n xyz lo/hi in = 3d brick that I own (inclusive)
    n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
 ------------------------------------------------------------------------- */
 
 void MSM::set_grid_local()
 {
   // for every level n this fills:
   //   n{x,y,z}{lo,hi}_in[n]  = owned sub-brick
   //   n{x,y,z}{lo,hi}_out[n] = owned sub-brick + ghost cells
   //   ngrid[n]               = number of points in the "out" brick
   // it also sets the members nlower, nupper and boxlo

   // global indices of MSM grid range from 0 to N-1
   // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
   //   global MSM grid that I own without ghost cells
 
   // loop over grid levels
 
   for (int n=0; n<levels; n++) {
 
     // global indices of MSM grid range from 0 to N-1
     // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
     //   global MSM grid that I own without ghost cells
     // comm->{x,y,z}split values are scaled by the global grid size,
     //   comm->myloc selects this proc's slot in each dimension
 
     nxlo_in[n] = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_msm[n]);
     nxhi_in[n] = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_msm[n]) - 1;
 
     nylo_in[n] = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_msm[n]);
     nyhi_in[n] = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_msm[n]) - 1;
 
     nzlo_in[n] = static_cast<int> (comm->zsplit[comm->myloc[2]] * nz_msm[n]);
     nzhi_in[n] = static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz_msm[n]) - 1;
 
     // nlower,nupper = stencil size for mapping particles to MSM grid
     // (independent of n; recomputed on every pass through the loop)
 
     nlower = -(order-1)/2;
     nupper = order/2;
 
     // NOTE(review): boxhi is assigned below but not used in this function;
     //   boxlo is not declared here, so it is presumably a class member
     double *prd,*sublo,*subhi,*boxhi;
 
     prd = domain->prd;
     boxlo = domain->boxlo;
     boxhi = domain->boxhi;
 
     double xprd = prd[0];
     double yprd = prd[1];
     double zprd = prd[2];
 
     // shift values for particle <-> grid mapping
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
     //   global MSM grid that my particles can contribute charge to
     // effectively nlo_in,nhi_in + ghost cells
     // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
     //           position a particle in my box can be at
     // dist[3] = particle position bound = subbox + skin/2.0
     // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
 
     sublo = domain->sublo;
     subhi = domain->subhi;
 
     double dist[3];
     double cuthalf = 0.0;
     if (n == 0) cuthalf = 0.5*neighbor->skin; // Only applies to finest grid
     dist[0] = dist[1] = dist[2] = cuthalf;
 
     int nlo,nhi;
 
     // ghost width in each dimension is the larger of the interpolation
     // order and the direct-sum stencil reach (n{x,y,z}{lo,hi}_direct)

     nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
                             nx_msm[n]/xprd + OFFSET) - OFFSET;
     nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
                             nx_msm[n]/xprd + OFFSET) - OFFSET;
     nxlo_out[n] = nlo + MIN(-order,nxlo_direct);
     nxhi_out[n] = nhi + MAX(order,nxhi_direct);
 
     nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
                             ny_msm[n]/yprd + OFFSET) - OFFSET;
     nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
                             ny_msm[n]/yprd + OFFSET) - OFFSET;
     nylo_out[n] = nlo + MIN(-order,nylo_direct);
     nyhi_out[n] = nhi + MAX(order,nyhi_direct);
 
     nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
                             nz_msm[n]/zprd + OFFSET) - OFFSET;
     nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
                             nz_msm[n]/zprd + OFFSET) - OFFSET;
     nzlo_out[n] = nlo + MIN(-order,nzlo_direct);
     nzhi_out[n] = nhi + MAX(order,nzhi_direct);
     
     // Add extra grid points for nonperiodic boundary conditions
     // in each nonperiodic dimension:
     //   - a proc whose owned brick touches the low/high edge of the global
     //     grid has its owned range extended out to alpha[n] / beta[n]
     //   - the ghost range is clipped so it never leaves [alpha[n],beta[n]]
     //   - a negative nhi_in is replaced by alpha[n]-1
     // the order of the statements matters: the nhi_in < 0 test runs after
     // the edge extension
 
     if (domain->nonperiodic) {
 
       if (!domain->xperiodic) {
         if (nxlo_in[n] == 0)
           nxlo_in[n] = alpha[n];
         nxlo_out[n] = MAX(nxlo_out[n],alpha[n]);
 
         if (nxhi_in[n] == nx_msm[n] - 1)
           nxhi_in[n] = betax[n];
         nxhi_out[n] = MIN(nxhi_out[n],betax[n]);
         if (nxhi_in[n] < 0)
           nxhi_in[n] = alpha[n] - 1;
       }
       
       if (!domain->yperiodic) {
         if (nylo_in[n] == 0)
           nylo_in[n] = alpha[n];
         nylo_out[n] = MAX(nylo_out[n],alpha[n]);
         
         if (nyhi_in[n] == ny_msm[n] - 1)
           nyhi_in[n] = betay[n];
         nyhi_out[n] = MIN(nyhi_out[n],betay[n]);
         if (nyhi_in[n] < 0)
           nyhi_in[n] = alpha[n] - 1;
       }
 
       if (!domain->zperiodic) {
         if (nzlo_in[n] == 0)
           nzlo_in[n] = alpha[n];
         nzlo_out[n] = MAX(nzlo_out[n],alpha[n]);
         
         if (nzhi_in[n] == nz_msm[n] - 1)
           nzhi_in[n] = betaz[n];
         nzhi_out[n] = MIN(nzhi_out[n],betaz[n]);
         if (nzhi_in[n] < 0)
           nzhi_in[n] = alpha[n] - 1;
       }
     }
 
     // MSM grids for this proc, including ghosts
 
     ngrid[n] = (nxhi_out[n]-nxlo_out[n]+1) * (nyhi_out[n]-nylo_out[n]+1) *
       (nzhi_out[n]-nzlo_out[n]+1);
   }
 }
 
 /* ----------------------------------------------------------------------
    reset local grid arrays and communication stencils
    called by fix balance b/c it changed sizes of processor sub-domains
 ------------------------------------------------------------------------- */
 
 void MSM::setup_grid()
 {
   // everything is delegated to setup(); the list below is the work
   // setup() is expected to do on our behalf
   // NOTE(review): setup() is not visible from here - confirm it covers
   //   each of these steps

   // free all arrays previously allocated
   // pre-compute volume-dependent coeffs
   // reset portion of global grid that each proc owns
   // reallocate MSM long-range dependent memory
   // don't invoke allocate_peratom(), compute() will allocate when needed
 
   setup();
 }
 
 /* ----------------------------------------------------------------------
    check if all factors of n are in list of factors
    return 1 if yes, 0 if no
 ------------------------------------------------------------------------- */
 
 int MSM::factorable(int n, int &flag, int &levels)
 {
   // levels starts at 1 and grows by one for every factor divided out of n
   // NOTE: the levels parameter shadows the class member of the same name

   levels = 1;

   while (n > 1) {

     // first entry of factors[] that divides n, or nfactors if none does

     int ifac = 0;
     while (ifac < nfactors && n % factors[ifac] != 0) ifac++;

     // n has a factor outside the list: report it through flag and fail

     if (ifac == nfactors) {
       flag = 1;
       return 0;
     }

     n /= factors[ifac];
     levels++;
   }

   return 1;
 }
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array
 ------------------------------------------------------------------------- */
 
 void MSM::particle_map()
 {
   int nx,ny,nz;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   int flag = 0;
 
   for (int i = 0; i < nlocal; i++) {
 
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv[0]+OFFSET) - OFFSET;
     ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv[0]+OFFSET) - OFFSET;
     nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv[0]+OFFSET) - OFFSET;
 
     part2grid[i][0] = nx;
     part2grid[i][1] = ny;
     part2grid[i][2] = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
         ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
         nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0]) flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid
 ------------------------------------------------------------------------- */
 
 void MSM::make_rho()
 {
   //fprintf(screen,"MSM aninterpolation\n\n");
 
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   double dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   double ***qgridn = qgrid[0];
 
   memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
 
     compute_phis_and_dphis(dx,dy,dz);
 
     z0 = q[i];
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       y0 = z0*phi1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         x0 = y0*phi1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           qgridn[mz][my][mx] += x0*phi1d[0][l];
         }
       }
     }
   }
 
 }
 
 /* ----------------------------------------------------------------------
    MSM direct part procedure for intermediate grid levels
 ------------------------------------------------------------------------- */
 
 void MSM::direct(int n)
 {
   // Direct-sum step on grid level n.
   // For every grid point this proc owns (n{x,y,z}lo_in..hi_in) the charges
   // qgrid[n] inside the direct stencil are summed with weights g_direct[n]
   // and the result is stored in egrid[n].  When virial flags are set the
   // same sum is formed with v0..v5_direct[n]; the results go to the
   // per-atom virial grids (vflag_atom) and/or the global energy/virial
   // accumulators (evflag).

   //fprintf(screen,"Direct contribution on level %i\n\n",n);
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
   double *g_directn = g_direct[n];
 
   // zero out electric potential
   // (whole brick incl. ghosts: ngrid[n] values from the lowest corner)
 
   memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
 
   // zero out virial
   // the v*gridn pointers are only dereferenced when vflag_atom is set
 
   if (vflag_atom) {
     memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
   }
 
   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
   int jj,kk;
   int imin,imax,jmin,jmax,kmin,kmax;
   double qtmp;
   double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
   
   // stencil widths in x and y, used to flatten the stencil offset
   // (iz,iy,ix) into the 1d index k of the weight tables

   int nx = nxhi_direct - nxlo_direct + 1;
   int ny = nyhi_direct - nylo_direct + 1;
 
   // (icx,icy,icz) = owned grid point receiving the sum
   // periodic dims use the full stencil [nlo_direct,nhi_direct];
   // nonperiodic dims clip it so icN+offset stays inside [alpha[n],beta[n]]

   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
         
     if (domain->zperiodic) {
       kmin = nzlo_direct;
       kmax = nzhi_direct;
     } else {
       kmin = MAX(nzlo_direct,alpha[n] - icz);
       kmax = MIN(nzhi_direct,betaz[n] - icz);
     }
         
     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
 
       if (domain->yperiodic) {
         jmin = nylo_direct;
         jmax = nyhi_direct;
       } else {
         jmin = MAX(nylo_direct,alpha[n] - icy);
         jmax = MIN(nyhi_direct,betay[n] - icy);
       }
         
       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
 
         if (domain->xperiodic) {
           imin = nxlo_direct;
           imax = nxhi_direct;
         } else {
           imin = MAX(nxlo_direct,alpha[n] - icx);
           imax = MIN(nxhi_direct,betax[n] - icx);
         }
 
         // v*sum are only initialized when vflag_either is set
         // NOTE(review): they are read below under vflag_atom and
         //   vflag_global; presumably either implies vflag_either - confirm

         esum = 0.0;
         if (vflag_either)
           v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
         
         // (ix,iy,iz) = stencil offset, (icx+ix,jj,kk) = source grid point
         // k = ((iz+nzhi_direct)*ny + iy+nyhi_direct)*nx + ix+nxhi_direct

         for (iz = kmin; iz <= kmax; iz++) {
           kk = icz+iz;
           zk = (iz + nzhi_direct)*ny;
           for (iy = jmin; iy <= jmax; iy++) {
             jj = icy+iy;
             zyk = (zk + iy + nyhi_direct)*nx;
             for (ix = imin; ix <= imax; ix++) {
               qtmp = qgridn[kk][jj][icx+ix];
               k = zyk + ix + nxhi_direct;
               esum += g_directn[k] * qtmp;
 
               if (vflag_either) {
                 v0sum += v0_direct[n][k] * qtmp;
                 v1sum += v1_direct[n][k] * qtmp;
                 v2sum += v2_direct[n][k] * qtmp;
                 v3sum += v3_direct[n][k] * qtmp;
                 v4sum += v4_direct[n][k] * qtmp;
                 v5sum += v5_direct[n][k] * qtmp;
               }
             }
           }
         }
         egridn[icz][icy][icx] = esum;
 
         if (vflag_atom) {
           v0gridn[icz][icy][icx] = v0sum;
           v1gridn[icz][icy][icx] = v1sum;
           v2gridn[icz][icy][icx] = v2sum;
           v3gridn[icz][icy][icx] = v3sum;
           v4gridn[icz][icy][icx] = v4sum;
           v5gridn[icz][icy][icx] = v5sum;
         }
 
         // global tallies: sum at this point times the charge at this point

         if (evflag) {
           qtmp = qgridn[icz][icy][icx];
           if (eflag_global) energy += esum * qtmp;
           if (vflag_global) {
             virial[0] += v0sum * qtmp;
             virial[1] += v1sum * qtmp;
             virial[2] += v2sum * qtmp;
             virial[3] += v3sum * qtmp;
             virial[4] += v4sum * qtmp;
             virial[5] += v5sum * qtmp;
           }
         }
 
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    MSM direct part procedure for top grid level
 ------------------------------------------------------------------------- */
 
 void MSM::direct_top(int n)
 {
   // Direct-sum step on the top (coarsest) grid level n.
   // The potential (and per-atom virial) grids are always zeroed.  For
   // fully periodic systems nothing else is done.  Otherwise every owned
   // grid point sums the charges of the whole level, i.e. all points in
   // [alpha[n],beta[n]] per dimension, weighted by the *_direct_top tables.

   //fprintf(screen,"Direct contribution on level %i\n\n",n);
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
 
   // zero out electric potential
 
   memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
 
   // zero out virial
 
   if (vflag_atom) {
     memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
     memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
   }
 
   if (!domain->nonperiodic) return; // omit top grid level for periodic systems
 
   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
   int jj,kk;
   int imin,imax,jmin,jmax,kmin,kmax;
   double qtmp;
   double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
 
   // n{x,y,z}_top = largest offset between two grid points of this level;
   // offsets therefore span [-nN_top,nN_top], i.e. 2*nN_top+1 values,
   // which gives the row widths nx,ny of the flattened weight tables

   int nx_top = betax[n] - alpha[n];
   int ny_top = betay[n] - alpha[n];
   int nz_top = betaz[n] - alpha[n];
   
   int nx = 2*nx_top + 1;
   int ny = 2*ny_top + 1;
 
   // (icx,icy,icz) = owned grid point receiving the sum
   // offset ranges are chosen so icN+offset covers [alpha[n],beta[n]]

   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
       kmin = alpha[n] - icz;
       kmax = betaz[n] - icz;
     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
         jmin = alpha[n] - icy;
         jmax = betay[n] - icy;
       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
           imin = alpha[n] - icx;
           imax = betax[n] - icx;
 
         // v*sum are only initialized when vflag_either is set
         // NOTE(review): they are read below under vflag_atom and
         //   vflag_global; presumably either implies vflag_either - confirm

         esum = 0.0;
         if (vflag_either)
           v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
 
         // k = ((iz+nz_top)*ny + iy+ny_top)*nx + ix+nx_top

         for (iz = kmin; iz <= kmax; iz++) {
           kk = icz+iz;
           zk = (iz + nz_top)*ny;
           for (iy = jmin; iy <= jmax; iy++) {
             jj = icy+iy;
             zyk = (zk + iy + ny_top)*nx;
             for (ix = imin; ix <= imax; ix++) {
               qtmp = qgridn[kk][jj][icx+ix];
               k = zyk + ix + nx_top;
               esum += g_direct_top[k] * qtmp;
 
               if (vflag_either) {
                 v0sum += v0_direct_top[k] * qtmp;
                 v1sum += v1_direct_top[k] * qtmp;
                 v2sum += v2_direct_top[k] * qtmp;
                 v3sum += v3_direct_top[k] * qtmp;
                 v4sum += v4_direct_top[k] * qtmp;
                 v5sum += v5_direct_top[k] * qtmp;
               }
             }
           }
         }
         egridn[icz][icy][icx] = esum;
 
         if (vflag_atom) {
           v0gridn[icz][icy][icx] = v0sum;
           v1gridn[icz][icy][icx] = v1sum;
           v2gridn[icz][icy][icx] = v2sum;
           v3gridn[icz][icy][icx] = v3sum;
           v4gridn[icz][icy][icx] = v4sum;
           v5gridn[icz][icy][icx] = v5sum;
         }
 
         // global tallies: sum at this point times the charge at this point

         if (evflag) {
           qtmp = qgridn[icz][icy][icx];
           if (eflag_global) energy += esum * qtmp;
           if (vflag_global) {
             virial[0] += v0sum * qtmp;
             virial[1] += v1sum * qtmp;
             virial[2] += v2sum * qtmp;
             virial[3] += v3sum * qtmp;
             virial[4] += v4sum * qtmp;
             virial[5] += v5sum * qtmp;
           }
         }
 
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    MSM restriction procedure for intermediate grid levels
 ------------------------------------------------------------------------- */
 
 void MSM::restriction(int n)
 {
   // Restriction step: transfer charge from grid level n to the coarser
   // level n+1.  Each coarse point I own accumulates the fine-grid charges
   // around it, weighted by the product of three 1d phi values.
   // Overwrites the first stencil entries of phi1d[0..2][] as scratch.

   //fprintf(screen,"Restricting from level %i to %i\n\n",n,n+1);

   int p = order-1;

   double ***qgrid1 = qgrid[n];
   double ***qgrid2 = qgrid[n+1];

   //restrict grid (going from grid n to grid n+1, i.e. to a coarser grid)

   // the stencil holds offset 0 plus every odd offset in [-p,p]
   // for even order (odd p) that is p+2 entries, so index[] needs p+2
   //   slots; the former "int index[p+1]" was written and read one
   //   element past its end
   // heap allocation also avoids the non-standard variable-length array

   int k = 0;
   int *index = new int[p+2];
   for (int nu=-p; nu<=p; nu++) {
     if (nu%2 == 0 && nu != 0) continue;
     phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
     phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
     phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
     index[k] = nu;
     k++;
   }

   // number of stencil entries actually filled (p+2 for even order)

   const int nstencil = k;

   int ip,jp,kp,ic,jc,kc,i,j;
   int ii,jj,kk;
   double phiz,phizy;

   // zero out charge on coarser grid

   memset(&(qgrid2[nzlo_out[n+1]][nylo_out[n+1]][nxlo_out[n+1]]),0,ngrid[n+1]*sizeof(double));

   for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
     for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
       for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {

         // (ic,jc,kc) = fine-grid point coinciding with coarse (ip,jp,kp):
         //   coarse index times the integer spacing ratio between levels

         ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
         jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
         kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);

         // in nonperiodic dims skip fine points below alpha[n] and stop at
         //   the first one above beta[n] (index[] is in ascending order)

         for (k=0; k<nstencil; k++) {
           kk = kc+index[k];
           if (!domain->zperiodic) {
             if (kk < alpha[n]) continue;
             if (kk > betaz[n]) break;
           }
           phiz = phi1d[2][k];
           for (j=0; j<nstencil; j++) {
             jj = jc+index[j];
             if (!domain->yperiodic) {
               if (jj < alpha[n]) continue;
               if (jj > betay[n]) break;
             }
             phizy = phi1d[1][j]*phiz;
             for (i=0; i<nstencil; i++) {
               ii = ic+index[i];
               if (!domain->xperiodic) {
                 if (ii < alpha[n]) continue;
                 if (ii > betax[n]) break;
               }
               qgrid2[kp][jp][ip] += qgrid1[kk][jj][ii] *
                 phi1d[0][i]*phizy;
             }
           }
         }
       }

   delete [] index;
 }
 
 /* ----------------------------------------------------------------------
    MSM prolongation procedure for intermediate grid levels
 ------------------------------------------------------------------------- */
 
 void MSM::prolongation(int n)
 {
   // Prolongation step: add the potential (and, with vflag_atom, the six
   // per-atom virial grids) of the coarser level n+1 onto level n.  Each
   // coarse point I own is spread over the fine-grid points around it,
   // weighted by the product of three 1d phi values.
   // Overwrites the first stencil entries of phi1d[0..2][] as scratch.

   //fprintf(screen,"Prolongating from level %i to %i\n\n",n+1,n);

   int p = order-1;

   double ***egrid1 = egrid[n];
   double ***egrid2 = egrid[n+1];

   double ***v0grid1 = v0grid[n];
   double ***v0grid2 = v0grid[n+1];
   double ***v1grid1 = v1grid[n];
   double ***v1grid2 = v1grid[n+1];
   double ***v2grid1 = v2grid[n];
   double ***v2grid2 = v2grid[n+1];
   double ***v3grid1 = v3grid[n];
   double ***v3grid2 = v3grid[n+1];
   double ***v4grid1 = v4grid[n];
   double ***v4grid2 = v4grid[n+1];
   double ***v5grid1 = v5grid[n];
   double ***v5grid2 = v5grid[n+1];

   //prolongate grid (going from grid n+1 to grid n, i.e. to a finer grid)

   // the stencil holds offset 0 plus every odd offset in [-p,p]
   // for even order (odd p) that is p+2 entries, so index[] needs p+2
   //   slots; the former "int index[p+1]" was written and read one
   //   element past its end
   // heap allocation also avoids the non-standard variable-length array

   int k = 0;
   int *index = new int[p+2];
   for (int nu=-p; nu<=p; nu++) {
     if (nu%2 == 0 && nu != 0) continue;
     phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
     phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
     phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
     index[k] = nu;
     k++;
   }

   // number of stencil entries actually filled (p+2 for even order)

   const int nstencil = k;

   int ip,jp,kp,ic,jc,kc,i,j;
   int ii,jj,kk;
   double phiz,phizy,phi3d;

   for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
     for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
       for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {

         // (ic,jc,kc) = fine-grid point coinciding with coarse (ip,jp,kp):
         //   coarse index times the integer spacing ratio between levels

         ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
         jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
         kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);

         // in nonperiodic dims skip fine points below alpha[n] and stop at
         //   the first one above beta[n] (index[] is in ascending order)

         for (k=0; k<nstencil; k++) {
           kk = kc+index[k];
           if (!domain->zperiodic) {
             if (kk < alpha[n]) continue;
             if (kk > betaz[n]) break;
           }
           phiz = phi1d[2][k];
           for (j=0; j<nstencil; j++) {
             jj = jc+index[j];
             if (!domain->yperiodic) {
               if (jj < alpha[n]) continue;
               if (jj > betay[n]) break;
             }
             phizy = phi1d[1][j]*phiz;
             for (i=0; i<nstencil; i++) {
               ii = ic+index[i];
               if (!domain->xperiodic) {
                 if (ii < alpha[n]) continue;
                 if (ii > betax[n]) break;
               }
               phi3d = phi1d[0][i]*phizy;

               egrid1[kk][jj][ii] += egrid2[kp][jp][ip] * phi3d;

               // virial grids are only touched when vflag_atom is set

               if (vflag_atom) {
                 v0grid1[kk][jj][ii] += v0grid2[kp][jp][ip] * phi3d;
                 v1grid1[kk][jj][ii] += v1grid2[kp][jp][ip] * phi3d;
                 v2grid1[kk][jj][ii] += v2grid2[kp][jp][ip] * phi3d;
                 v3grid1[kk][jj][ii] += v3grid2[kp][jp][ip] * phi3d;
                 v4grid1[kk][jj][ii] += v4grid2[kp][jp][ip] * phi3d;
                 v5grid1[kk][jj][ii] += v5grid2[kp][jp][ip] * phi3d;
               }

             }
           }
         }

       }

   delete [] index;
 }
 
 /* ----------------------------------------------------------------------
    pack own values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
 {
   int n = current_level;
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
 
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
 
   int k = 0;
 
   if (flag == FORWARD_RHO) {
     double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       buf[k++] = qsrc[list[i]];
     }
   } else if (flag == FORWARD_AD) {
     double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   } else if (flag == FORWARD_AD_PERATOM) {
     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       buf[k++] = v0src[list[i]];
       buf[k++] = v1src[list[i]];
       buf[k++] = v2src[list[i]];
       buf[k++] = v3src[list[i]];
       buf[k++] = v4src[list[i]];
       buf[k++] = v5src[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
 {
   int n = current_level;
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
 
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
 
   int k = 0;
 
   if (flag == FORWARD_RHO) {
     double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       dest[list[i]] = buf[k++];
     }
   } else if (flag == FORWARD_AD) {
     double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[k++];
   } else if (flag == FORWARD_AD_PERATOM) {
     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[k++];
       v1src[list[i]] = buf[k++];
       v2src[list[i]] = buf[k++];
       v3src[list[i]] = buf[k++];
       v4src[list[i]] = buf[k++];
       v5src[list[i]] = buf[k++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
 {
   int n = current_level;
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
 
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
 
   int k = 0;
 
   if (flag == REVERSE_RHO) {
     double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       buf[k++] = qsrc[list[i]];
     }
   } else if (flag == REVERSE_AD) {
     double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   } else if (flag == REVERSE_AD_PERATOM) {
     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       buf[k++] = v0src[list[i]];
       buf[k++] = v1src[list[i]];
       buf[k++] = v2src[list[i]];
       buf[k++] = v3src[list[i]];
       buf[k++] = v4src[list[i]];
       buf[k++] = v5src[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void MSM::unpack_reverse(int flag, double *buf, int nlist, int *list)
 {
   int n = current_level;
 
   double ***qgridn = qgrid[n];
   double ***egridn = egrid[n];
 
   double ***v0gridn = v0grid[n];
   double ***v1gridn = v1grid[n];
   double ***v2gridn = v2grid[n];
   double ***v3gridn = v3grid[n];
   double ***v4gridn = v4grid[n];
   double ***v5gridn = v5grid[n];
 
   int k = 0;
 
   if (flag == REVERSE_RHO) {
     double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       dest[list[i]] += buf[k++];
     }
   } else if (flag == REVERSE_AD) {
     double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[k++];
   } else if (flag == REVERSE_AD_PERATOM) {
     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] += buf[k++];
       v1src[list[i]] += buf[k++];
       v2src[list[i]] += buf[k++];
       v3src[list[i]] += buf[k++];
       v4src[list[i]] += buf[k++];
       v5src[list[i]] += buf[k++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get force on my particles
 ------------------------------------------------------------------------- */
 
 void MSM::fieldforce()
 {
   //fprintf(screen,"MSM interpolation\n\n");
 
   double ***egridn = egrid[0];
 
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   double dx,dy,dz;
   double phi_x,phi_y,phi_z;
   double dphi_x,dphi_y,dphi_z;
   double ekx,eky,ekz;
 
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
 
     compute_phis_and_dphis(dx,dy,dz);
 
     ekx = eky = ekz = 0.0;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       phi_z = phi1d[2][n];
       dphi_z = dphi1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         phi_y = phi1d[1][m];
         dphi_y = dphi1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           phi_x = phi1d[0][l];
           dphi_x = dphi1d[0][l];
           ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx];
           eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx];
           ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx];
         }
       }
     }
 
     ekx *= delxinv[0];
     eky *= delyinv[0];
     ekz *= delzinv[0];
 
     // convert E-field to force
 
     const double qfactor = force->qqrd2e*scale*q[i];
     f[i][0] += qfactor*ekx;
     f[i][1] += qfactor*eky;
     f[i][2] += qfactor*ekz;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get per-atom energy/virial
 ------------------------------------------------------------------------- */
 
 void MSM::fieldforce_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   double dx,dy,dz,x0,y0,z0;
   double u,v0,v1,v2,v3,v4,v5;
 
   double ***egridn = egrid[0];
 
   double ***v0gridn = v0grid[0];
   double ***v1gridn = v1grid[0];
   double ***v2gridn = v2grid[0];
   double ***v3gridn = v3grid[0];
   double ***v4gridn = v4grid[0];
   double ***v5gridn = v5grid[0];
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
 
     compute_phis_and_dphis(dx,dy,dz);
 
     u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = phi1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         y0 = z0*phi1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           x0 = y0*phi1d[0][l];
           if (eflag_atom) u += x0*egridn[mz][my][mx];
           if (vflag_atom) {
             v0 += x0*v0gridn[mz][my][mx];
             v1 += x0*v1gridn[mz][my][mx];
             v2 += x0*v2gridn[mz][my][mx];
             v3 += x0*v3gridn[mz][my][mx];
             v4 += x0*v4gridn[mz][my][mx];
             v5 += x0*v5gridn[mz][my][mx];
           }
         }
       }
     }
 
     if (eflag_atom) eatom[i] += q[i]*u;
     if (vflag_atom) {
       vatom[i][0] += q[i]*v0;
       vatom[i][1] += q[i]*v1;
       vatom[i][2] += q[i]*v2;
       vatom[i][3] += q[i]*v3;
       vatom[i][4] += q[i]*v4;
       vatom[i][5] += q[i]*v5;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute phi1d and dphi1d weights over nlower..nupper for offsets (dx,dy,dz)
 ------------------------------------------------------------------------- */
 
 void MSM::compute_phis_and_dphis(const double &dx, const double &dy,
                                  const double &dz)
 {
   // fill phi1d[dim][nu] and dphi1d[dim][nu] for nu = nlower..nupper
   // with phi and its derivative evaluated at (offset in that dim) + nu
 
   const double shift[3] = {dx,dy,dz};
 
   for (int nu = nlower; nu <= nupper; nu++) {
     const double step = double(nu);
     for (int dim = 0; dim < 3; dim++) {
       const double del = shift[dim] + step;
       phi1d[dim][nu] = compute_phi(del);
       dphi1d[dim][nu] = compute_dphi(del);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute phi using interpolating polynomial
    see Eq 7 from Parallel Computing 35 (2009) 164-177
    and Hardy's thesis
 ------------------------------------------------------------------------- */
 
 double MSM::compute_phi(const double &xi)
 {
   // piecewise polynomial in |xi|, selected by the member "order"
   // each order has support |xi| <= order/2; phi is zero outside it
   // phi defaults to zero so an order other than 4,6,8,10 returns a
   // defined value instead of an uninitialized one
 
   double phi = 0.0;
   double abs_xi = fabs(xi);
   double xi2 = xi*xi;
 
   if (order == 4) {
     if (abs_xi <= 1) {
       phi = (1.0 - abs_xi)*(1.0 + abs_xi - 1.5*xi2);
     } else if (abs_xi <= 2) {
       phi = -0.5*(abs_xi - 1.0)*(2.0 - abs_xi)*(2.0 - abs_xi);
     }
 
   } else if (order == 6) {
     if (abs_xi <= 1) {
       phi = (1.0 - xi2)*(2.0 - abs_xi)*(6.0 + 3.0*abs_xi -
         5.0*xi2)/12.0;
     } else if (abs_xi <= 2) {
       phi = -(abs_xi - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
         (4.0 + 9.0*abs_xi - 5.0*xi2)/24.0;
     } else if (abs_xi <= 3) {
       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*
         (3.0 - abs_xi)*(4.0 - abs_xi)/24.0;
     }
 
   } else if (order == 8) {
     if (abs_xi <= 1) {
       phi = (1.0 - xi2)*(4.0 - xi2)*(3.0 - abs_xi)*
         (12.0 + 4.0*abs_xi - 7.0*xi2)/144.0;
     } else if (abs_xi <= 2) {
       phi = -(xi2 - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
         (4.0 - abs_xi)*(10.0 + 12.0*abs_xi - 7.0*xi2)/240.0;
     } else if (abs_xi <= 3) {
       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
         (5.0 - abs_xi)*(6.0 + 20.0*abs_xi - 7.0*xi2)/720.0;
     } else if (abs_xi <= 4) {
       phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
         (4.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)/720.0;
     }
 
   } else if (order == 10) {
     if (abs_xi <= 1) {
       phi = (1.0 - xi2)*(4.0 - xi2)*(9.0 - xi2)*
         (4.0 - abs_xi)*(20.0 + 5.0*abs_xi - 9.0*xi2)/2880.0;
     } else if (abs_xi <= 2) {
       phi = -(xi2 - 1.0)*(4.0 - xi2)*(3.0 - abs_xi)*(4.0 - abs_xi)*
         (5.0 - abs_xi)*(6.0 + 5.0*abs_xi - 3.0*xi2)/1440.0;
     } else if (abs_xi <= 3) {
       phi = (xi2 - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
         (5.0 - abs_xi)*(6.0 - abs_xi)*(14.0 + 25.0*abs_xi - 9.0*xi2)/10080.0;
     } else if (abs_xi <= 4) {
       phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
         (5.0 - abs_xi)*(6.0 - abs_xi)*(7.0 - abs_xi)*
         (8.0 + 35.0*abs_xi - 9.0*xi2)/40320.0;
     } else if (abs_xi <= 5) {
       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*
         (abs_xi - 4.0)*(5.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)*
         (7.0 - abs_xi)*(8.0 - abs_xi)/40320.0;
     }
   }
 
   return phi;
 }
 
 /* ----------------------------------------------------------------------
    compute the derivative of phi
    phi is an interpolating polynomial
    see Eq 7 from Parallel Computing 35 (2009) 164-177
    and Hardy's thesis
 ------------------------------------------------------------------------- */
 
 double MSM::compute_dphi(const double &xi)
 {
   // derivative of the piecewise polynomial used by compute_phi(),
   // selected by the member "order"
   // each branch has the form xi*(polynomial in |xi|)/|xi|, so xi = 0
   // is handled first to avoid dividing by zero
   // dphi defaults to zero so arguments outside the support, or an order
   // other than 4,6,8,10, return a defined value
 
   double dphi = 0.0;
   double abs_xi = fabs(xi);
 
   if (abs_xi == 0.0) return dphi;
 
   if (order == 4) {
     double xi2 = xi*xi;
     double abs_xi2 = abs_xi*abs_xi;
     if (abs_xi <= 1) {
       dphi = xi*(3*xi2 + 6*abs_xi2 - 10*abs_xi)/2.0/abs_xi;
     } else if (abs_xi <= 2) {
       dphi = xi*(2 - abs_xi)*(3*abs_xi - 4)/2.0/abs_xi;
     }
 
   } else if (order == 6) {
     double xi2 = xi*xi;
     double xi4 = xi2*xi2;
     double abs_xi2 = abs_xi*abs_xi;
     double abs_xi3 = abs_xi2*abs_xi;
     double abs_xi4 = abs_xi2*abs_xi2;
     if (abs_xi <= 1) {
       dphi = xi*(46*xi2*abs_xi - 20*xi2*abs_xi2 - 5*xi4 + 5*xi2 +
         6*abs_xi3 + 10*abs_xi2 - 50*abs_xi)/12.0/abs_xi;
     } else if (abs_xi <= 2) {
       dphi = xi*(15*xi2*abs_xi2 - 60*xi2*abs_xi + 55*xi2 +
         10*abs_xi4 - 96*abs_xi3 + 260*abs_xi2 - 210*abs_xi + 10)/
         24.0/abs_xi;
     } else if (abs_xi <= 3) {
       dphi = -xi*(abs_xi - 3)*(5*abs_xi3 - 37*abs_xi2 +
         84*abs_xi - 58)/24.0/abs_xi;
     }
 
   } else if (order == 8) {
     double xi2 = xi*xi;
     double xi4 = xi2*xi2;
     double xi6 = xi4*xi2;
     double abs_xi2 = abs_xi*abs_xi;
     double abs_xi3 = abs_xi2*abs_xi;
     double abs_xi4 = abs_xi2*abs_xi2;
     double abs_xi5 = abs_xi4*abs_xi;
     double abs_xi6 = abs_xi5*abs_xi;
     if (abs_xi <= 1) {
       dphi = xi*(7*xi6 + 42*xi4*abs_xi2 - 134*xi4*abs_xi - 35*xi4 -
         16*xi2*abs_xi3 - 140*xi2*abs_xi2 + 604*xi2*abs_xi + 28*xi2 +
         40*abs_xi3 + 56*abs_xi2 - 560*abs_xi)/144.0/abs_xi;
     } else if (abs_xi <= 2) {
       dphi = xi*(126*xi4*abs_xi - 21*xi4*abs_xi2 - 182*xi4 -
         28*xi2*abs_xi4 + 300*xi2*abs_xi3 - 1001*xi2*abs_xi2 +
         990*xi2*abs_xi + 154*xi2 + 24*abs_xi5 - 182*abs_xi4 +
         270*abs_xi3 + 602*abs_xi2 - 1260*abs_xi + 28)/240.0/abs_xi;
     } else if (abs_xi <= 3) {
       dphi = xi*(35*xi2*abs_xi4 - 420*xi2*abs_xi3 +
         1785*xi2*abs_xi2 - 3150*xi2*abs_xi + 1918*xi2 +
         14*abs_xi6 - 330*abs_xi5 + 2660*abs_xi4 -
         9590*abs_xi3 + 15806*abs_xi2 - 9940*abs_xi + 756)/720.0/abs_xi;
     } else if (abs_xi <= 4) {
       dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*abs_xi4 +
         807*abs_xi3 - 2512*abs_xi2 + 3644*abs_xi - 1944)/720.0/abs_xi;
     }
 
   } else if (order == 10) {
     double xi2 = xi*xi;
     double xi4 = xi2*xi2;
     double xi6 = xi4*xi2;
     double xi8 = xi6*xi2;
     double abs_xi2 = abs_xi*abs_xi;
     double abs_xi3 = abs_xi2*abs_xi;
     double abs_xi4 = abs_xi2*abs_xi2;
     double abs_xi5 = abs_xi4*abs_xi;
     double abs_xi6 = abs_xi5*abs_xi;
     double abs_xi7 = abs_xi6*abs_xi;
     double abs_xi8 = abs_xi7*abs_xi;
     if (abs_xi <= 1) {
       dphi = xi*(298*xi6*abs_xi - 72*xi6*abs_xi2 - 9*xi8 +
         126*xi6 + 30*xi4*abs_xi3 + 756*xi4*abs_xi2 - 3644*xi4*abs_xi -
         441*xi4 - 280*xi2*abs_xi3 - 1764*xi2*abs_xi2 + 12026*xi2*abs_xi +
         324*xi2 + 490*abs_xi3 + 648*abs_xi2 - 10792*abs_xi)/2880.0/abs_xi;
     } else if (abs_xi <= 2) {
       dphi = xi*(9*xi6*abs_xi2 - 72*xi6*abs_xi + 141*xi6 +
         18*xi4*abs_xi4 - 236*xi4*abs_xi3 + 963*xi4*abs_xi2 -
         1046*xi4*abs_xi - 687*xi4 - 20*xi2*abs_xi5 + 156*xi2*abs_xi4 +
         168*xi2*abs_xi3 - 3522*xi2*abs_xi2 + 6382*xi2*abs_xi + 474*xi2 +
         50*abs_xi5 - 516*abs_xi4 + 1262*abs_xi3 + 1596*abs_xi2 -
         6344*abs_xi + 72)/1440.0/abs_xi;
     } else if (abs_xi <= 3) {
       dphi = xi*(720*xi4*abs_xi3 - 45*xi4*abs_xi4 - 4185*xi4*abs_xi2 +
         10440*xi4*abs_xi - 9396*xi4 - 36*xi2*abs_xi6 + 870*xi2*abs_xi5 -
         7965*xi2*abs_xi4 + 34540*xi2*abs_xi3 - 70389*xi2*abs_xi2 +
         51440*xi2*abs_xi + 6012*xi2 + 50*abs_xi7 - 954*abs_xi6 +
         6680*abs_xi5 - 19440*abs_xi4 + 11140*abs_xi3 + 49014*abs_xi2 -
         69080*abs_xi + 3384)/10080.0/abs_xi;
     } else if (abs_xi <= 4) {
       dphi = xi*(63*xi2*abs_xi6 - 1512*xi2*abs_xi5 + 14490*xi2*abs_xi4 -
         70560*xi2*abs_xi3 + 182763*xi2*abs_xi2 - 236376*xi2*abs_xi +
         117612*xi2 + 18*abs_xi8 - 784*abs_xi7 + 12600*abs_xi6 -
         101556*abs_xi5 + 451962*abs_xi4 - 1121316*abs_xi3 +
         1451628*abs_xi2 - 795368*abs_xi + 71856)/40320.0/abs_xi;
     } else if (abs_xi <= 5) {
       dphi = -xi*(abs_xi - 5)*(9*abs_xi7 - 283*abs_xi6 +
         3667*abs_xi5 - 25261*abs_xi4 + 99340*abs_xi3 -
         221416*abs_xi2 + 256552*abs_xi - 117648)/40320.0/abs_xi;
     }
   }
 
   return dphi;
 }
 
 /* ----------------------------------------------------------------------
    Compute direct interaction for intermediate grid levels
 ------------------------------------------------------------------------- */
 void MSM::get_g_direct()
 {
   // (re)build g_direct[level][k], a table of the direct-interaction
   // kernel for every grid offset (ix,iy,iz) in the *_direct ranges
   // k = flattened offset index, with ix varying fastest
 
   if (g_direct) memory->destroy(g_direct);
   memory->create(g_direct,levels,nmax_direct,"msm:g_direct");
 
   const double a = cutoff;
 
   const int nx = nxhi_direct - nxlo_direct + 1;
   const int ny = nyhi_direct - nylo_direct + 1;
 
   // two_n = 2^level, doubled after each level is filled
 
   double two_n = 1.0;
 
   for (int lev = 0; lev < levels; lev++) {
     double *table = g_direct[lev];
 
     for (int iz = nzlo_direct; iz <= nzhi_direct; iz++) {
       const double zdiff = iz/delzinv[lev];
       const int zk = (iz + nzhi_direct)*ny;
 
       for (int iy = nylo_direct; iy <= nyhi_direct; iy++) {
         const double ydiff = iy/delyinv[lev];
         const int zyk = (zk + iy + nyhi_direct)*nx;
 
         for (int ix = nxlo_direct; ix <= nxhi_direct; ix++) {
           const double xdiff = ix/delxinv[lev];
           const double rsq = xdiff*xdiff + ydiff*ydiff + zdiff*zdiff;
           const double rho = sqrt(rsq)/(two_n*a);
           const int k = zyk + ix + nxhi_direct;
           table[k] = gamma(rho)/(two_n*a) - gamma(rho/2.0)/(2.0*two_n*a);
         }
       }
     }
 
     two_n *= 2.0;
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute virial for direct interaction for intermediate grid levels
 ------------------------------------------------------------------------- */
 void MSM::get_virial_direct()
 {
   // (re)build the six tables v0_direct..v5_direct[level][k] holding the
   // xx,yy,zz,xy,xz,yz virial terms for every grid offset (ix,iy,iz)
   // in the *_direct ranges; k = flattened offset index, ix fastest
 
   if (v0_direct) memory->destroy(v0_direct);
   memory->create(v0_direct,levels,nmax_direct,"msm:v0_direct");
   if (v1_direct) memory->destroy(v1_direct);
   memory->create(v1_direct,levels,nmax_direct,"msm:v1_direct");
   if (v2_direct) memory->destroy(v2_direct);
   memory->create(v2_direct,levels,nmax_direct,"msm:v2_direct");
   if (v3_direct) memory->destroy(v3_direct);
   memory->create(v3_direct,levels,nmax_direct,"msm:v3_direct");
   if (v4_direct) memory->destroy(v4_direct);
   memory->create(v4_direct,levels,nmax_direct,"msm:v4_direct");
   if (v5_direct) memory->destroy(v5_direct);
   memory->create(v5_direct,levels,nmax_direct,"msm:v5_direct");
 
   const double a = cutoff;
   const double a_sq = cutoff*cutoff;
 
   const int nx = nxhi_direct - nxlo_direct + 1;
   const int ny = nyhi_direct - nylo_direct + 1;
 
   // two_n = 2^level, doubled after each level is filled
 
   double two_n = 1.0;
 
   for (int lev = 0; lev < levels; lev++) {
     const double two_nsq = two_n * two_n;
 
     for (int iz = nzlo_direct; iz <= nzhi_direct; iz++) {
       const double zdiff = iz/delzinv[lev];
       const int zk = (iz + nzhi_direct)*ny;
 
       for (int iy = nylo_direct; iy <= nyhi_direct; iy++) {
         const double ydiff = iy/delyinv[lev];
         const int zyk = (zk + iy + nyhi_direct)*nx;
 
         for (int ix = nxlo_direct; ix <= nxhi_direct; ix++) {
           const double xdiff = ix/delxinv[lev];
           const double rsq = xdiff*xdiff + ydiff*ydiff + zdiff*zdiff;
           const int k = zyk + ix + nxhi_direct;
           const double r = sqrt(rsq);
 
           // zero separation: store zeros rather than divide by r
 
           if (r == 0) {
             v0_direct[lev][k] = 0.0;
             v1_direct[lev][k] = 0.0;
             v2_direct[lev][k] = 0.0;
             v3_direct[lev][k] = 0.0;
             v4_direct[lev][k] = 0.0;
             v5_direct[lev][k] = 0.0;
             continue;
           }
 
           const double rho = r/(two_n*a);
           const double dg = -(dgamma(rho)/(two_nsq*a_sq) -
             dgamma(rho/2.0)/(4.0*two_nsq*a_sq))/r;
           v0_direct[lev][k] = dg * xdiff * xdiff;
           v1_direct[lev][k] = dg * ydiff * ydiff;
           v2_direct[lev][k] = dg * zdiff * zdiff;
           v3_direct[lev][k] = dg * xdiff * ydiff;
           v4_direct[lev][k] = dg * xdiff * zdiff;
           v5_direct[lev][k] = dg * ydiff * zdiff;
         }
       }
     }
 
     two_n *= 2.0;
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute direct interaction for top grid level
 ------------------------------------------------------------------------- */
 void MSM::get_g_direct_top(int n)
 {
   // (re)build g_direct_top[k], a table of the direct-interaction kernel
   // on grid level n for every offset (ix,iy,iz) with
   // |ix| <= nx_top, |iy| <= ny_top, |iz| <= nz_top
   // k = flattened offset index, with ix varying fastest
 
   int nx_top = betax[n] - alpha[n];
   int ny_top = betay[n] - alpha[n];
   int nz_top = betaz[n] - alpha[n];
 
   int nx = 2*nx_top + 1;
   int ny = 2*ny_top + 1;
   int nz = 2*nz_top + 1;
 
   // the fill loop below writes nx*ny*nz entries; the table is allocated
   // larger than that
   // the y factor was previously written (ny*1), a typo for (ny+1)
 
   int nmax_top = 8*(nx+1)*(ny+1)*(nz+1);
 
   if (g_direct_top) memory->destroy(g_direct_top);
   memory->create(g_direct_top,nmax_top,"msm:g_direct_top");
 
   double a = cutoff;
 
   int zk,zyk,k,ix,iy,iz;
   double xdiff,ydiff,zdiff;
   double rsq,rho,two_n;
 
   two_n = pow(2.0,n);
 
   for (iz = -nz_top; iz <= nz_top; iz++) {
     zdiff = iz/delzinv[n];
     zk = (iz + nz_top)*ny;
     for (iy = -ny_top; iy <= ny_top; iy++) {
       ydiff = iy/delyinv[n];
       zyk = (zk + iy + ny_top)*nx;
       for (ix = -nx_top; ix <= nx_top; ix++) {
         xdiff = ix/delxinv[n];
         rsq = xdiff*xdiff + ydiff*ydiff + zdiff*zdiff;
         rho = sqrt(rsq)/(two_n*a);
         k = zyk + ix + nx_top;
         g_direct_top[k] = gamma(rho)/(two_n*a);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute virial for direct interaction for top grid level
 ------------------------------------------------------------------------- */
 void MSM::get_virial_direct_top(int n)
 {
   // (re)build the six tables v0_direct_top..v5_direct_top[k] holding the
   // xx,yy,zz,xy,xz,yz virial terms on grid level n for every offset
   // (ix,iy,iz) with |ix| <= nx_top, |iy| <= ny_top, |iz| <= nz_top
   // k = flattened offset index, with ix varying fastest
 
   int nx_top = betax[n] - alpha[n];
   int ny_top = betay[n] - alpha[n];
   int nz_top = betaz[n] - alpha[n];
 
   int nx = 2*nx_top + 1;
   int ny = 2*ny_top + 1;
   int nz = 2*nz_top + 1;
 
   // the fill loop below writes nx*ny*nz entries per table; the tables
   // are allocated larger than that
   // the y factor was previously written (ny*1), a typo for (ny+1)
 
   int nmax_top = 8*(nx+1)*(ny+1)*(nz+1);
 
   if (v0_direct_top) memory->destroy(v0_direct_top);
   memory->create(v0_direct_top,nmax_top,"msm:v0_direct_top");
   if (v1_direct_top) memory->destroy(v1_direct_top);
   memory->create(v1_direct_top,nmax_top,"msm:v1_direct_top");
   if (v2_direct_top) memory->destroy(v2_direct_top);
   memory->create(v2_direct_top,nmax_top,"msm:v2_direct_top");
   if (v3_direct_top) memory->destroy(v3_direct_top);
   memory->create(v3_direct_top,nmax_top,"msm:v3_direct_top");
   if (v4_direct_top) memory->destroy(v4_direct_top);
   memory->create(v4_direct_top,nmax_top,"msm:v4_direct_top");
   if (v5_direct_top) memory->destroy(v5_direct_top);
   memory->create(v5_direct_top,nmax_top,"msm:v5_direct_top");
 
   double a = cutoff;
   double a_sq = cutoff*cutoff;
 
   int zk,zyk,k,ix,iy,iz;
   double xdiff,ydiff,zdiff;
   double rsq,r,rho,two_n,two_nsq,dg;
 
   two_n = pow(2.0,n);
   two_nsq = two_n * two_n;
 
   for (iz = -nz_top; iz <= nz_top; iz++) {
     zdiff = iz/delzinv[n];
     zk = (iz + nz_top)*ny;
     for (iy = -ny_top; iy <= ny_top; iy++) {
       ydiff = iy/delyinv[n];
       zyk = (zk + iy + ny_top)*nx;
       for (ix = -nx_top; ix <= nx_top; ix++) {
         xdiff = ix/delxinv[n];
         rsq = xdiff*xdiff + ydiff*ydiff + zdiff*zdiff;
         k = zyk + ix + nx_top;
         r = sqrt(rsq);
 
         // zero separation: store zeros rather than divide by r
 
         if (r == 0) {
           v0_direct_top[k] = 0.0;
           v1_direct_top[k] = 0.0;
           v2_direct_top[k] = 0.0;
           v3_direct_top[k] = 0.0;
           v4_direct_top[k] = 0.0;
           v5_direct_top[k] = 0.0;
         } else {
           rho = r/(two_n*a);
           dg = -(dgamma(rho)/(two_nsq*a_sq))/r;
           v0_direct_top[k] = dg * xdiff * xdiff;
           v1_direct_top[k] = dg * ydiff * ydiff;
           v2_direct_top[k] = dg * zdiff * zdiff;
           v3_direct_top[k] = dg * xdiff * ydiff;
           v4_direct_top[k] = dg * xdiff * zdiff;
           v5_direct_top[k] = dg * ydiff * zdiff;
         }
       }
     }
   }
 }
diff --git a/src/KSPACE/msm.h b/src/KSPACE/msm.h
index 63759850a..085633d8c 100644
--- a/src/KSPACE/msm.h
+++ b/src/KSPACE/msm.h
@@ -1,234 +1,229 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(msm,MSM)
 
 #else
 
 #ifndef LMP_MSM_H
 #define LMP_MSM_H
 
 #include "lmptype.h"
 #include "mpi.h"
 
 #include "kspace.h"
 
 namespace LAMMPS_NS {
 
 class MSM : public KSpace {
  public:
   MSM(class LAMMPS *, int, char **);
   virtual ~MSM();
   void init();
   void setup();
   virtual void compute(int, int);
 
  protected:
   int me,nprocs;
   double precision;
   int nfactors;
   int *factors;
   double qsum,qsqsum,q2;
   double qqrd2e;
   double cutoff;
   double volume;
   double *delxinv,*delyinv,*delzinv,*delvolinv;
 
   int *nx_msm,*ny_msm,*nz_msm;
   int *nxlo_in,*nylo_in,*nzlo_in;
   int *nxhi_in,*nyhi_in,*nzhi_in;
   int *nxlo_out,*nylo_out,*nzlo_out;
   int *nxhi_out,*nyhi_out,*nzhi_out;
   int *ngrid;
   int *alpha,*betax,*betay,*betaz;
   int nxlo_direct,nxhi_direct,nylo_direct;
   int nyhi_direct,nzlo_direct,nzhi_direct;
   int nmax_direct;
   int nlower,nupper;
   int peratom_allocate_flag;
   int levels;
   
   MPI_Comm *world_levels;  
 
   double ****qgrid;
   double ****egrid;
   double ****v0grid,****v1grid,****v2grid;
   double ****v3grid,****v4grid,****v5grid;
   double **g_direct;
   double **v0_direct,**v1_direct,**v2_direct;
   double **v3_direct,**v4_direct,**v5_direct;
   double *g_direct_top;
   double *v0_direct_top,*v1_direct_top,*v2_direct_top;
   double *v3_direct_top,*v4_direct_top,*v5_direct_top;
 
   double **phi1d,**dphi1d;
 
   int procgrid[3];                  // procs assigned in each dim of 3d grid
   int myloc[3];                     // which proc I am in each dim
   int ***procneigh_levels;          // my 6 neighboring procs, 0/1 = left/right
   class CommGrid **cg;
   class CommGrid **cg_peratom;
   class CommGrid *cg_all;
   class CommGrid *cg_peratom_all;
   
   int current_level;
 
   int **part2grid;             // storage for particle -> grid mapping
   int nmax;
 
   double *boxlo;
 
   void set_grid_global();
   void set_proc_grid(int);
   void set_grid_local();
   void setup_grid();
   double estimate_cutoff(double,double);
   double estimate_1d_error(double,double);
   double estimate_3d_error();
   double estimate_total_error();
   void allocate();
   void allocate_peratom();
   void deallocate();
   void deallocate_peratom();
   void allocate_levels();
   void deallocate_levels();
   int factorable(int,int&,int&);
   void particle_map();
   void make_rho();
   virtual void direct(int);
   void direct_top(int);
   void restriction(int);
   void prolongation(int);
   void fieldforce();
   void fieldforce_peratom();
   void compute_phis_and_dphis(const double &, const double &, const double &);
   double compute_phi(const double &);
   double compute_dphi(const double &);
   void get_g_direct();
   void get_virial_direct();
   void get_g_direct_top(int);
   void get_virial_direct_top(int);
 
   // grid communication
   void pack_forward(int, double *, int, int *);
   void unpack_forward(int, double *, int, int *);
   void pack_reverse(int, double *, int, int *);
   void unpack_reverse(int, double *, int, int *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot (yet) use MSM with triclinic box
 
 This feature is not yet supported.
 
 E: Cannot (yet) use MSM with 2d simulation
 
 This feature is not yet supported.
 
 E: Kspace style requires atom attribute q
 
 The atom style defined does not have these attributes.
 
 E: Cannot use slab correction with MSM
 
 Slab correction can only be used with Ewald and PPPM, not MSM.
 
 E: MSM order must be 4, 6, 8, or 10
 
 This is a limitation of the MSM implementation in LAMMPS:
 the MSM order can only be 4, 6, 8, or 10.
 
 E: Cannot (yet) use single precision with MSM (remove -DFFT_SINGLE from Makefile and recompile)
 
 Single precision cannot be used with MSM.
 
 E: KSpace style is incompatible with Pair style
 
 Setting a kspace style requires that a pair style with a long-range
 Coulombic component be selected that is compatible with MSM.  Note
 that TIP4P is not (yet) supported by MSM.
 
 E: Cannot use kspace solver on system with no charge
 
 No atoms in system have a non-zero charge.
 
 E: System is not charge neutral, net charge = %g
 
 The total charge on all atoms on the system is not 0.0, which
 is not valid for MSM.
 
 E: KSpace accuracy must be > 0
 
 The kspace accuracy designated in the input must be greater than zero.
 
 W: Number of MSM mesh points increased to be a multiple of 2
 
 MSM requires that the number of grid points in each direction be a multiple
 of two and the number of grid points in one or more directions have been
 adjusted to meet this requirement.
 
 E: Too many MSM grid levels
 
-UNDOCUMENTED
+The max number of MSM grid levels is hardwired to 10.
 
 W: MSM mesh too small, increasing to 2 points in each direction)
 
-UNDOCUMENTED
+Self-explanatory.
 
 W: Adjusting Coulombic cutoff for MSM, new cutoff = %g
 
 The adjust/cutoff command is turned on and the Coulombic cutoff has been
 adjusted to match the user-specified accuracy.
 
 E: MSM grid is too large
 
 The global MSM grid is larger than OFFSET in one or more dimensions.
 OFFSET is currently set to 16384.  You likely need to decrease the
 requested accuracy.
 
 E: Out of range atoms - cannot compute MSM
 
 One or more atoms are attempting to map their charge to a MSM grid point 
 that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
-U: MSM mesh too small, increasing to 2 points in each direction
-
-The global MSM grid is too small, so the number of grid points has been
-increased
-
 */
diff --git a/src/KSPACE/msm_cg.h b/src/KSPACE/msm_cg.h
index dd413a04e..a1009ce51 100644
--- a/src/KSPACE/msm_cg.h
+++ b/src/KSPACE/msm_cg.h
@@ -1,141 +1,75 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(msm/cg,MSMCG)
 
 #else
 
 #ifndef LMP_MSM_CG_H
 #define LMP_MSM_CG_H
 
 #include "msm.h"
 
 namespace LAMMPS_NS {
 
 class MSMCG : public MSM {
  public:
   MSMCG(class LAMMPS *, int, char **);
   virtual ~MSMCG();
   virtual void compute(int, int);
   virtual double memory_usage();
 
  protected:
   int num_charged;
   int *is_charged;
   double smallq;
 
  protected:
   virtual void particle_map();
   virtual void make_rho();
   virtual void fieldforce();
   virtual void fieldforce_peratom();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Out of range atoms - cannot compute MSM
 
 One or more atoms are attempting to map their charge to a MSM grid point 
 that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
-U: Cannot (yet) use MSM with triclinic box
-
-This feature is not yet supported.
-
-U: Cannot (yet) use MSM with 2d simulation
-
-This feature is not yet supported.
-
-U: Kspace style requires atom attribute q
-
-The atom style defined does not have these attributes.
-
-U: Cannot use slab correction with MSM
-
-Slab correction can only be used with Ewald and PPPM, not MSM.
-
-U: MSM order must be 4, 6, 8, or 10
-
-This is a limitation of the MSM implementation in LAMMPS:
-the MSM order can only be 4, 6, 8, or 10.
-
-U: Cannot (yet) use single precision with MSM (remove -DFFT_SINGLE from Makefile and recompile)
-
-Single precision cannot be used with MSM.
-
-U: KSpace style is incompatible with Pair style
-
-Setting a kspace style requires that a pair style with a long-range
-Coulombic component be selected that is compatible with MSM.  Note
-that TIP4P is not (yet) supported by MSM.
-
-U: Cannot use kspace solver on system with no charge
-
-No atoms in system have a non-zero charge.
-
-U: System is not charge neutral, net charge = %g
-
-The total charge on all atoms on the system is not 0.0, which
-is not valid for MSM.
-
-U: MSM grid is too large
-
-The global MSM grid is larger than OFFSET in one or more dimensions.
-OFFSET is currently set to 16384.  You likely need to decrease the
-requested accuracy.
-
-U: MSM mesh too small, increasing to 2 points in each direction
-
-The global MSM grid is too small, so the number of grid points has been
-increased
-
-U: KSpace accuracy must be > 0
-
-The kspace accuracy designated in the input must be greater than zero.
-
-U: Number of MSM mesh points increased to be a multiple of 2
-
-MSM requires that the number of grid points in each direction be a multiple
-of two and the number of grid points in one or more directions have been
-adjusted to meet this requirement.
-
-U: Adjusting Coulombic cutoff for MSM, new cutoff = %g
-
-The adjust/cutoff command is turned on and the Coulombic cutoff has been
-adjusted to match the user-specified accuracy.
-
 */
diff --git a/src/KSPACE/pair_born_coul_long.h b/src/KSPACE/pair_born_coul_long.h
index f844ec7f9..d8071547a 100644
--- a/src/KSPACE/pair_born_coul_long.h
+++ b/src/KSPACE/pair_born_coul_long.h
@@ -1,90 +1,85 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(born/coul/long,PairBornCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_BORN_COUL_LONG_H
 #define LMP_PAIR_BORN_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairBornCoulLong : public Pair {
  public:
   PairBornCoulLong(class LAMMPS *);
   virtual ~PairBornCoulLong();
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   virtual double single(int, int, int, int, double, double, double, double &);
   virtual void *extract(const char *, int &);
 
  protected:
   double cut_lj_global;
   double **cut_lj,**cut_ljsq;
   double cut_coul,cut_coulsq;
   double **a,**rho,**sigma,**c,**d;
   double **rhoinv,**born1,**born2,**born3,**offset;
   double *cut_respa;
   double g_ewald;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: Pair style born/coul/long requires atom attribute q
 
 An atom style that defines this attribute must be used.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
-
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This pair style is designed for use with a KSpace style.
 
 */
diff --git a/src/KSPACE/pair_buck_coul_long.h b/src/KSPACE/pair_buck_coul_long.h
index da120a487..89115da0a 100644
--- a/src/KSPACE/pair_buck_coul_long.h
+++ b/src/KSPACE/pair_buck_coul_long.h
@@ -1,91 +1,86 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(buck/coul/long,PairBuckCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_BUCK_COUL_LONG_H
 #define LMP_PAIR_BUCK_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairBuckCoulLong : public Pair {
  public:
   PairBuckCoulLong(class LAMMPS *);
   virtual ~PairBuckCoulLong();
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   virtual double single(int, int, int, int, double, double, double, double &);
   virtual void *extract(const char *, int &);
 
  protected:
   double cut_lj_global;
   double **cut_lj,**cut_ljsq;
   double cut_coul,cut_coulsq;
   double **a,**rho,**c;
   double **rhoinv,**buck1,**buck2,**offset;
 
   double *cut_respa;
   double g_ewald;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: Pair style buck/coul/long requires atom attribute q
 
 The atom style defined does not have these attributes.
 
 E: Pair style requres a KSpace style
 
-UNDOCUMENTED
-
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This pair style is designed for use with a KSpace style.
 
 */
diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp
index 23b834ce2..b7435dcc6 100644
--- a/src/KSPACE/pair_buck_long_coul_long.cpp
+++ b/src/KSPACE/pair_buck_long_coul_long.cpp
@@ -1,981 +1,980 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter J. in 't Veld (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "math_vector.h"
 #include "pair_buck_long_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckLongCoulLong::PairBuckLongCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   dispersionflag = ewaldflag = pppmflag = 1;
   respa_enable = 1;
   ftable = NULL;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::options(char **arg, int order)
 {
   const char *option[] = {"long", "cut", "off", NULL};
   int i;
 
-  if (!*arg) error->all(FLERR,"Illegal pair_style buck/coul command");
+  if (!*arg) error->all(FLERR,"Illegal pair_style buck/long/coul/long command");
   for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i);
   switch (i) {
-    default: error->all(FLERR,"Illegal pair_style buck/coul command");
+    default: error->all(FLERR,"Illegal pair_style buck/long/coul/long command");
     case 0: ewald_order |= 1<<order; break;
     case 2: ewald_off |= 1<<order;
     case 1: break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::settings(int narg, char **arg)
 {
   if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ewald_order = 0;
   ewald_off = 0;
 
   options(arg,6);
   options(++arg,1);
 
   if (!comm->me && ewald_order & (1<<6)) 
     error->warning(FLERR,"Geometric mixing assumed for 1/r^6 coefficients");
   if (!comm->me && ewald_order == ((1<<1) | (1<<6))) 
-    error->warning(FLERR,"Using largest cut-off for buck/coul long long");
-  if (!*(++arg)) error->all(FLERR,"Cut-offs missing in pair_style buck/coul");
+    error->warning(FLERR,"Using largest cutoff for buck/long/coul/long");
+  if (!*(++arg)) error->all(FLERR,"Cutoffs missing in pair_style buck/long/coul/long");
   if (ewald_off & (1<<6)) 
-    error->all(FLERR,"LJ6 off not supported in pair_style buck/coul");
+    error->all(FLERR,"LJ6 off not supported in pair_style buck/long/coul/long");
   if (!((ewald_order^ewald_off) & (1<<1))) 
-    error->all(FLERR,"Coulombic cut not supported in pair_style buck/coul");
+    error->all(FLERR,"Coulomb cut not supported in pair_style buck/long/coul/long");
   cut_buck_global = force->numeric(*(arg++));
-  if (*arg && ((ewald_order & 0x42) == 0x42)) 
-    error->all(FLERR,"Only one cut-off allowed when requesting all long");
+  if (narg == 4 && ((ewald_order & 0x42) == 0x42)) // use narg: arg[3] is only valid when narg == 4
+    error->all(FLERR,"Only one cutoff allowed when requesting all long");
   if (narg == 4) cut_coul = force->numeric(*arg);
   else cut_coul = cut_buck_global;
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_buck[i][j] = cut_buck_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairBuckLongCoulLong::~PairBuckLongCoulLong()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(cut_buck_read);
     memory->destroy(cut_buck);
     memory->destroy(cut_bucksq);
     memory->destroy(buck_a_read);
     memory->destroy(buck_a);
     memory->destroy(buck_c_read);
     memory->destroy(buck_c);
     memory->destroy(buck_rho_read);
     memory->destroy(buck_rho);
     memory->destroy(buck1);
     memory->destroy(buck2);
     memory->destroy(rhoinv);
     memory->destroy(offset);
   }
   if (ftable) free_tables();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(cut_buck_read,n+1,n+1,"pair:cut_buck_read");
   memory->create(cut_buck,n+1,n+1,"pair:cut_buck");
   memory->create(cut_bucksq,n+1,n+1,"pair:cut_bucksq");
   memory->create(buck_a_read,n+1,n+1,"pair:buck_a_read");
   memory->create(buck_a,n+1,n+1,"pair:buck_a");
   memory->create(buck_c_read,n+1,n+1,"pair:buck_c_read");
   memory->create(buck_c,n+1,n+1,"pair:buck_c");
   memory->create(buck_rho_read,n+1,n+1,"pair:buck_rho_read");
   memory->create(buck_rho,n+1,n+1,"pair:buck_rho");
   memory->create(buck1,n+1,n+1,"pair:buck1");
   memory->create(buck2,n+1,n+1,"pair:buck2");
   memory->create(rhoinv,n+1,n+1,"pair:rhoinv");
   memory->create(offset,n+1,n+1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    extract protected data from object
 ------------------------------------------------------------------------- */
 
 void *PairBuckLongCoulLong::extract(const char *id, int &dim)
 {
   const char *ids[] = {
     "B", "ewald_order", "ewald_cut", "ewald_mix", "cut_coul", "cut_LJ", NULL};
   void *ptrs[] = {
     buck_c, &ewald_order, &cut_coul, &mix_flag, &cut_coul, &cut_buck_global, 
     NULL};
   int i;
 
   for (i=0; ids[i]&&strcmp(ids[i], id); ++i);
   if (i == 0) dim = 2;
   else dim = 0;
   return ptrs[i];
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(*(arg++),atom->ntypes,ilo,ihi);
   force->bounds(*(arg++),atom->ntypes,jlo,jhi);
 
   double buck_a_one = force->numeric(*(arg++));
   double buck_rho_one = force->numeric(*(arg++));
   double buck_c_one = force->numeric(*(arg++));
 
   double cut_buck_one = cut_buck_global;
   if (narg == 6) cut_buck_one = force->numeric(*(arg++));
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       buck_a_read[i][j] = buck_a_one;
       buck_c_read[i][j] = buck_c_one;
       buck_rho_read[i][j] = buck_rho_one;
       cut_buck_read[i][j] = cut_buck_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::init_style()
 {
   // require an atom style with charge defined
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
-    error->all(FLERR,
-        "Invoking coulombic in pair style lj/coul requires atom attribute q");
+    error->all(FLERR,"Pair style buck/long/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
     if (respa == 0) irequest = neighbor->request(this);
     else if (respa == 1) {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
   } else irequest = neighbor->request(this);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set two g_ewalds
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   if (ewald_order&(1<<1)) g_ewald = force->kspace->g_ewald;
   if (ewald_order&(1<<6)) g_ewald_6 = force->kspace->g_ewald_6;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::init_list(int id, NeighList *ptr)
 {
   if (id == 0) list = ptr;
   else if (id == 1) listinner = ptr;
   else if (id == 2) listmiddle = ptr;
   else if (id == 3) listouter = ptr;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairBuckLongCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   cut_buck[i][j] = cut_buck_read[i][j];
   buck_a[i][j] = buck_a_read[i][j];
   buck_c[i][j] = buck_c_read[i][j];
   buck_rho[i][j] = buck_rho_read[i][j];
 
   double cut = MAX(cut_buck[i][j],cut_coul);
   cutsq[i][j] = cut*cut;
   cut_bucksq[i][j] = cut_buck[i][j] * cut_buck[i][j];
 
   buck1[i][j] = buck_a[i][j]/buck_rho[i][j];
   buck2[i][j] = 6.0*buck_c[i][j];
   rhoinv[i][j] = 1.0/buck_rho[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cut_buck[i][j],cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   if (offset_flag) {
     double rexp = exp(-cut_buck[i][j]/buck_rho[i][j]);
     offset[i][j] = buck_a[i][j]*rexp - buck_c[i][j]/pow(cut_buck[i][j],6.0);
   } else offset[i][j] = 0.0;
 
   cutsq[j][i] = cutsq[i][j];
   cut_bucksq[j][i] = cut_bucksq[i][j];
   buck_a[j][i] = buck_a[i][j];
   buck_c[j][i] = buck_c[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   buck1[j][i] = buck1[i][j];
   buck2[j][i] = buck2[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&buck_a_read[i][j],sizeof(double),1,fp);
         fwrite(&buck_rho_read[i][j],sizeof(double),1,fp);
         fwrite(&buck_c_read[i][j],sizeof(double),1,fp);
         fwrite(&cut_buck_read[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&buck_a_read[i][j],sizeof(double),1,fp);
           fread(&buck_rho_read[i][j],sizeof(double),1,fp);
           fread(&buck_c_read[i][j],sizeof(double),1,fp);
           fread(&cut_buck_read[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&buck_a_read[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&buck_rho_read[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&buck_c_read[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_buck_read[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_restart_settings(FILE *fp)
 {
   const size_t nbytes_double = sizeof(double);
   const size_t nbytes_int = sizeof(int);
 
   // the field order is the on-disk layout; read_restart_settings() reads
   // the same fields in the same order
   fwrite(&cut_buck_global,nbytes_double,1,fp);
   fwrite(&cut_coul,nbytes_double,1,fp);
   fwrite(&offset_flag,nbytes_int,1,fp);
   fwrite(&mix_flag,nbytes_int,1,fp);
   fwrite(&ncoultablebits,nbytes_int,1,fp);
   fwrite(&tabinner,nbytes_double,1,fp);
   fwrite(&ewald_order,nbytes_int,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::read_restart_settings(FILE *fp)
 {
   const size_t nbytes_double = sizeof(double);
   const size_t nbytes_int = sizeof(int);
 
   // only rank 0 reads the file ...
   const int me = comm->me;
   if (me == 0) {
     fread(&cut_buck_global,nbytes_double,1,fp);
     fread(&cut_coul,nbytes_double,1,fp);
     fread(&offset_flag,nbytes_int,1,fp);
     fread(&mix_flag,nbytes_int,1,fp);
     fread(&ncoultablebits,nbytes_int,1,fp);
     fread(&tabinner,nbytes_double,1,fp);
     fread(&ewald_order,nbytes_int,1,fp);
   }
 
   // ... and every rank then receives each value from rank 0
   MPI_Bcast(&cut_buck_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&ewald_order,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    compute pair interactions
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute(int eflag, int vflag)
 {
   // Single-pass force (and optionally energy/virial) evaluation over the
   // neighbor list `list`.  For every pair inside its cutoff this sums a
   // real-space Coulomb term (series or table) and a Buckingham term whose
   // dispersion part is either cut or in long-range form.
   //   eflag: when nonzero, pair energies are computed for the tally
   //   vflag: passed to ev_setup() together with eflag
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // x0/f0 address the coordinate and force arrays as flat arrays of
   // 3 doubles per atom (see the x0+3*j style indexing below)
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // order1 / order6: bits 1 and 6 of ewald_order; they gate the Coulomb
   // branch and the long-range form of the Buckingham branch respectively
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   double qi = 0.0, qri = 0.0, *cutsqi, *cut_bucksqi,
          *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
   double r, rsq, r2inv, force_coul, force_buck;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     // cache the per-type rows for atom i so the inner loop indexes by typej only
     offseti = offset[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei];
     buckai = buck_a[typei]; buckci = buck_c[typei], rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       // ni indexes special_coul/special_lj; ni == 0 is handled as the
       // unscaled case below.  The mask strips those bits from the index.
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       // force_coul and force_buck hold force*r; the common 1/r^2 factor is
       // applied once when fpair is formed
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double x = g_ewald*r;
           register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
             if (eflag) ecoul = t;
           }
           else {                                        // special case
             // f removes the (1-special_coul) share of the plain q*q/r term
             register double f = s*(1.0-special_coul[ni])/r;
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
             if (eflag) ecoul = t-f;
           }
         }                                                // table real space
         else {
           // the table index k is taken from masked bits of rsq viewed as a
           // float; f is the fractional position used for interpolation
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // special case
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
           }
         }
       }
       else force_coul = ecoul = 0.0;
 
       if (rsq < cut_bucksqi[typej]) {                        // buckingham
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         if (order6) {                                        // long-range
           // x2 is reused: first g2*rsq, then the common prefactor
           // a2*exp(-g2*rsq)*C shared by the force and energy polynomials
           register double x2 = g2*rsq, a2 = 1.0/x2;
           x2 = a2*exp(-x2)*buckci[typej];
           if (ni == 0) {
             force_buck =
               r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
             if (eflag) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2;
           }
           else {                                        // special case
             // repulsion is scaled by f; t adds back the (1-f) share of the
             // plain 1/r^6 term
             register double f = special_lj[ni], t = rn*(1.0-f);
             force_buck = f*r*expr*buck1i[typej]-
               g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej];
             if (eflag) evdwl = f*expr*buckai[typej] -
                          g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej];
           }
         }
         else {                                                // cut
           if (ni == 0) {
             force_buck = r*expr*buck1i[typej]-rn*buck2i[typej];
             if (eflag) evdwl = expr*buckai[typej] -
                          rn*buckci[typej]-offseti[typej];
           }
           else {                                        // special case
             register double f = special_lj[ni];
             force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]);
             if (eflag)
               evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]);
           }
         }
       }
       else force_buck = evdwl = 0.0;
 
       fpair = (force_coul+force_buck)*r2inv;
 
       // atom j receives the opposite force only when newton_pair is set or
       // j is a local atom
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fpair,d[0],d[1],d[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute_inner()
 {
   // Innermost rRESPA level: plain q*q/r Coulomb plus cut-form Buckingham
   // forces for pairs in `listinner` closer than cut_respa[1], switched off
   // smoothly between cut_respa[0] and cut_respa[1].  Forces only; nothing
   // is tallied for energy or virial here.
   double r, rsq, r2inv, force_coul = 0.0, force_buck, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
   ineighn = (ineigh = listinner->ilist)+listinner->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
     jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);                        // index into special_coul/special_lj
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]/r : qri*q[j]/r*special_coul[ni];
       // reset explicitly so a pair failing the test above never inherits
       // the Coulomb force of the previously processed pair
       else force_coul = 0.0;
 
       if (rsq < cut_bucksqi[typej = type[j]]) {                // buckingham
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         force_buck = ni == 0 ?
           (r*expr*buck1i[typej]-rn*buck2i[typej]) :
           (r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
       }
       else force_buck = 0.0;
 
       fpair = (force_coul + force_buck) * r2inv;
 
       if (rsq > cut_out_on_sq) {                        // switching
         // factor falls from 1 at cut_out_on to 0 at cut_out_off
         register double rsw = (r - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute_middle()
 {
   // Middle rRESPA level: plain q*q/r Coulomb plus cut-form Buckingham
   // forces for pairs in `listmiddle` with cut_respa[0] < r < cut_respa[3].
   // The force is switched on between cut_respa[0] and cut_respa[1] and
   // switched off between cut_respa[2] and cut_respa[3].  Forces only;
   // nothing is tallied for energy or virial here.
   double r, rsq, r2inv, force_coul = 0.0, force_buck, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
   ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
     jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);                        // index into special_coul/special_lj
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       if (rsq <= cut_in_off_sq) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]/r : qri*q[j]/r*special_coul[ni];
       // reset explicitly so a pair failing the test above never inherits
       // the Coulomb force of the previously processed pair
       else force_coul = 0.0;
 
       if (rsq < cut_bucksqi[typej = type[j]]) {                // buckingham
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         force_buck = ni == 0 ?
           (r*expr*buck1i[typej]-rn*buck2i[typej]) :
           (r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
       }
       else force_buck = 0.0;
 
       fpair = (force_coul + force_buck) * r2inv;
 
       if (rsq < cut_in_on_sq) {                                // switching
         // factor rises from 0 at cut_in_off to 1 at cut_in_on
         register double rsw = (r - cut_in_off)/cut_in_diff;
         fpair  *= rsw*rsw*(3.0 - 2.0*rsw);
       }
       if (rsq > cut_out_on_sq) {
         // factor falls from 1 at cut_out_on to 0 at cut_out_off
         register double rsw = (r - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute_outer(int eflag, int vflag)
 {
   // Outermost rRESPA level over `listouter`.  For each pair the full
   // interaction is evaluated exactly as in compute(); the share of the
   // plain Coulomb/Buckingham force already applied by compute_inner() and
   // compute_middle() (respa_coul, respa_buck) is then subtracted before the
   // force is accumulated.  Energy and virial are tallied with the full
   // pair force.
   //   eflag: when nonzero, pair energies are computed for the tally
   //   vflag: passed to ev_setup() together with eflag
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // order1 / order6: bits 1 and 6 of ewald_order; they gate the Coulomb
   // branch and the long-range form of the Buckingham branch respectively
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni, respa_flag;
   double qi = 0.0, qri = 0.0, *cutsqi, *cut_bucksqi,
          *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
   double r, rsq, r2inv, force_coul, force_buck;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   double respa_buck = 0.0, respa_coul = 0.0, frespa = 0.0;
   vector xi, d;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   ineighn = (ineigh = listouter->ilist)+listouter->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     offseti = offset[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei];
     buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);                        // index into special_coul/special_lj
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       // Fraction of the plain pair force already applied by the inner
       // levels: all of it below cut_in_off, (1 - switch) inside the
       // switching zone (compute_middle() scales by exactly that factor
       // between cut_respa[2] and cut_respa[3]), nothing beyond cut_in_on.
       // The corrections are cleared for every pair so no value carries
       // over from the previous one.
       frespa = 1.0;
       respa_coul = respa_buck = 0.0;
       respa_flag = rsq < cut_in_on_sq ? 1 : 0;
       if (respa_flag && (rsq > cut_in_off_sq)) {
         register double rsw = (r-cut_in_off)/cut_in_diff;
         frespa = 1.0-rsw*rsw*(3.0-2.0*rsw);
       }
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double s = qri*q[j];
           if (respa_flag)                                // correct for respa
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
             if (eflag) ecoul = t;
           }
           else {                                        // correct for special
             // keep the correction in its own temporary (as compute() does):
             // r is still needed as the distance by the Buckingham section
             register double f = s*(1.0-special_coul[ni])/r;
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
             if (eflag) ecoul = t-f;
           }
         }                                                // table real space
         else {
           if (respa_flag) respa_coul = ni == 0 ?        // correct for respa
               frespa*qri*q[j]/r :
               frespa*qri*q[j]/r*special_coul[ni];
           // the table index k is taken from masked bits of rsq viewed as a
           // float; f is the fractional position used for interpolation
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // correct for special
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
           }
         }
       }
       else force_coul = respa_coul = ecoul = 0.0;
 
       if (rsq < cut_bucksqi[typej]) {                        // buckingham
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         if (respa_flag) respa_buck = ni == 0 ?                 // correct for respa
             frespa*(r*expr*buck1i[typej]-rn*buck2i[typej]) :
             frespa*(r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
         if (order6) {                                        // long-range form
           // x2 is reused: first g2*rsq, then the common prefactor
           // a2*exp(-g2*rsq)*C shared by the force and energy polynomials
           register double x2 = g2*rsq, a2 = 1.0/x2;
           x2 = a2*exp(-x2)*buckci[typej];
           if (ni == 0) {
             force_buck =
               r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
             if (eflag) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2;
           }
           else {                                        // correct for special
             register double f = special_lj[ni], t = rn*(1.0-f);
             force_buck = f*r*expr*buck1i[typej]-
               g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej];
             if (eflag) evdwl = f*expr*buckai[typej] -
                          g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej];
           }
         }
         else {                                                // cut form
           if (ni == 0) {
             force_buck = r*expr*buck1i[typej]-rn*buck2i[typej];
             if (eflag)
               evdwl = expr*buckai[typej]-rn*buckci[typej]-offseti[typej];
           }
           else {                                        // correct for special
             register double f = special_lj[ni];
             force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]);
             if (eflag)
               evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]);
           }
         }
       }
       else force_buck = respa_buck = evdwl = 0.0;
 
       // fpair is the full pair force (used for the tally); from here on
       // frespa holds the force this level actually applies
       fpair = (force_coul+force_buck)*r2inv;
       frespa = fpair-(respa_coul+respa_buck)*r2inv;
 
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*frespa; fj[0] -= f;
         fi[1] += f = d[1]*frespa; fj[1] -= f;
         fi[2] += f = d[2]*frespa; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*frespa;
         fi[1] += d[1]*frespa;
         fi[2] += d[2]*frespa;
       }
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fpair,d[0],d[1],d[2]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBuckLongCoulLong::single(int i, int j, int itype, int jtype,
                             double rsq, double factor_coul, double factor_buck,
                             double &fforce)
 {
   // Energy and force for one pair, using the same expressions as the
   // special-bond branches of compute().
   //   i, j           atom indices (used to look up charges)
   //   itype, jtype   atom types (used to look up coefficients)
   //   rsq            squared separation
   //   factor_coul    scaling applied to the plain Coulomb term
   //   factor_buck    scaling applied to the Buckingham term
   //   fforce         set to (force*r)/r^2, i.e. the factor that multiplies
   //                  the pair vector
   // Returns the pair energy.
   double f, r, r2inv, r6inv, force_coul, force_buck;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q;
 
   r = sqrt(rsq);
   r2inv = 1.0/rsq;
   double eng = 0.0;
 
   if ((ewald_order&2) && (rsq < cut_coulsq)) {                // coulombic
     if (!ncoultablebits || rsq <= tabinnersq) {                // series real space
       register double x = g_ewald*r;
       register double s = force->qqrd2e*q[i]*q[j], t = 1.0/(1.0+EWALD_P*x);
       f = s*(1.0-factor_coul)/r; s *= g_ewald*exp(-x*x);
       force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
       eng += t-f;
     }
     else {                                                // table real space
       register union_int_float_t t;
       t.f = rsq;
       register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
       register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j];
       t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]);
       force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
       eng += qiqj*(etable[k]+f*detable[k]-t.f);
     }
   } else force_coul = 0.0;
 
   if (rsq < cut_bucksq[itype][jtype]) {                        // buckingham
     // expr already carries factor_buck
     register double expr = factor_buck*exp(-r*rhoinv[itype][jtype]);
     r6inv = r2inv*r2inv*r2inv;
     if (ewald_order&64) {                                // long-range
       register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_buck);
       x2 = a2*exp(-x2)*buck_c[itype][jtype];
       // the force polynomial ends in +1.0, as in compute(); it previously
       // ended in +a2
       force_buck = buck1[itype][jtype]*r*expr-
                g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2[itype][jtype];
       eng += buck_a[itype][jtype]*expr-
         g6*((a2+1.0)*a2+0.5)*x2+t*buck_c[itype][jtype];
     }
     else {                                                // cut
       // force uses buck2 and the energy subtracts the offset, matching
       // the cut branch of compute()
       force_buck =
         buck1[itype][jtype]*r*expr-factor_buck*buck2[itype][jtype]*r6inv;
       eng += buck_a[itype][jtype]*expr-
         factor_buck*(buck_c[itype][jtype]*r6inv+offset[itype][jtype]);
     }
   } else force_buck = 0.0;
 
   fforce = (force_coul+force_buck)*r2inv;
   return eng;
 }
diff --git a/src/KSPACE/pair_buck_long_coul_long.h b/src/KSPACE/pair_buck_long_coul_long.h
index b421a924c..8d29c2513 100644
--- a/src/KSPACE/pair_buck_long_coul_long.h
+++ b/src/KSPACE/pair_buck_long_coul_long.h
@@ -1,122 +1,126 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(buck/long/coul/long,PairBuckLongCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_BUCK_LONG_COUL_LONG_H
 #define LMP_PAIR_BUCK_LONG_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 // Pair style registered above under the name buck/long/coul/long:
 // Buckingham plus Coulomb interactions, each optionally in long-range form.
 class PairBuckLongCoulLong : public Pair {
  public:
   double cut_coul;
 
   PairBuckLongCoulLong(class LAMMPS *);
   ~PairBuckLongCoulLong();
   virtual void compute(int, int);
 
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   void init_list(int, class NeighList *);
   double init_one(int, int);
   // per-type-pair coefficients (the *_read arrays) to/from a restart file
   void write_restart(FILE *);
   void read_restart(FILE *);
 
   // global settings to/from a restart file
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   // energy (return value) and force factor (last argument) for one pair
   double single(int, int, int, int, double, double, double, double &);
   void *extract(const char *, int &);
 
   // rRESPA levels; inner and middle accumulate forces only, outer also
   // takes the energy/virial flags
   void compute_inner();
   void compute_middle();
   void compute_outer(int, int);
 
  protected:
   double cut_buck_global;
   double **cut_buck, **cut_buck_read, **cut_bucksq;
   double cut_coulsq;
   double **buck_a_read, **buck_a, **buck_c_read, **buck_c;
   double **buck1, **buck2, **buck_rho_read, **buck_rho, **rhoinv, **offset;
   double *cut_respa;            // entries [0..3] are read by the rRESPA levels
   double g_ewald;               // used in the real-space Coulomb series
   double g_ewald_6;             // used in the long-range dispersion terms
   // ewald_order: bit 1 gates Coulomb, bit 6 gates long-range dispersion
   int ewald_order, ewald_off;
 
   void options(char **arg, int order);
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
 W: Geometric mixing assumed for 1/r^6 coefficients
 
-UNDOCUMENTED
+Self-explanatory.
 
-W: Using largest cut-off for buck/coul long long
+W: Using largest cutoff for buck/long/coul/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Cut-offs missing in pair_style buck/coul
+E: Cutoffs missing in pair_style buck/long/coul/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: LJ6 off not supported in pair_style buck/coul
+E: LJ6 off not supported in pair_style buck/long/coul/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Coulombic cut not supported in pair_style buck/coul
+E: Coulomb cut not supported in pair_style buck/long/coul/long
 
-UNDOCUMENTED
+Must use long-range Coulombic interactions.
 
-E: Only one cut-off allowed when requesting all long
+E: Only one cutoff allowed when requesting all long
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script or data file.
 
-E: Invoking coulombic in pair style lj/coul requires atom attribute q
+E: Pair style buck/long/coul/long requires atom attribute q
 
-UNDOCUMENTED
+The atom style defined does not have this attribute.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
+This pair style is designed for use with a KSpace style.
 
 E: All pair coeffs are not set
 
-UNDOCUMENTED
+All pair coefficients must be set in the data file or by the
+pair_coeff command before running a simulation.
 
 E: Pair cutoff < Respa interior cutoff
 
-UNDOCUMENTED
+One or more pairwise cutoffs are too short to use with the specified
+rRESPA cutoffs.
 
 */
diff --git a/src/KSPACE/pair_coul_long.h b/src/KSPACE/pair_coul_long.h
index 766438b2b..49ddb3a98 100644
--- a/src/KSPACE/pair_coul_long.h
+++ b/src/KSPACE/pair_coul_long.h
@@ -1,87 +1,77 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(coul/long,PairCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_COUL_LONG_H
 #define LMP_PAIR_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 // Pair style registered above under the name coul/long.
 // NOTE(review): the member definitions are not part of this header; the
 // comments below go by the declarations only.
 class PairCoulLong : public Pair {
  public:
   PairCoulLong(class LAMMPS *);
   ~PairCoulLong();
   // virtual members can be overridden by derived pair styles
   virtual void compute(int, int);
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
   double init_one(int, int);
   // restart-file hooks
   void write_restart(FILE *);
   void read_restart(FILE *);
   virtual void write_restart_settings(FILE *);
   virtual void read_restart_settings(FILE *);
   virtual double single(int, int, int, int, double, double, double, double &);
   virtual void *extract(const char *, int &);
 
  protected:
   double cut_coul,cut_coulsq;   // presumably the cutoff and its square
   double *cut_respa;
   double g_ewald;
   double **scale;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair style lj/cut/coul/long requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
-
-U: Pair cutoff < Respa interior cutoff
-
-One or more pairwise cutoffs are too short to use with the specified
-rRESPA cutoffs.
-
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This pair style is designed for use with a KSpace style.
 
 */
diff --git a/src/KSPACE/pair_lj_charmm_coul_long.h b/src/KSPACE/pair_lj_charmm_coul_long.h
index cf4e50d51..2f4f0c55b 100644
--- a/src/KSPACE/pair_lj_charmm_coul_long.h
+++ b/src/KSPACE/pair_lj_charmm_coul_long.h
@@ -1,109 +1,104 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/charmm/coul/long,PairLJCharmmCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_H
 #define LMP_PAIR_LJ_CHARMM_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 // Pair style registered above under the name lj/charmm/coul/long.
 // NOTE(review): the member definitions are not part of this header; the
 // comments below go by the declarations only.
 class PairLJCharmmCoulLong : public Pair {
  public:
   PairLJCharmmCoulLong(class LAMMPS *);
   virtual ~PairLJCharmmCoulLong();
 
   virtual void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   void init_list(int, class NeighList *);
   double init_one(int, int);
   // restart-file hooks
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   virtual double single(int, int, int, int, double, double, double, double &);
 
   // inner/middle/outer evaluation entry points; only compute_outer and
   // extract are virtual
   void compute_inner();
   void compute_middle();
   virtual void compute_outer(int, int);
   virtual void *extract(const char *, int &);
 
  protected:
   int implicit;
   double cut_lj_inner,cut_lj;
   double cut_lj_innersq,cut_ljsq;   // presumably squares of the two above
   double cut_coul,cut_coulsq;
   double cut_bothsq;
   double denom_lj;
   double **epsilon,**sigma,**eps14,**sigma14;
   double **lj1,**lj2,**lj3,**lj4,**offset;
   double **lj14_1,**lj14_2,**lj14_3,**lj14_4;
   double *cut_respa;
   double g_ewald;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair style lj/charmm/coul/long requires atom attribute q
 
 The atom style defined does not have these attributes.
 
 E: Pair inner cutoff >= Pair outer cutoff
 
 The specified cutoffs for the pair style are inconsistent.
 
 E: Pair cutoff < Respa interior cutoff
 
 One or more pairwise cutoffs are too short to use with the specified
 rRESPA cutoffs.
 
 E: Pair inner cutoff < Respa interior cutoff
 
 One or more pairwise cutoffs are too short to use with the specified
 rRESPA cutoffs.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
-
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This pair style is designed for use with a KSpace style.
 
 */
diff --git a/src/KSPACE/pair_lj_cut_coul_long.h b/src/KSPACE/pair_lj_cut_coul_long.h
index 87b809917..9676b52d1 100644
--- a/src/KSPACE/pair_lj_cut_coul_long.h
+++ b/src/KSPACE/pair_lj_cut_coul_long.h
@@ -1,97 +1,92 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/cut/coul/long,PairLJCutCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_CUT_COUL_LONG_H
 #define LMP_PAIR_LJ_CUT_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairLJCutCoulLong : public Pair {
 
  public:
   PairLJCutCoulLong(class LAMMPS *);
   virtual ~PairLJCutCoulLong();
   virtual void compute(int, int);
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
   void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   virtual void write_restart_settings(FILE *);
   virtual void read_restart_settings(FILE *);
   virtual double single(int, int, int, int, double, double, double, double &);
 
   void compute_inner();
   void compute_middle();
   virtual void compute_outer(int, int);
   virtual void *extract(const char *, int &);
 
  protected:
   double cut_lj_global;
   double **cut_lj,**cut_ljsq;
   double cut_coul,cut_coulsq;
   double **epsilon,**sigma;
   double **lj1,**lj2,**lj3,**lj4,**offset;
   double *cut_respa;
   double qdist;             // TIP4P distance from O site to negative charge
   double g_ewald;
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair style lj/cut/coul/long requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
+This pair style is designed for use with a KSpace style.
 
 E: Pair cutoff < Respa interior cutoff
 
 One or more pairwise cutoffs are too short to use with the specified
 rRESPA cutoffs.
 
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
-
 */
diff --git a/src/KSPACE/pair_lj_cut_tip4p_long.h b/src/KSPACE/pair_lj_cut_tip4p_long.h
index 5bb35d71a..5d0bfed4e 100644
--- a/src/KSPACE/pair_lj_cut_tip4p_long.h
+++ b/src/KSPACE/pair_lj_cut_tip4p_long.h
@@ -1,110 +1,105 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/cut/tip4p/long,PairLJCutTIP4PLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_CUT_TIP4P_LONG_H
 #define LMP_PAIR_LJ_CUT_TIP4P_LONG_H
 
 #include "pair_lj_cut_coul_long.h"
 
 namespace LAMMPS_NS {
 
 class PairLJCutTIP4PLong : public PairLJCutCoulLong {
  public:
   PairLJCutTIP4PLong(class LAMMPS *);
   ~PairLJCutTIP4PLong();
   virtual void compute(int, int);
   void settings(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart_settings(FILE *fp);
   void read_restart_settings(FILE *fp);
   void *extract(const char *, int &);
   double memory_usage();
 
  protected:
   int typeH,typeO;             // atom types of TIP4P water H and O atoms
   int typeA,typeB;             // angle and bond types of TIP4P water
   double alpha;                // geometric constraint parameter for TIP4P
 
   int nmax;                    // info on off-oxygen charge sites
   int **hneigh;                // 0,1 = indices of 2 H associated with O
                                // 2 = 0 if site loc not yet computed, 1 if yes
   double **newsite;            // locations of charge sites
 
   void compute_newsite(double *, double *, double *, double *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: TIP4P hydrogen is missing
 
 The TIP4P pairwise computation failed to find the correct H atom
 within a water molecule.
 
 E: TIP4P hydrogen has incorrect atom type
 
 The TIP4P pairwise computation found an H atom whose type does not
 agree with the specified H type.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Pair style lj/cut/coul/long/tip4p requires atom IDs
 
 There are no atom IDs defined in the system and the TIP4P potential
 requires them to find O,H atoms with a water molecule.
 
 E: Pair style lj/cut/coul/long/tip4p requires newton pair on
 
 This is because the computation of constraint forces within a water
 molecule adds forces to atoms owned by other processors.
 
 E: Pair style lj/cut/coul/long/tip4p requires atom attribute q
 
 The atom style defined does not have these attributes.
 
 E: Must use a bond style with TIP4P potential
 
 TIP4P potentials assume bond lengths in water are constrained
 by a fix shake command.
 
 E: Must use an angle style with TIP4P potential
-
+
 TIP4P potentials assume angles in water are constrained by a fix shake
 command.
 
 E: Water H epsilon must be 0.0 for pair style lj/cut/coul/long/tip4p
 
 This is because LAMMPS does not compute the Lennard-Jones interactions
 with these particles for efficiency reasons.
 
-U: Pair style is incompatible with KSpace style
-
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
-
 */
diff --git a/src/KSPACE/pair_lj_long_coul_long.cpp b/src/KSPACE/pair_lj_long_coul_long.cpp
index 557c3c563..87f56fbc7 100644
--- a/src/KSPACE/pair_lj_long_coul_long.cpp
+++ b/src/KSPACE/pair_lj_long_coul_long.cpp
@@ -1,980 +1,979 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter J. in 't Veld (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "math_vector.h"
 #include "pair_lj_long_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJLongCoulLong::PairLJLongCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   dispersionflag = ewaldflag = pppmflag = 1;
   respa_enable = 1;
   ftable = NULL;
   qdist = 0.0;
 }
  
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::options(char **arg, int order)
 {
   const char *option[] = {"long", "cut", "off", NULL};
   int i;
 
   if (!*arg) error->all(FLERR,"Illegal pair_style lj/long/coul/long command");
   for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i);
   switch (i) {
     default: error->all(FLERR,"Illegal pair_style lj/long/coul/long command");
     case 0: ewald_order |= 1<<order; break;
     case 2: ewald_off |= 1<<order;
     case 1: break;
   }
 }
 
 void PairLJLongCoulLong::settings(int narg, char **arg)
 {
   if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ewald_off = 0;
   ewald_order = 0;
   options(arg, 6);
   options(++arg, 1);
   if (!comm->me && ewald_order & (1<<6)) 
-    error->warning(FLERR,"Mixing forced for lj coefficients");
+    error->warning(FLERR,"Mixing forced for LJ coefficients");
   if (!comm->me && ewald_order == ((1<<1) | (1<<6)))
-    error->warning(FLERR,"Using largest cut-off for lj/coul long long");
+    error->warning(FLERR,"Using largest cutoff for pair_style lj/long/coul/long");
   if (!*(++arg)) 
-    error->all(FLERR,"Cut-offs missing in pair_style lj/coul");
+    error->all(FLERR,"Cutoffs missing in pair_style lj/long/coul/long");
   if (!((ewald_order^ewald_off) & (1<<1))) 
-    error->all(FLERR,"Coulombic cut not supported in pair_style lj/coul");
+    error->all(FLERR,"Coulomb cut not supported in pair_style lj/long/coul/long");
   cut_lj_global = force->numeric(*(arg++));
   if (*arg && ((ewald_order & 0x42) == 0x42)) 
-    error->all(FLERR,"Only one cut-off allowed when requesting all long");
+    error->all(FLERR,"Only one cutoff allowed when requesting all long");
   if (narg == 4) cut_coul = force->numeric(*arg);
   else cut_coul = cut_lj_global;
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJLongCoulLong::~PairLJLongCoulLong()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(cut_lj_read);
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
     memory->destroy(epsilon_read);
     memory->destroy(epsilon);
     memory->destroy(sigma_read);
     memory->destroy(sigma);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
   }
   if (ftable) free_tables();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(cut_lj_read,n+1,n+1,"pair:cut_lj_read");
   memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
   memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
   memory->create(epsilon_read,n+1,n+1,"pair:epsilon_read");
   memory->create(epsilon,n+1,n+1,"pair:epsilon");
   memory->create(sigma_read,n+1,n+1,"pair:sigma_read");
   memory->create(sigma,n+1,n+1,"pair:sigma");
   memory->create(lj1,n+1,n+1,"pair:lj1");
   memory->create(lj2,n+1,n+1,"pair:lj2");
   memory->create(lj3,n+1,n+1,"pair:lj3");
   memory->create(lj4,n+1,n+1,"pair:lj4");
   memory->create(offset,n+1,n+1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    extract protected data from object
 ------------------------------------------------------------------------- */
 
 void *PairLJLongCoulLong::extract(const char *id, int &dim)
 {
   const char *ids[] = {
     "B", "sigma", "epsilon", "ewald_order", "ewald_cut", "ewald_mix",
     "cut_coul", "cut_LJ", NULL};
   void *ptrs[] = {
     lj4, sigma, epsilon, &ewald_order, &cut_coul, &mix_flag,
     &cut_coul, &cut_lj_global, NULL};
   int i;
 
   for (i=0; ids[i]&&strcmp(ids[i], id); ++i);
   if (i <= 2) dim = 2;
   else dim = 0;
   return ptrs[i];
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(arg[2]);
   double sigma_one = force->numeric(arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon_read[i][j] = epsilon_one;
       sigma_read[i][j] = sigma_one;
       cut_lj_read[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::init_style()
 {
   const char *style1[] = 
     {"ewald", "ewald/n", "pppm", "pppm_disp", "pppm_disp/tip4p", NULL};
   const char *style6[] = {"ewald/n", "pppm_disp", "pppm_disp/tip4p", NULL};
   int i;
 
   // require an atom style with charge defined
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
-    error->all(FLERR,
-        "Invoking coulombic in pair style lj/coul requires atom attribute q");
+    error->all(FLERR,"Pair style lj/long/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
     if (respa == 0) irequest = neighbor->request(this);
     else if (respa == 1) {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
   } else irequest = neighbor->request(this);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   if (force->kspace) g_ewald = force->kspace->g_ewald;
   if (force->kspace) g_ewald_6 = force->kspace->g_ewald_6;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::init_list(int id, NeighList *ptr)
 {
   if (id == 0) list = ptr;
   else if (id == 1) listinner = ptr;
   else if (id == 2) listmiddle = ptr;
   else if (id == 3) listouter = ptr;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJLongCoulLong::init_one(int i, int j)
 {
   if ((ewald_order&(1<<6))||(setflag[i][j] == 0)) {
     epsilon[i][j] = mix_energy(epsilon_read[i][i],epsilon_read[j][j],
                                sigma_read[i][i],sigma_read[j][j]);
     sigma[i][j] = mix_distance(sigma_read[i][i],sigma_read[j][j]);
     if (ewald_order&(1<<6))
       cut_lj[i][j] = cut_lj_global;
     else
       cut_lj[i][j] = mix_distance(cut_lj_read[i][i],cut_lj_read[j][j]);
   }
   else {
     sigma[i][j] = sigma_read[i][j];
     epsilon[i][j] = epsilon_read[i][j];
     cut_lj[i][j] = cut_lj_read[i][j];
   }
 
   double cut = MAX(cut_lj[i][j], cut_coul + 2.0*qdist);
   cutsq[i][j] = cut*cut;
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   cutsq[j][i] = cutsq[i][j];
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&epsilon_read[i][j],sizeof(double),1,fp);
         fwrite(&sigma_read[i][j],sizeof(double),1,fp);
         fwrite(&cut_lj_read[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&epsilon_read[i][j],sizeof(double),1,fp);
           fread(&sigma_read[i][j],sizeof(double),1,fp);
           fread(&cut_lj_read[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&epsilon_read[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma_read[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj_read[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
   fwrite(&ewald_order,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
     fread(&ewald_order,sizeof(int),1,fp);
   }
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&ewald_order,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    compute pair interactions
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute(int eflag, int vflag)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   double qi = 0.0, qri = 0.0;
   double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double rsq, r2inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     offseti = offset[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double r = sqrt(rsq), x = g_ewald*r;
           register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
             if (eflag) ecoul = t;
           }
           else {                                        // special case
             r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r;
             if (eflag) ecoul = t-r;
           }
         }                                                // table real space
         else {
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask)>>ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // special case
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
           }
         }
       }
       else force_coul = ecoul = 0.0;
 
       if (rsq < cut_ljsqi[typej]) {                        // lj
                if (order6) {                                        // long-range lj
           register double rn = r2inv*r2inv*r2inv;
           register double x2 = g2*rsq, a2 = 1.0/x2;
           x2 = a2*exp(-x2)*lj4i[typej];
           if (ni == 0) {
             force_lj =
               (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
             if (eflag)
               evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
           }
           else {                                        // special case
             register double f = special_lj[ni], t = rn*(1.0-f);
             force_lj = f*(rn *= rn)*lj1i[typej]-
               g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej];
             if (eflag)
               evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
           }
         }
         else {                                                // cut lj
           register double rn = r2inv*r2inv*r2inv;
           if (ni == 0) {
             force_lj = rn*(rn*lj1i[typej]-lj2i[typej]);
             if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
           }
           else {                                        // special case
             register double f = special_lj[ni];
             force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]);
             if (eflag)
               evdwl = f * (rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
           }
         }
       }
       else force_lj = evdwl = 0.0;
 
       fpair = (force_coul+force_lj)*r2inv;
 
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fpair,d[0],d[1],d[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute_inner()
 {
   double rsq, r2inv, force_coul = 0.0, force_lj, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq)) {                       // coulombic
         qri = qqrd2e*q[i];
         force_coul = ni == 0 ?
           qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni];
       }
 
       if (rsq < cut_ljsqi[typej = type[j]]) {                // lennard-jones
         register double rn = r2inv*r2inv*r2inv;
         force_lj = ni == 0 ?
           rn*(rn*lj1i[typej]-lj2i[typej]) :
           rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
       }
       else force_lj = 0.0;
 
       fpair = (force_coul + force_lj) * r2inv;
 
       if (rsq > cut_out_on_sq) {                        // switching
         register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA middle-level pair forces.
 //
 // Accumulates into atom->f the bare Coulomb (qqrd2e*qi*qj/r, scaled by
 // special_coul) and cut-form 12-6 LJ forces for neighbor pairs whose squared
 // distance lies strictly between cut_respa[0]^2 and cut_respa[3]^2.
 // The pair force is smoothly switched on between cut_respa[0] and
 // cut_respa[1] and switched off between cut_respa[2] and cut_respa[3].
 // No energy or virial is tallied in this routine.
 void PairLJLongCoulLong::compute_middle()
 {
   double rsq, r2inv, force_coul = 0.0, force_lj, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // the four rRESPA switching radii: inner ramp [0,1], outer ramp [2,3]
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                      // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);                                       // special-bond index
       j &= NEIGHMASK;
 
       { double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       // only pairs inside the middle shell contribute
 
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       if (rsq <= cut_in_off_sq) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni];
       else force_coul = 0.0;       // never reuse the value of a previous pair
 
       if (rsq < cut_ljsqi[typej = type[j]]) {                // lennard-jones
         double rn = r2inv*r2inv*r2inv;
         force_lj = ni == 0 ?
           rn*(rn*lj1i[typej]-lj2i[typej]) :
           rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
       }
       else force_lj = 0.0;
 
       fpair = (force_coul + force_lj) * r2inv;
 
       if (rsq < cut_in_on_sq) {                                // switch on
         double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
         fpair  *= rsw*rsw*(3.0 - 2.0*rsw);
       }
       if (rsq > cut_out_on_sq) {                               // switch off
         double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {                                 // j is a ghost and newton is off
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute_outer(int eflag, int vflag)
 {
   double evdwl,ecoul,fvirial,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni, respa_flag;
   double qi = 0.0, qri = 0.0;
   double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double rsq, r2inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   double respa_lj = 0.0, respa_coul = 0.0, frespa = 0.0;
   vector xi, d;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     offseti = offset[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
 
       frespa = 1.0;                                       // check whether and how to compute respa corrections
       respa_coul = 0;
       respa_lj = 0;
       respa_flag = rsq < cut_in_on_sq ? 1 : 0;
       if (respa_flag && (rsq > cut_in_off_sq)) {
         register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff;
         frespa = 1-rsw*rsw*(3.0-2.0*rsw);
       }
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double r = sqrt(rsq), s = qri*q[j];
           if (respa_flag)                                // correct for respa
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-respa_coul;
             if (eflag) ecoul = t;
           }
           else {                                        // correct for special
             r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r-respa_coul;
             if (eflag) ecoul = t-r;
           }
         }                                                // table real space
         else {
           if (respa_flag) {
             register double r = sqrt(rsq), s = qri*q[j];
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           }
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // correct for special
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) {
               t.f = (1.0-special_coul[ni])*(ptable[k]+f*dptable[k]);
               ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
             }
           }
         }
       }
       else force_coul = respa_coul = ecoul = 0.0;
 
       if (rsq < cut_ljsqi[typej]) {                        // lennard-jones
         register double rn = r2inv*r2inv*r2inv;
         if (respa_flag) respa_lj = ni == 0 ?                 // correct for respa
             frespa*rn*(rn*lj1i[typej]-lj2i[typej]) :
             frespa*rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
         if (order6) {                                        // long-range form
           register double x2 = g2*rsq, a2 = 1.0/x2;
           x2 = a2*exp(-x2)*lj4i[typej];
           if (ni == 0) {
             force_lj =
               (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq-respa_lj;
             if (eflag) evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
           }
           else {                                        // correct for special
             register double f = special_lj[ni], t = rn*(1.0-f);
             force_lj = f*(rn *= rn)*lj1i[typej]-
               g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]-respa_lj;
             if (eflag)
               evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
           }
         }
         else {                                                // cut form
           if (ni == 0) {
             force_lj = rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj;
             if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
           }
           else {                                        // correct for special
             register double f = special_lj[ni];
             force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj;
             if (eflag)
               evdwl = f*(rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
           }
         }
       }
       else force_lj = respa_lj = evdwl = 0.0;
 
       fpair = (force_coul+force_lj)*r2inv;
 
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) {
           fvirial = (force_coul + force_lj + respa_coul + respa_lj)*r2inv;
           ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fvirial,d[0],d[1],d[2]);
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJLongCoulLong::single(int i, int j, int itype, int jtype,
                           double rsq, double factor_coul, double factor_lj,
                           double &fforce)
 {
   double r2inv, r6inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q;
 
   double eng = 0.0;
 
   r2inv = 1.0/rsq;
   if ((ewald_order&2) && (rsq < cut_coulsq)) {                // coulombic
     if (!ncoultablebits || rsq <= tabinnersq) {                // series real space
       register double r = sqrt(rsq), x = g_ewald*r;
       register double s = force->qqrd2e*q[i]*q[j], t = 1.0/(1.0+EWALD_P*x);
       r = s*(1.0-factor_coul)/r; s *= g_ewald*exp(-x*x);
       force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r;
       eng += t-r;
     }
     else {                                                // table real space
       register union_int_float_t t;
       t.f = rsq;
       register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
       register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j];
       t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]);
       force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
       eng += qiqj*(etable[k]+f*detable[k]-t.f);
     }
   } else force_coul = 0.0;
 
   if (rsq < cut_ljsq[itype][jtype]) {                        // lennard-jones
     r6inv = r2inv*r2inv*r2inv;
     if (ewald_order&64) {                                // long-range
       register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_lj);
       x2 = a2*exp(-x2)*lj4[itype][jtype];
       force_lj = factor_lj*(r6inv *= r6inv)*lj1[itype][jtype]-
                g8*(((6.0*a2+6.0)*a2+3.0)*a2+a2)*x2*rsq+t*lj2[itype][jtype];
       eng += factor_lj*r6inv*lj3[itype][jtype]-
         g6*((a2+1.0)*a2+0.5)*x2+t*lj4[itype][jtype];
     }
     else {                                                // cut
       force_lj = factor_lj*r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype]);
       eng += factor_lj*(r6inv*(r6inv*lj3[itype][jtype]-
                                lj4[itype][jtype])-offset[itype][jtype]);
     }
   } else force_lj = 0.0;
 
   fforce = (force_coul+force_lj)*r2inv;
   return eng;
 }
diff --git a/src/KSPACE/pair_lj_long_coul_long.h b/src/KSPACE/pair_lj_long_coul_long.h
index 9041bb8d5..e2a34bab5 100644
--- a/src/KSPACE/pair_lj_long_coul_long.h
+++ b/src/KSPACE/pair_lj_long_coul_long.h
@@ -1,114 +1,117 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/long/coul/long,PairLJLongCoulLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_LONG_COUL_LONG_H
 #define LMP_PAIR_LJ_LONG_COUL_LONG_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 // Pair style lj/long/coul/long: Lennard-Jones plus Coulomb interactions
 // where each of the two terms can be evaluated in a long-range form.
 class PairLJLongCoulLong : public Pair {
  public:
   double cut_coul;                 // Coulomb cutoff distance
 
   PairLJLongCoulLong(class LAMMPS *);
   virtual ~PairLJLongCoulLong();
   virtual void compute(int, int);
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
 
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   // energy of one pair; the force divided by r is returned through the
   // trailing reference argument
   double single(int, int, int, int, double, double, double, double &);
   void *extract(const char *, int &);
 
   // rRESPA force kernels for the inner, middle and outer levels
   void compute_inner();
   void compute_middle();
   void compute_outer(int, int);
 
  protected:
   double cut_lj_global;
   double **cut_lj, **cut_lj_read, **cut_ljsq;   // per type-pair LJ cutoffs
   double cut_coulsq;               // compared against squared distances
   double **epsilon_read, **epsilon, **sigma_read, **sigma;
   // per type-pair coefficients: lj1/lj2 enter the force expressions,
   // lj3/lj4 the energy expressions, offset is subtracted from the cut energy
   double **lj1, **lj2, **lj3, **lj4, **offset;
   double *cut_respa;               // indexed [0..3] by the rRESPA kernels
   double qdist;                    // presumably the TIP4P O-M distance used by derived styles -- TODO confirm
   double g_ewald;                  // Ewald parameter of the Coulomb real-space term
   double g_ewald_6;                // Ewald parameter of the dispersion real-space term
   // bit masks: bit 1 is tested for the Coulomb term, bit 6 for the LJ term
   int ewald_order, ewald_off;
 
   void options(char **arg, int order);
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
-W: Mixing forced for lj coefficients
+W: Mixing forced for LJ coefficients
 
-UNDOCUMENTED
+Self-explanatory.
 
-W: Using largest cut-off for lj/coul long long
+W: Using largest cutoff for pair_style lj/long/coul/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Cut-offs missing in pair_style lj/coul
+E: Cutoffs missing in pair_style lj/long/coul/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Coulombic cut not supported in pair_style lj/coul
+E: Coulomb cut not supported in pair_style lj/long/coul/long
 
-UNDOCUMENTED
+Must use long-range Coulombic interactions.
 
-E: Only one cut-off allowed when requesting all long
+E: Only one cutoff allowed when requesting all long
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script or data file.
 
-E: Invoking coulombic in pair style lj/coul requires atom attribute q
+E: Pair style lj/long/coul/long requires atom attribute q
 
-UNDOCUMENTED
+The atom style defined does not have this attribute.
 
 E: Pair style requires a KSpace style
 
-UNDOCUMENTED
+This pair style is designed for use with a KSpace style.
 
 E: Pair cutoff < Respa interior cutoff
 
-UNDOCUMENTED
+One or more pairwise cutoffs are too short to use with the specified
+rRESPA cutoffs.
 
 */
diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp
index 0b55782e9..1558fab40 100755
--- a/src/KSPACE/pair_lj_long_tip4p_long.cpp
+++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp
@@ -1,643 +1,643 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL)
                          Rolf Isele-Holder (Aachen University)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_long_tip4p_long.h"
 #include "angle.h"
 #include "atom.h"
 #include "bond.h"
 #include "comm.h"
 #include "domain.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the TIP4P variant on top of lj/long/coul/long.
 PairLJLongTIP4PLong::PairLJLongTIP4PLong(LAMMPS *lmp) :
   PairLJLongCoulLong(lmp)
 {
   // per-atom work arrays are allocated lazily in compute()
 
   newsite = NULL;
   hneigh = NULL;
   nmax = 0;
 
   // the virial cannot be obtained as F dot r for TIP4P, because
   // find_M() may pick bonded H atoms that are not close to the O atom
 
   no_virial_fdotr_compute = 1;
 
   // capability flags of this style
 
   respa_enable = 0;
   single_enable = 0;
   tip4pflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom work arrays owned by this style.
 PairLJLongTIP4PLong::~PairLJLongTIP4PLong()
 {
   memory->destroy(newsite);    // M-site coordinates
   memory->destroy(hneigh);     // cached hydrogen indices and update flags
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongTIP4PLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   int n,vlist[6];
   int key;
   int iH1,iH2,jH1,jH2;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
   double fraction,table;
   double r,r2inv,forcecoul,forcelj,cforce;
   double factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double xiM[3],xjM[3],fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];// f1[3];
   double *x1,*x2;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
  
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // reallocate hneigh & newsite if necessary
   // initialize hneigh[0] to -1 on steps when reneighboring occurred
   // initialize hneigh[2] to 0 every step
 
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   if (atom->nmax > nmax) {
     nmax = atom->nmax;
     memory->destroy(hneigh);
     memory->create(hneigh,nmax,3,"pair:hneigh");
     memory->destroy(newsite);
     memory->create(newsite,nmax,3,"pair:newsite");
   }
   if (neighbor->ago == 0)
     for (i = 0; i < nall; i++) hneigh[i][0] = -1;
   for (i = 0; i < nall; i++) hneigh[i][2] = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
   double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist);
 
   int order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int ni;
   double  *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     if (itype == typeO) {
       if (hneigh[i][0] < 0) {
         hneigh[i][0] = iH1 = atom->map(atom->tag[i] + 1);
         hneigh[i][1] = iH2 = atom->map(atom->tag[i] + 2);
         hneigh[i][2] = 1;
         if (iH1 == -1 || iH2 == -1)
           error->one(FLERR,"TIP4P hydrogen is missing");
         if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
           error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
         compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
       } else {
         iH1 = hneigh[i][0];
         iH2 = hneigh[i][1];
         if (hneigh[i][2] == 0) {
           hneigh[i][2] = 1;
           compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
         }
       }
       x1 = newsite[i];
     } else x1 = x[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
     offseti = offset[itype];
     lj1i = lj1[itype]; lj2i = lj2[itype]; lj3i = lj3[itype]; lj4i = lj4[itype];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       ni = sbmask(j);
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
         
       if (rsq < cut_ljsq[itype][jtype]) {			// lj
         r2inv = 1.0/rsq;
        	if (order6) {					// long-range lj
 	  register double rn = r2inv*r2inv*r2inv;
 	  register double x2 = g2*rsq, a2 = 1.0/x2;
 	  x2 = a2*exp(-x2)*lj4i[jtype];
 	  if (ni == 0) {
 	    forcelj =
 	      (rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
 	    if (eflag)
 	      evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2;
 	  }
 	  else {					// special case
 	    register double f = special_lj[ni], t = rn*(1.0-f);
 	    forcelj = f*(rn *= rn)*lj1i[jtype]-
 	      g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype];
 	    if (eflag) 
 	      evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype];
 	  }
 	}
 	else {						// cut lj
 	  register double rn = r2inv*r2inv*r2inv;
 	  if (ni == 0) {
 	    forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]);
 	    if (eflag) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype];
 	  }
 	  else {					// special case
 	    register double f = special_lj[ni];
 	    forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]);
 	    if (eflag)
 	      evdwl = f * (rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]);
 	  }
         }
 
         forcelj *= r2inv;
 	f[i][0] += delx*forcelj;
 	f[i][1] += dely*forcelj;
 	f[i][2] += delz*forcelj;
 	f[j][0] -= delx*forcelj;
 	f[j][1] -= dely*forcelj;
 	f[j][2] -= delz*forcelj;
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
 	      	             evdwl,0.0,forcelj,delx,dely,delz);
       }
 
       
       // adjust rsq and delxyz for off-site O charge(s)
       // ADDITIONAL REQEUST REQUIRED HERE!!!!!
 
       if (rsq < cut_coulsqplus) {
         if (itype == typeO || jtype == typeO) { 
 	  if (jtype == typeO) {
             if (hneigh[j][0] < 0) {
               hneigh[j][0] = jH1 = atom->map(atom->tag[j] + 1);
               hneigh[j][1] = jH2 = atom->map(atom->tag[j] + 2);
               hneigh[j][2] = 1;
               if (jH1 == -1 || jH2 == -1)
                 error->one(FLERR,"TIP4P hydrogen is missing");
               if (atom->type[jH1] != typeH || atom->type[jH2] != typeH)
                 error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
               compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
             } else {
               jH1 = hneigh[j][0];
               jH2 = hneigh[j][1];
               if (hneigh[j][2] == 0) {
                 hneigh[j][2] = 1;
                 compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
               }
             }
             x2 = newsite[j];
 	  } else x2 = x[j];
 	  delx = x1[0] - x2[0];
 	  dely = x1[1] - x2[1];
 	  delz = x1[2] - x2[2];
 	  rsq = delx*delx + dely*dely + delz*delz;
         }
 
 	// test current rsq against cutoff and compute Coulombic force
       
         if (rsq < cut_coulsq && order1) {
 	  r2inv = 1.0 / rsq;
 	  if (!ncoultablebits || rsq <= tabinnersq) {
 	    r = sqrt(rsq);
 	    grij = g_ewald * r;
 	    expm2 = exp(-grij*grij);
 	    t = 1.0 / (1.0 + EWALD_P*grij);
 	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 	    prefactor = qqrd2e * qtmp*q[j]/r;
 	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
 	    if (factor_coul < 1.0) {
 	      forcecoul -= (1.0-factor_coul)*prefactor; 
 	    }
 	  } else {
 	    union_int_float_t rsq_lookup;
 	    rsq_lookup.f = rsq;
 	    itable = rsq_lookup.i & ncoulmask;
 	    itable >>= ncoulshiftbits;
 	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
 	    table = ftable[itable] + fraction*dftable[itable];
 	    forcecoul = qtmp*q[j] * table;
 	    if (factor_coul < 1.0) {
 	      table = ctable[itable] + fraction*dctable[itable];
 	      prefactor = qtmp*q[j] * table;
 	      forcecoul -= (1.0-factor_coul)*prefactor;
 	    }
 	  }
 
 	  cforce = forcecoul * r2inv;
 
 	  //if (evflag) ev_tally(i,j,nlocal,newton_pair,
           //		       evdwl,0.0,cforce,delx,dely,delz);
 
 	  // if i,j are not O atoms, force is applied directly
 	  // if i or j are O atoms, force is on fictitious atom & partitioned
 	  // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
 	  // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
 	  // preserves total force and torque on water molecule
 	  // virial = sum(r x F) where each water's atoms are near xi and xj
 	  // vlist stores 2,4,6 atoms whose forces contribute to virial
 
 	  n = 0;
           key = 0;
 
 	  if (itype != typeO) {
 	    f[i][0] += delx * cforce;
 	    f[i][1] += dely * cforce;
 	    f[i][2] += delz * cforce;
 
             if (vflag) {
               v[0] = x[i][0] * delx * cforce;
               v[1] = x[i][1] * dely * cforce;
               v[2] = x[i][2] * delz * cforce;
               v[3] = x[i][0] * dely * cforce;
               v[4] = x[i][0] * delz * cforce;
               v[5] = x[i][1] * delz * cforce;
             }
           vlist[n++] = i;
 
 	  } else {
             key += 1;
             fd[0] = delx*cforce;
             fd[1] = dely*cforce;
             fd[2] = delz*cforce;
 
             fO[0] = fd[0]*(1 - alpha);
             fO[1] = fd[1]*(1 - alpha);
             fO[2] = fd[2]*(1 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2];
 
             f[i][0] += fO[0];
             f[i][1] += fO[1];
             f[i][2] += fO[2];
 
             f[iH1][0] += fH[0];
             f[iH1][1] += fH[1];
             f[iH1][2] += fH[2];
 
             f[iH2][0] += fH[0];
             f[iH2][1] += fH[1];
             f[iH2][2] += fH[2];
 
 	    if (vflag) {
 	      domain->closest_image(x[i],x[iH1],xH1);
 	      domain->closest_image(x[i],x[iH2],xH2);
 
 	      v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
 	      v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
 	      v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
 	      v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
 	      v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
 	      v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
 	    }
 	    vlist[n++] = i;
 	    vlist[n++] = iH1;
 	    vlist[n++] = iH2;
   	  }
 
 	  if (jtype != typeO) {
 	    f[j][0] -= delx * cforce;
 	    f[j][1] -= dely * cforce;
 	    f[j][2] -= delz * cforce;
 
 	    if (vflag) {
 	      v[0] -= x[j][0] * delx * cforce;
 	      v[1] -= x[j][1] * dely * cforce;
 	      v[2] -= x[j][2] * delz * cforce;
 	      v[3] -= x[j][0] * dely * cforce;
 	      v[4] -= x[j][0] * delz * cforce;
 	      v[5] -= x[j][1] * delz * cforce;
             }
 	    vlist[n++] = j;
 
 	  } else {
             key += 2;
 
 	    fd[0] = -delx*cforce;
 	    fd[1] = -dely*cforce;
 	    fd[2] = -delz*cforce;
 
             fO[0] = fd[0]*(1 - alpha);
             fO[1] = fd[1]*(1 - alpha);
             fO[2] = fd[2]*(1 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2]; 
 
 	    f[j][0] += fO[0];
 	    f[j][1] += fO[1];
 	    f[j][2] += fO[2];
 
 	    f[jH1][0] += fH[0];
 	    f[jH1][1] += fH[1];
 	    f[jH1][2] += fH[2];
 
 	    f[jH2][0] += fH[0];
 	    f[jH2][1] += fH[1];
 	    f[jH2][2] += fH[2];
 
 	    if (vflag) {
 	      domain->closest_image(x[j],x[jH1],xH1);
 	      domain->closest_image(x[j],x[jH2],xH2);
 
 	      v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
 	      v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
 	      v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
 	      v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
 	      v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
 	      v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
             }
       	    vlist[n++] = j;
 	    vlist[n++] = jH1;
 	    vlist[n++] = jH2;
 	  }
 
 	  if (eflag) {
 	    if (!ncoultablebits || rsq <= tabinnersq)
 	      ecoul = prefactor*erfc;
 	    else {
 	      table = etable[itable] + fraction*detable[itable];
 	      ecoul = qtmp*q[j] * table;
 	    }
 	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
  
           if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha);
 	}
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the pair_style arguments.
 // narg must be 8 or 9.  The first two arguments are handed to options()
 // with orders 6 and 1; the remaining ones are, in order: O type, H type,
 // bond type, angle type, O-M distance (qdist), LJ cutoff and an optional
 // Coulomb cutoff that defaults to the LJ cutoff.
 void PairLJLongTIP4PLong::settings(int narg, char **arg)
 {
   if (narg < 8 || narg > 9) error->all(FLERR,"Illegal pair_style command");
 
   ewald_off = 0;
   ewald_order = 0;
   options(arg, 6);
   // arg is advanced by one here, so every arg[k] below refers to the
   // (k+1)-th argument of the original list
   options(++arg, 1);
   if (!comm->me && ewald_order&(1<<6)) 
     error->warning(FLERR,"Mixing forced for lj coefficients");
   if (!comm->me && ewald_order==((1<<1)|(1<<6))) 
-    error->warning(FLERR,"Using largest cut-off for lj/coul long long");
+    error->warning(FLERR,"Using largest cutoff for pair_style lj/long/tip4p/long");
   if (!((ewald_order^ewald_off)&(1<<1))) 
-    error->all(FLERR,"Coulombic cut not supported in pair_style lj/coul");
+    error->all(FLERR,"Coulombic cut not supported in pair_style lj/long/tip4p/long");
   typeO = force->inumeric(arg[1]);
   typeH = force->inumeric(arg[2]);
   typeB = force->inumeric(arg[3]);
   typeA = force->inumeric(arg[4]);
   qdist = force->numeric(arg[5]);
 
 
   cut_lj_global = force->numeric(arg[6]);
   if (narg == 8) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(arg[7]);
  
   
   // reset cutoffs that have been explicitly set
   // NOTE(review): the inner loop starts at j = i+1, so diagonal pairs (i,i)
   // are not reset here -- confirm whether that is intended
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
 	if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Check the prerequisites of the TIP4P model (atom IDs, newton pair on,
 // per-atom charge, a bond style and an angle style), run the base-class
 // initialization and derive alpha from the equilibrium geometry.
 void PairLJLongTIP4PLong::init_style()
 {
   if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style lj/coul/tip4p requires atom IDs");
+    error->all(FLERR,"Pair style lj/long/tip4p/long requires atom IDs");
   if (!force->newton_pair) 
-    error->all(FLERR,"Pair style lj/coul/tip4p requires newton pair on");
+    error->all(FLERR,"Pair style lj/long/tip4p/long requires newton pair on");
   if (!atom->q_flag)
-    error->all(FLERR,"Pair style lj/coul/tip4p requires atom attribute q");
+    error->all(FLERR,"Pair style lj/long/tip4p/long requires atom attribute q");
   if (force->bond == NULL)
     error->all(FLERR,"Must use a bond style with TIP4P potential");
   if (force->angle == NULL)
     error->all(FLERR,"Must use an angle style with TIP4P potential");
 
   PairLJLongCoulLong::init_style();
 
   // set alpha parameter
   // alpha = qdist divided by the projection of the bond of type typeB
   // onto the bisector of the angle of type typeA
 
   double theta = force->angle->equilibrium_angle(typeA);
   double blen = force->bond->equilibrium_distance(typeB);
   alpha = qdist / (cos(0.5*theta) * blen);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 // Initialize the type pair (i,j) through the base class, then apply the
 // TIP4P restrictions on the water hydrogen type.
 // Returns the cutoff reported by PairLJLongCoulLong::init_one().
 double PairLJLongTIP4PLong::init_one(int i, int j)
 {
   double cut = PairLJLongCoulLong::init_one(i,j);
 
   // check that LJ epsilon = 0.0 for water H
   // set LJ cutoff to 0.0 for any interaction involving water H
   // so LJ term isn't calculated in compute()
 
   // only the first index is tested against typeH here
   if ((i == typeH && epsilon[i][i] != 0.0))
     error->all(FLERR,"Water H epsilon must be 0.0 for "
-               "pair style lj/coul/tip4p");
+               "pair style lj/long/tip4p/long");
 
   if (i == typeH || j == typeH)
     cut_ljsq[j][i] = cut_ljsq[i][j] = 0.0;
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes the global settings of this pair style to a restart file
    the field order here must stay in sync with read_restart_settings()
 ------------------------------------------------------------------------- */
 
 void PairLJLongTIP4PLong::write_restart_settings(FILE *fp)
 {
   // TIP4P-specific settings: four type IDs followed by qdist
 
   fwrite(&typeO, sizeof(int), 1, fp);
   fwrite(&typeH, sizeof(int), 1, fp);
   fwrite(&typeB, sizeof(int), 1, fp);
   fwrite(&typeA, sizeof(int), 1, fp);
   fwrite(&qdist, sizeof(double), 1, fp);
 
   // cutoffs, flags and Coulomb table settings
 
   fwrite(&cut_lj_global, sizeof(double), 1, fp);
   fwrite(&cut_coul, sizeof(double), 1, fp);
   fwrite(&offset_flag, sizeof(int), 1, fp);
   fwrite(&mix_flag, sizeof(int), 1, fp);
   fwrite(&ncoultablebits, sizeof(int), 1, fp);
   fwrite(&tabinner, sizeof(double), 1, fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads the global settings from a restart file
    every value is then broadcast from proc 0 to all procs in world
    the field order mirrors write_restart_settings()
 ------------------------------------------------------------------------- */
 
 void PairLJLongTIP4PLong::read_restart_settings(FILE *fp)
 {
   const bool reader = (comm->me == 0);
 
   if (reader) {
 
     // TIP4P-specific settings
 
     fread(&typeO, sizeof(int), 1, fp);
     fread(&typeH, sizeof(int), 1, fp);
     fread(&typeB, sizeof(int), 1, fp);
     fread(&typeA, sizeof(int), 1, fp);
     fread(&qdist, sizeof(double), 1, fp);
 
     // cutoffs, flags and Coulomb table settings
 
     fread(&cut_lj_global, sizeof(double), 1, fp);
     fread(&cut_coul, sizeof(double), 1, fp);
     fread(&offset_flag, sizeof(int), 1, fp);
     fread(&mix_flag, sizeof(int), 1, fp);
     fread(&ncoultablebits, sizeof(int), 1, fp);
     fread(&tabinner, sizeof(double), 1, fp);
   }
 
   // distribute what proc 0 read, one value per broadcast
 
   MPI_Bcast(&typeO, 1, MPI_INT, 0, world);
   MPI_Bcast(&typeH, 1, MPI_INT, 0, world);
   MPI_Bcast(&typeB, 1, MPI_INT, 0, world);
   MPI_Bcast(&typeA, 1, MPI_INT, 0, world);
   MPI_Bcast(&qdist, 1, MPI_DOUBLE, 0, world);
 
   MPI_Bcast(&cut_lj_global, 1, MPI_DOUBLE, 0, world);
   MPI_Bcast(&cut_coul, 1, MPI_DOUBLE, 0, world);
   MPI_Bcast(&offset_flag, 1, MPI_INT, 0, world);
   MPI_Bcast(&mix_flag, 1, MPI_INT, 0, world);
   MPI_Bcast(&ncoultablebits, 1, MPI_INT, 0, world);
   MPI_Bcast(&tabinner, 1, MPI_DOUBLE, 0, world);
 }
 
 /* ----------------------------------------------------------------------
    compute position of the fictitious charge site of one water molecule
    xO, xH1, xH2 = coords of the O atom and its 2 H atoms (read only)
    xM = output coords: xO + alpha/2 * (sum of the two O->H displacements)
    each O->H displacement is passed through domain->minimum_image()
 ------------------------------------------------------------------------- */
 
 void PairLJLongTIP4PLong::compute_newsite(double *xO, double *xH1,
                                              double *xH2, double *xM)
 {
   // displacement from O to first H
 
   double dx1 = xH1[0] - xO[0];
   double dy1 = xH1[1] - xO[1];
   double dz1 = xH1[2] - xO[2];
   domain->minimum_image(dx1,dy1,dz1);
 
   // displacement from O to second H
 
   double dx2 = xH2[0] - xO[0];
   double dy2 = xH2[1] - xO[1];
   double dz2 = xH2[2] - xO[2];
   domain->minimum_image(dx2,dy2,dz2);
 
   // site lies along the summed displacement, scaled by alpha/2
 
   const double half_alpha = alpha * 0.5;
   xM[0] = xO[0] + half_alpha * (dx1 + dx2);
   xM[1] = xO[1] + half_alpha * (dy1 + dy2);
   xM[2] = xO[2] + half_alpha * (dz1 + dz2);
 }
 
 /* ----------------------------------------------------------------------
    return a pointer to the internal quantity named by str
    str = name of the quantity, dim = set to 2 for the first three table
      entries (B, sigma, epsilon) and to 0 for everything else
    returns NULL (with dim = 0) if str matches no known name
    "ewald_cut" and "cut_coul" both resolve to cut_coul
 ------------------------------------------------------------------------- */
 
 void *PairLJLongTIP4PLong::extract(const char *str, int &dim)
 {
   // TIP4P-specific scalars are matched first
 
   dim = 0;
   if (strcmp(str,"qdist") == 0) return (void *) &qdist;
   if (strcmp(str,"typeO") == 0) return (void *) &typeO;
   if (strcmp(str,"typeH") == 0) return (void *) &typeH;
   if (strcmp(str,"typeA") == 0) return (void *) &typeA;
   if (strcmp(str,"typeB") == 0) return (void *) &typeB;
   if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul;
 
   // parallel NULL-terminated tables: ids[k] names the quantity ptrs[k]
 
   const char *ids[] = {
     "B", "sigma", "epsilon", "ewald_order", "ewald_cut", "cut_coul", 
     "ewald_mix", "cut_LJ", NULL};
   void *ptrs[] = {
     lj4, sigma, epsilon, &ewald_order, &cut_coul, &cut_coul, 
     &mix_flag, &cut_lj_global, NULL};
   int i;
 
   // linear search; on no match i stops at the NULL terminator,
   // so ptrs[i] is NULL and dim stays 0
 
   for (i=0; ids[i]&&strcmp(ids[i], str); ++i);
   if (i <= 2) dim = 2;
   else dim = 0;
   return ptrs[i];
 }
 
 /* ----------------------------------------------------------------------
    memory usage estimate in bytes
    sums maxeatom doubles, 6*maxvatom doubles and 2*nmax doubles
 ------------------------------------------------------------------------- */
 
 double PairLJLongTIP4PLong::memory_usage()
 {
   const double eatom_bytes = maxeatom * sizeof(double);
   const double vatom_bytes = maxvatom*6 * sizeof(double);
   const double nmax_bytes = 2 * nmax * sizeof(double);
 
   return eatom_bytes + vatom_bytes + nmax_bytes;
 }
diff --git a/src/KSPACE/pair_lj_long_tip4p_long.h b/src/KSPACE/pair_lj_long_tip4p_long.h
index a1785fbf2..541ab5c9a 100755
--- a/src/KSPACE/pair_lj_long_tip4p_long.h
+++ b/src/KSPACE/pair_lj_long_tip4p_long.h
@@ -1,134 +1,118 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/long/tip4p/long,PairLJLongTIP4PLong)
 
 #else
 
 #ifndef LMP_PAIR_LJ_LONG_TIP4P_LONG_H
 #define LMP_PAIR_LJ_LONG_TIP4P_LONG_H
 
 #include "pair_lj_long_coul_long.h"
 
 namespace LAMMPS_NS {
 
 // pair style lj/long/tip4p/long
 // derives from PairLJLongCoulLong and adds the TIP4P-specific settings
 // (water atom/bond/angle types) plus storage for the off-oxygen charge sites
 
 class PairLJLongTIP4PLong : public PairLJLongCoulLong {
  public:
   PairLJLongTIP4PLong(class LAMMPS *);
   ~PairLJLongTIP4PLong();
   virtual void compute(int, int);
   void settings(int, char **);           // parse pair_style arguments
   void init_style();                     // prerequisite checks, sets alpha
   double init_one(int, int);             // per type-pair init, returns cutoff
   void write_restart_settings(FILE *fp); // write global settings to restart
   void read_restart_settings(FILE *fp);  // read + broadcast global settings
   void *extract(const char *, int &);    // look up internal quantity by name
   double memory_usage();                 // memory estimate in bytes
 
 
  protected:
   int typeH,typeO;             // atom types of TIP4P water H and O atoms
   int typeA,typeB;             // angle and bond types of TIP4P water
   double alpha;                // geometric constraint parameter for TIP4P
 
   int nmax;                    // info on off-oxygen charge sites
   int **hneigh;                // 0,1 = indices of 2 H associated with O
                                // 2 = 0 if site loc not yet computed, 1 if yes
   double **newsite;            // locations of charge sites
 
   // args = coords of O, H1, H2 (inputs) and of the charge site (output)
   void compute_newsite(double *, double *, double *, double *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: TIP4P hydrogen is missing
 
 The TIP4P pairwise computation failed to find the correct H atom
 within a water molecule.
 
 E: TIP4P hydrogen has incorrect atom type
 
 The TIP4P pairwise computation found an H atom whose type does not
 agree with the specified H type.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 W: Mixing forced for lj coefficients
 
-UNDOCUMENTED
+Self-explanatory.
 
-W: Using largest cut-off for lj/coul long long
+W: Using largest cutoff for pair_style lj/long/tip4p/long
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Coulombic cut not supported in pair_style lj/coul
+E: Coulomb cut not supported in pair_style lj/long/tip4p/long
 
-UNDOCUMENTED
+Must use long-range Coulombic interactions.
 
-E: Pair style lj/coul/tip4p requires atom IDs
+E: Pair style lj/long/tip4p/long requires atom IDs
 
-UNDOCUMENTED
+There are no atom IDs defined in the system and the TIP4P potential
+requires them to find O,H atoms within a water molecule.
 
-E: Pair style lj/coul/tip4p requires newton pair on
+E: Pair style lj/long/tip4p/long requires newton pair on
 
-UNDOCUMENTED
+This is because the computation of constraint forces within a water
+molecule adds forces to atoms owned by other processors.
 
-E: Pair style lj/coul/tip4p requires atom attribute q
+E: Pair style lj/long/tip4p/long requires atom attribute q
 
-UNDOCUMENTED
+The atom style defined does not have these attributes.
 
 E: Must use a bond style with TIP4P potential
 
 TIP4P potentials assume bond lengths in water are constrained
 by a fix shake command.
 
 E: Must use an angle style with TIP4P potential
 
 TIP4P potentials assume angles in water are constrained by a fix shake
 command.
 
-E: Water H epsilon must be 0.0 for pair style lj/coul/tip4p
-
-UNDOCUMENTED
-
-U: Pair style lj/cut/coul/long/tip4p requires atom IDs
-
-There are no atom IDs defined in the system and the TIP4P potential
-requires them to find O,H atoms with a water molecule.
-
-U: Pair style lj/cut/coul/long/tip4p requires newton pair on
-
-This is because the computation of constraint forces within a water
-molecule adds forces to atoms owned by other processors.
-
-U: Pair style lj/cut/coul/long/tip4p requires atom attribute q
-
-The atom style defined does not have these attributes.
-
-U: Pair style is incompatible with KSpace style
+E: Water H epsilon must be 0.0 for pair style lj/long/tip4p/long
 
-If a pair style with a long-range Coulombic component is selected,
-then a kspace style must also be used.
+This is because LAMMPS does not compute the Lennard-Jones interactions
+with these particles for efficiency reasons.
 
 */
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 772013af0..0ccff8c1b 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -1,3014 +1,3014 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
      per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
      analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "pppm.h"
 #include "atom.h"
 #include "comm.h"
 #include "commgrid.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "fft3d_wrap.h"
 #include "remap_wrap.h"
 #include "memory.h"
 #include "error.h"
 
 #include "math_const.h"
 #include "math_special.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 // MAXORDER = largest stencil order accepted by PPPM::init()
 #define MAXORDER 7
 #define OFFSET 16384
 // SMALL = net-charge magnitude above which init() warns about non-neutrality
 #define SMALL 0.00001
 #define LARGE 10000.0
 #define EPS_HOC 1.0e-7
 
 // flags passed to the CommGrid reverse_comm()/forward_comm() calls in compute()
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
 // zero and one literals, float-typed when FFT_SINGLE is defined, else double
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 /* ----------------------------------------------------------------------
    constructor
    arg[0] = requested relative accuracy (at least one argument is required)
    sets solver flags, NULLs all array/object pointers that are allocated
      later, and fills the acons coefficient table
 ------------------------------------------------------------------------- */
 
 PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
 
   // solver flags and user-requested accuracy
 
   pppmflag = 1;
   group_group_enable = 1;
   accuracy_relative = atof(arg[0]);
 
   // rank of this proc and total proc count
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // list of 3 integer factors: 2, 3, 5
 
   nfactors = 3;
   factors = new int[nfactors];
   factors[0] = 2;
   factors[1] = 3;
   factors[2] = 5;
 
   // grid bricks and FFT work arrays start out unallocated
 
   density_brick = NULL;
   vdx_brick = NULL;
   vdy_brick = NULL;
   vdz_brick = NULL;
   density_fft = NULL;
   u_brick = NULL;
 
   v0_brick = NULL;
   v1_brick = NULL;
   v2_brick = NULL;
   v3_brick = NULL;
   v4_brick = NULL;
   v5_brick = NULL;
 
   greensfn = NULL;
   work1 = NULL;
   work2 = NULL;
   vg = NULL;
   fkx = NULL;
   fky = NULL;
   fkz = NULL;
 
   sf_precoeff1 = NULL;
   sf_precoeff2 = NULL;
   sf_precoeff3 = NULL;
   sf_precoeff4 = NULL;
   sf_precoeff5 = NULL;
   sf_precoeff6 = NULL;
 
   // group/group density arrays
 
   density_A_brick = NULL;
   density_B_brick = NULL;
   density_A_fft = NULL;
   density_B_fft = NULL;
 
   // summation coefficient arrays
 
   gf_b = NULL;
   rho1d = NULL;
   rho_coeff = NULL;
   drho1d = NULL;
   drho_coeff = NULL;
 
   // FFT, remap and grid communication objects
 
   fft1 = NULL;
   fft2 = NULL;
   remap = NULL;
   cg = NULL;
   cg_peratom = NULL;
 
   // per-particle grid mapping, sized on demand in compute()
 
   nmax = 0;
   part2grid = NULL;
 
   // define acons coefficients for estimation of kspace errors
   // see JCP 109, pg 7698 for derivation of coefficients
   // higher order coefficients may be computed if needed
   // only entries acons[m][k] with 1 <= m <= 7 and k < m are assigned
 
   memory->create(acons,8,7,"pppm:acons");
 
   acons[1][0] = 2.0 / 3.0;
 
   acons[2][0] = 1.0 / 50.0;
   acons[2][1] = 5.0 / 294.0;
 
   acons[3][0] = 1.0 / 588.0;
   acons[3][1] = 7.0 / 1440.0;
   acons[3][2] = 21.0 / 3872.0;
 
   acons[4][0] = 1.0 / 4320.0;
   acons[4][1] = 3.0 / 1936.0;
   acons[4][2] = 7601.0 / 2271360.0;
   acons[4][3] = 143.0 / 28800.0;
 
   acons[5][0] = 1.0 / 23232.0;
   acons[5][1] = 7601.0 / 13628160.0;
   acons[5][2] = 143.0 / 69120.0;
   acons[5][3] = 517231.0 / 106536960.0;
   acons[5][4] = 106640677.0 / 11737571328.0;
 
   acons[6][0] = 691.0 / 68140800.0;
   acons[6][1] = 13.0 / 57600.0;
   acons[6][2] = 47021.0 / 35512320.0;
   acons[6][3] = 9694607.0 / 2095994880.0;
   acons[6][4] = 733191589.0 / 59609088000.0;
   acons[6][5] = 326190917.0 / 11700633600.0;
 
   acons[7][0] = 1.0 / 345600.0;
   acons[7][1] = 3617.0 / 35512320.0;
   acons[7][2] = 745739.0 / 838397952.0;
   acons[7][3] = 56399353.0 / 12773376000.0;
   acons[7][4] = 25091609.0 / 1560084480.0;
   acons[7][5] = 1755948832039.0 / 36229939200000.0;
   acons[7][6] = 4887769399.0 / 37838389248.0;
 }
 
 /* ----------------------------------------------------------------------
    destructor: release everything this solver allocated
 ------------------------------------------------------------------------- */
 
 PPPM::~PPPM()
 {
   delete [] factors;
 
   // grid, per-atom and group/group storage
 
   deallocate();
   deallocate_peratom();
   deallocate_groups();
 
   // arrays created directly via memory->create()
 
   memory->destroy(acons);
   memory->destroy(part2grid);
 }
 
 /* ----------------------------------------------------------------------
    called once before run
    checks that PPPM can be used with the current domain, atom style and
      pair style, extracts the Coulombic cutoff (and TIP4P params if
      tip4pflag is set) from the pair style, sums the system charge,
      chooses the grid and stencil order (reducing order if the stencil
      would overlap beyond neighbor procs and overlap is not allowed),
      prints a summary on proc 0, and allocates grid-dependent memory
    any failed check aborts via error->all()
 ------------------------------------------------------------------------- */
 
 void PPPM::init()
 {
   if (me == 0) {
     if (screen) fprintf(screen,"PPPM initialization ...\n");
     if (logfile) fprintf(logfile,"PPPM initialization ...\n");
   }
 
   // error check
 
   if (domain->triclinic)
     error->all(FLERR,"Cannot (yet) use PPPM with triclinic box");
   if (domain->dimension == 2) error->all(FLERR,
                                          "Cannot use PPPM with 2d simulation");
 
   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
 
   if (slabflag == 0 && domain->nonperiodic > 0)
     error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
   if (slabflag) {
     if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
         domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
       error->all(FLERR,"Incorrect boundaries with slab PPPM");
   }
 
   if (order < 2 || order > MAXORDER) {
     char str[128];
     sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
     error->all(FLERR,str);
   }
 
   // extract short-range Coulombic cutoff from pair style
 
   triclinic = domain->triclinic;
   scale = 1.0;
 
   pair_check();
 
   int itmp = 0;
   double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
   if (p_cutoff == NULL)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   cutoff = *p_cutoff;
 
   // if kspace is TIP4P, extract TIP4P params from pair style
   // bond/angle are not yet init(), so ensure equilibrium request is valid
 
   qdist = 0.0;
 
   if (tip4pflag) {
     double *p_qdist = (double *) force->pair->extract("qdist",itmp);
     int *p_typeO = (int *) force->pair->extract("typeO",itmp);
     int *p_typeH = (int *) force->pair->extract("typeH",itmp);
     int *p_typeA = (int *) force->pair->extract("typeA",itmp);
     int *p_typeB = (int *) force->pair->extract("typeB",itmp);
     if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
       error->all(FLERR,"KSpace style is incompatible with Pair style");
     qdist = *p_qdist;
     typeO = *p_typeO;
     typeH = *p_typeH;
     int typeA = *p_typeA;
     int typeB = *p_typeB;
 
     if (force->angle == NULL || force->bond == NULL)
       error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
     if (typeA < 1 || typeA > atom->nangletypes ||
         force->angle->setflag[typeA] == 0)
       error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
     if (typeB < 1 || typeB > atom->nbondtypes ||
         force->bond->setflag[typeB] == 0)
       error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
     double theta = force->angle->equilibrium_angle(typeA);
     double blen = force->bond->equilibrium_distance(typeB);
     alpha = qdist / (cos(0.5*theta) * blen);
   }
 
   // compute qsum & qsqsum and warn if not charge-neutral
   // local sums over owned atoms are reduced across all procs
 
   qsum = qsqsum = 0.0;
   for (int i = 0; i < atom->nlocal; i++) {
     qsum += atom->q[i];
     qsqsum += atom->q[i]*atom->q[i];
   }
 
   double tmp;
   MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum = tmp;
   MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsqsum = tmp;
   q2 = qsqsum * force->qqrd2e / force->dielectric;
 
   if (qsqsum == 0.0)
     error->all(FLERR,"Cannot use kspace solver on system with no charge");
   if (fabs(qsum) > SMALL && me == 0) {
     char str[128];
     sprintf(str,"System is not charge neutral, net charge = %g",qsum);
     error->warning(FLERR,str);
   }
 
   // set accuracy (force units) from accuracy_relative or accuracy_absolute
 
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   // free all arrays previously allocated
 
   deallocate();
   deallocate_peratom();
   peratom_allocate_flag = 0;
   deallocate_groups();
   group_allocate_flag = 0;
 
   // setup FFT grid resolution and g_ewald
   // normally one iteration thru while loop is all that is required
   // if grid stencil does not extend beyond neighbor proc
   //   or overlap is allowed, then done
   // else reduce order and try again
 
   int (*procneigh)[2] = comm->procneigh;
 
   CommGrid *cgtmp = NULL;
   int iteration = 0;
 
   while (order >= minorder) {
     if (iteration && me == 0)
       error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
                      "beyond nearest neighbor processor");
 
     set_grid_global();
     set_grid_local();
     if (overlap_allowed) break;
 
     cgtmp = new CommGrid(lmp,world,1,1,
                          nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                          nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     cgtmp->ghost_notify();
     if (!cgtmp->ghost_overlap()) break;
 
     // reset the pointer so it never dangles once the loop ends
 
     delete cgtmp;
     cgtmp = NULL;
 
     order--;
     iteration++;
   }
 
   if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
   if (!overlap_allowed && cgtmp && cgtmp->ghost_overlap())
     error->all(FLERR,"PPPM grid stencil extends "
                "beyond nearest neighbor processor");
   delete cgtmp;
 
   // adjust g_ewald
 
   if (!gewaldflag) adjust_gewald();
 
   // calculate the final accuracy
 
   double estimated_accuracy = final_accuracy();
 
   // print stats
   // grid and FFT sizes reported are the max over all procs
 
   int ngrid_max,nfft_both_max;
   MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
   MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
 
 #ifdef FFT_SINGLE
     const char fft_prec[] = "single";
 #else
     const char fft_prec[] = "double";
 #endif
 
     // screen and logfile receive identical text
 
     if (screen) {
       fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
       fprintf(screen,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
       fprintf(screen,"  stencil order = %d\n",order);
       fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
               estimated_accuracy);
       fprintf(screen,"  estimated relative force accuracy = %g\n",
               estimated_accuracy/two_charge_force);
       fprintf(screen,"  using %s precision FFTs\n",fft_prec);
       fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
               ngrid_max,nfft_both_max);
     }
     if (logfile) {
       fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
       fprintf(logfile,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
       fprintf(logfile,"  stencil order = %d\n",order);
       fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
               estimated_accuracy);
       fprintf(logfile,"  estimated relative force accuracy = %g\n",
               estimated_accuracy/two_charge_force);
       fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
       fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
               ngrid_max,nfft_both_max);
     }
   }
 
   // allocate K-space dependent memory
   // don't invoke allocate_peratom(), compute() will allocate when needed
 
   allocate();
   cg->ghost_notify();
   cg->setup();
 
   // pre-compute Green's function denominator expansion
   // pre-compute 1d charge distribution coefficients
 
   compute_gf_denom();
   if (differentiation_flag == 1) compute_sf_precoeff();
   compute_rho_coeff();
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed
    recomputes volume, inverse grid spacings, the wavevector arrays
      fkx/fky/fkz, the virial coefficients vg, and the Green's function
 ------------------------------------------------------------------------- */
 
 void PPPM::setup()
 {
   // volume-dependent factors
   // the z length is multiplied by slab_volfactor (2d slab PPPM);
   // for 3d PPPM slab_volfactor = 1.0, so it is simply zprd
 
   double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd_slab = prd[2]*slab_volfactor;
 
   volume = xprd * yprd * zprd_slab;
 
   delxinv = nx_pppm/xprd;
   delyinv = ny_pppm/yprd;
   delzinv = nz_pppm/zprd_slab;
   delvolinv = delxinv*delyinv*delzinv;
 
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   // fkx,fky,fkz for my FFT grid pts
   // the integer division folds indices in the upper half of the grid
   // to negative multiples of the unit wavevector
 
   for (int ix = nxlo_fft; ix <= nxhi_fft; ix++)
     fkx[ix] = unitkx*(ix - nx_pppm*(2*ix/nx_pppm));
 
   for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
     fky[iy] = unitky*(iy - ny_pppm*(2*iy/ny_pppm));
 
   for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
     fkz[iz] = unitkz*(iz - nz_pppm*(2*iz/nz_pppm));
 
   // virial coefficients, one row of 6 per FFT grid point
   // n runs over grid points with x fastest, then y, then z
 
   int n = 0;
   for (int k = nzlo_fft; k <= nzhi_fft; k++) {
     for (int j = nylo_fft; j <= nyhi_fft; j++) {
       for (int i = nxlo_fft; i <= nxhi_fft; i++) {
         const double sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
 
         if (sqk == 0.0) {
           // zero wavevector: all 6 components vanish
           for (int m = 0; m < 6; m++) vg[n][m] = 0.0;
         } else {
           const double vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
           vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
           vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
           vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
           vg[n][3] = vterm*fkx[i]*fky[j];
           vg[n][4] = vterm*fkx[i]*fkz[k];
           vg[n][5] = vterm*fky[j]*fkz[k];
         }
 
         n++;
       }
     }
   }
 
   // Green's function variant depends on the differentiation scheme
 
   if (differentiation_flag != 1) compute_gf_ik();
   else compute_gf_ad();
 }
 
 /* ----------------------------------------------------------------------
    reset local grid arrays and communication stencils
    called by fix balance b/c it changed sizes of processor sub-domains
    frees and re-creates all grid storage, then recomputes every
      precomputed coefficient, ending with a call to setup()
 ------------------------------------------------------------------------- */
 
 void PPPM::setup_grid()
 {
   // discard existing grid, per-atom and group/group storage
   // the allocate flags are cleared so compute() re-creates on demand
 
   deallocate();
   deallocate_peratom();
   peratom_allocate_flag = 0;
   deallocate_groups();
   group_allocate_flag = 0;
 
   // new extent of the global grid owned by this proc
 
   set_grid_local();
 
   // K-space dependent memory for the new extent
   // allocate_peratom() is intentionally not invoked here
 
   allocate();
 
   // grid communication must not overlap unless explicitly allowed
 
   cg->ghost_notify();
   if (!overlap_allowed && cg->ghost_overlap())
     error->all(FLERR,"PPPM grid stencil extends "
                "beyond nearest neighbor processor");
   cg->setup();
 
   // Green's function denominator expansion
   // and 1d charge distribution coefficients
 
   compute_gf_denom();
   if (differentiation_flag == 1) compute_sf_precoeff();
   compute_rho_coeff();
 
   // volume-dependent coeffs
 
   setup();
 }
 
 /* ----------------------------------------------------------------------
    compute the PPPM long-range force, energy, virial
 ------------------------------------------------------------------------- */
 
 void PPPM::compute(int eflag, int vflag)
 {
   int i,j;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
          eflag_atom = vflag_atom = 0;
 
   if (evflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     cg_peratom->ghost_notify();
     cg_peratom->setup();
     peratom_allocate_flag = 1;
   }
 
   // convert atoms from box to lamda coords
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
 
   // extend size of per-atom arrays if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(part2grid);
     nmax = atom->nmax;
     memory->create(part2grid,nmax,3,"pppm:part2grid");
   }
 
   // find grid points for all my particles
   // map my particle charge onto my local 3d density grid
 
   particle_map();
   make_rho();
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
   cg->reverse_comm(this,REVERSE_RHO);
   brick2fft();
 
   // compute potential gradient on my FFT grid and
   //   portion of e_long on this proc's FFT grid
   // return gradients (electric fields) in 3d brick decomposition
   // also performs per-atom calculations via poisson_peratom()
 
   poisson();
 
   // all procs communicate E-field values
   // to fill ghost cells surrounding their 3d bricks
 
   if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
   else cg->forward_comm(this,FORWARD_IK);
 
   // extra per-atom energy/virial communication
 
   if (evflag_atom) {
     if (differentiation_flag == 1 && vflag_atom) 
       cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
     else if (differentiation_flag == 0)
       cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
   }
 
   // calculate the force on my particles
 
   fieldforce();
 
   // extra per-atom energy/virial communication
 
   if (evflag_atom) fieldforce_peratom();
 
   // sum global energy across procs and add in volume-dependent term
 
   const double qscale = force->qqrd2e * scale;
 
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy = energy_all;
 
     energy *= 0.5*volume;
     energy -= g_ewald*qsqsum/MY_PIS +
       MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
     energy *= qscale;
   }
 
   // sum global virial across procs
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
   }
 
   // per-atom energy/virial
   // energy includes self-energy correction
 
   if (evflag_atom) {
     double *q = atom->q;
     int nlocal = atom->nlocal;
 
     if (eflag_atom) {
       for (i = 0; i < nlocal; i++) {
         eatom[i] *= 0.5;
         eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
           (g_ewald*g_ewald*volume);
         eatom[i] *= qscale;
       }
     }
 
     if (vflag_atom) {
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
     }
   }
 
   // 2d slab correction
 
   if (slabflag == 1) slabcorr();
 
   // convert atoms back from lamda to box coords
 
   if (triclinic) domain->lamda2x(atom->nlocal);
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order
    creates the density brick, FFT work arrays, wavevector arrays,
      differentiation-specific bricks, summation coefficient arrays,
      two FFT3d objects, one Remap and the CommGrid object cg
 ------------------------------------------------------------------------- */
 
 void PPPM::allocate()
 {
   // charge density on my 3d brick, including ghost cells
 
   memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:density_brick");
 
   // arrays sized by the number of FFT grid points
 
   memory->create(density_fft,nfft_both,"pppm:density_fft");
   memory->create(greensfn,nfft_both,"pppm:greensfn");
   memory->create(work1,2*nfft_both,"pppm:work1");
   memory->create(work2,2*nfft_both,"pppm:work2");
   memory->create(vg,nfft_both,6,"pppm:vg");
 
   // wavevector components, indexed by FFT grid bounds
 
   memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
   memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
   memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
 
   // differentiation_flag == 1 needs u_brick and the sf_precoeff arrays
   // otherwise three vd*_brick arrays are needed instead
 
   if (differentiation_flag != 1) {
     memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdx_brick");
     memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdy_brick");
     memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:vdz_brick");
 
   } else {
     memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:u_brick");
 
     memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
     memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
     memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
     memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
     memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
     memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
   }
 
   // summation coeffs
 
   memory->create(gf_b,order,"pppm:gf_b");
   memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
   memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
   memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
   memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
                           "pppm:drho_coeff");
 
   // create 2 FFTs and a Remap
   // 1st FFT keeps data in FFT decomposition
   // 2nd FFT returns data in 3d brick decomposition
   // remap takes data from 3d brick to FFT decomposition
   // the value written through the last FFT3d argument is not used here
 
   int unused;
 
   fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    0,0,&unused);
 
   fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    0,0,&unused);
 
   remap = new Remap(lmp,world,
                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
                     1,0,0,FFT_PRECISION);
 
   // create ghost grid object for rho and electric field communication
   // its third constructor argument is 1 when differentiation_flag == 1,
   // and 3 otherwise; all other arguments are the same in both cases
 
   int (*procneigh)[2] = comm->procneigh;
   const int cg_third_arg = (differentiation_flag == 1) ? 1 : 3;
 
   cg = new CommGrid(lmp,world,cg_third_arg,1,
                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 }
 
 /* ----------------------------------------------------------------------
    allocate per-atom memory that depends on # of K-vectors and order
    creates v0..v5_brick (and u_brick when differentiation_flag != 1)
      over the ghosted 3d brick, plus the cg_peratom ghost-grid communicator
 ------------------------------------------------------------------------- */
 
 void PPPM::allocate_peratom()
 {
   const bool ad_scheme = (differentiation_flag == 1);
 
   // u_brick is only created here when differentiation_flag != 1
 
   if (!ad_scheme)
     memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                             nxlo_out,nxhi_out,"pppm:u_brick");
 
   // six v*_brick grids, all spanning the same ghosted index range
 
   memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v0_brick");
   memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v1_brick");
   memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v2_brick");
   memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v3_brick");
   memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v4_brick");
   memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,"pppm:v5_brick");
 
   // create ghost grid object for the per-atom grids
   // 3rd CommGrid argument is 6 when differentiation_flag == 1, else 7
   //   (presumably the number of values exchanged per grid point - confirm
   //    against the CommGrid constructor)
 
   const int nvalues = ad_scheme ? 6 : 7;
   int (*neigh)[2] = comm->procneigh;
 
   cg_peratom =
     new CommGrid(lmp,world,nvalues,1,
                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                  neigh[0][0],neigh[0][1],neigh[1][0],
                  neigh[1][1],neigh[2][0],neigh[2][1]);
 }
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order
    frees the grid arrays, the summation coefficients,
      the two FFTs, the remap and the ghost-grid communicator
 ------------------------------------------------------------------------- */
 
 void PPPM::deallocate()
 {
   memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
 
   // arrays that exist for only one setting of differentiation_flag
 
   if (differentiation_flag != 1) {
     memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
   } else {
     memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
     memory->destroy(sf_precoeff1);
     memory->destroy(sf_precoeff2);
     memory->destroy(sf_precoeff3);
     memory->destroy(sf_precoeff4);
     memory->destroy(sf_precoeff5);
     memory->destroy(sf_precoeff6);
   }
 
   // FFT-decomposition work arrays
 
   memory->destroy(density_fft);
   memory->destroy(greensfn);
   memory->destroy(work1);
   memory->destroy(work2);
   memory->destroy(vg);
 
   // offset 1d arrays, freed with the same lower bound they were created with
 
   memory->destroy1d_offset(fkx,nxlo_fft);
   memory->destroy1d_offset(fky,nylo_fft);
   memory->destroy1d_offset(fkz,nzlo_fft);
 
   // summation coeffs
 
   memory->destroy(gf_b);
   memory->destroy2d_offset(rho1d,-order/2);
   memory->destroy2d_offset(drho1d,-order/2);
   memory->destroy2d_offset(rho_coeff,(1-order)/2);
   memory->destroy2d_offset(drho_coeff,(1-order)/2);
 
   // helper objects created with new
 
   delete fft1;
   delete fft2;
   delete remap;
   delete cg;
 }
 
 /* ----------------------------------------------------------------------
    deallocate per-atom memory that depends on # of K-vectors and order
    frees v0..v5_brick, u_brick (when differentiation_flag != 1)
      and the cg_peratom communicator
 ------------------------------------------------------------------------- */
 
 void PPPM::deallocate_peratom()
 {
   // u_brick is released here only when differentiation_flag != 1
 
   if (differentiation_flag != 1)
     memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
 
   memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
 
   delete cg_peratom;
 }
 
 /* ----------------------------------------------------------------------
    set global size of PPPM grid = nx,ny,nz_pppm
    used for charge accumulation, FFTs, and electric field interpolation
    also makes the initial g_ewald estimate unless gewaldflag is set
    grid size is only chosen here if gridflag is not set; in all cases
      each dimension is then boosted until factorable() accepts it
    errors out if accuracy <= 0.0 (when estimating g_ewald), if the
      differentiation_flag == 1 search exceeds 500 passes,
      or if any grid dimension reaches OFFSET
 ------------------------------------------------------------------------- */
 
 void PPPM::set_grid_global()
 {
   // use xprd,yprd,zprd even if triclinic so grid size is the same
   // adjust z dimension for 2d slab PPPM
   // 3d PPPM just uses zprd since slab_volfactor = 1.0
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
 
   // make initial g_ewald estimate
   // based on desired accuracy and real space cutoff
   // fluid-occupied volume used to estimate real-space error
   // zprd used rather than zprd_slab
 
   double h,h_x,h_y,h_z;
   bigint natoms = atom->natoms;
 
   if (!gewaldflag) {
     if (accuracy <= 0.0)
       error->all(FLERR,"KSpace accuracy must be > 0");
     g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
     // a value >= 1.0 would make -log() non-positive, so use the fallback form
     if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
     else g_ewald = sqrt(-log(g_ewald)) / cutoff;
   }
 
   // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
   // nz_pppm uses extended zprd_slab instead of zprd
   // reduce it until accuracy target is met
 
   if (!gridflag) {
 
     if (differentiation_flag == 1) {
 
       // start from a coarse spacing and shrink it by 5% per pass
       // until compute_df_kspace() meets the accuracy target
 
       h = h_x = h_y = h_z = 4.0/g_ewald;
       int count = 0;
       while (1) {
 
         // set grid dimension
         nx_pppm = static_cast<int> (xprd/h_x);
         ny_pppm = static_cast<int> (yprd/h_y);
         nz_pppm = static_cast<int> (zprd_slab/h_z);
 
         if (nx_pppm <= 1) nx_pppm = 2;
         if (ny_pppm <= 1) ny_pppm = 2;
         if (nz_pppm <= 1) nz_pppm = 2;
 
         //set local grid dimension
         // same FFT decomposition as in set_grid_local(); the n*_fft bounds
         // are read by compute_qopt(), which compute_df_kspace() calls
         int npey_fft,npez_fft;
         if (nz_pppm >= nprocs) {
           npey_fft = 1;
           npez_fft = nprocs;
         } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
         int me_y = me % npey_fft;
         int me_z = me / npey_fft;
 
         nxlo_fft = 0;
         nxhi_fft = nx_pppm - 1;
         nylo_fft = me_y*ny_pppm/npey_fft;
         nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
         nzlo_fft = me_z*nz_pppm/npez_fft;
         nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
         double df_kspace = compute_df_kspace();
 
         count++;
 
         // break loop if the accuracy has been reached or
         // too many loops have been performed
 
         if (df_kspace <= accuracy) break;
-        if (count > 500) error->all(FLERR, "Could not compute grid size!");
+        if (count > 500) error->all(FLERR, "Could not compute grid size");
         h *= 0.95;
         h_x = h_y = h_z = h;
       }
 
     } else {
 
       // each dimension is grown independently, one grid point per pass
       // NOTE: inside each loop err is evaluated before the count is bumped,
       //   so the test at the top of the loop sees the error of the previous
       //   spacing and the final count is one larger than the first count
       //   whose spacing met the target
 
       double err;
       h_x = h_y = h_z = 1.0/g_ewald;
 
       nx_pppm = static_cast<int> (xprd/h_x) + 1;
       ny_pppm = static_cast<int> (yprd/h_y) + 1;
       nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
 
       err = estimate_ik_error(h_x,xprd,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_x,xprd,natoms);
         nx_pppm++;
         h_x = xprd/nx_pppm;
       }
 
       err = estimate_ik_error(h_y,yprd,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_y,yprd,natoms);
         ny_pppm++;
         h_y = yprd/ny_pppm;
       }
 
       err = estimate_ik_error(h_z,zprd_slab,natoms);
       while (err > accuracy) {
         err = estimate_ik_error(h_z,zprd_slab,natoms);
         nz_pppm++;
         h_z = zprd_slab/nz_pppm;
       }
     }
   }
 
   // boost grid size until it is factorable
 
   while (!factorable(nx_pppm)) nx_pppm++;
   while (!factorable(ny_pppm)) ny_pppm++;
   while (!factorable(nz_pppm)) nz_pppm++;
 
   // OFFSET is also used as the shift in the particle <-> grid mapping
 
   if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
     error->all(FLERR,"PPPM grid is too large");
 }
 
 /* ----------------------------------------------------------------------
    check if n can be reduced to 1 using only the entries of factors[]
    return 1 if yes, 0 if no
    any n <= 1 returns 1 without touching the factor list
 ------------------------------------------------------------------------- */
 
 int PPPM::factorable(int n)
 {
   while (n > 1) {
 
     // divide out the first listed factor that divides n, if any
 
     bool reduced = false;
     for (int idx = 0; idx < nfactors && !reduced; idx++) {
       if (n % factors[idx] == 0) {
         n /= factors[idx];
         reduced = true;
       }
     }
 
     // no listed factor divides what is left of n
 
     if (!reduced) return 0;
   }
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    compute estimated kspace force error
 ------------------------------------------------------------------------- */
 
 double PPPM::compute_df_kspace()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   double df_kspace = 0.0;
   if (differentiation_flag == 1) {
     double qopt = compute_qopt();
     df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
   } else {
     double lprx = estimate_ik_error(xprd/nx_pppm,xprd,natoms);
     double lpry = estimate_ik_error(yprd/ny_pppm,yprd,natoms);
     double lprz = estimate_ik_error(zprd_slab/nz_pppm,zprd_slab,natoms);
     df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
   }
   return df_kspace;
 }
 
 /* ----------------------------------------------------------------------
    compute qopt
    loops over the FFT grid points owned by this proc (n*lo_fft:n*hi_fft),
      accumulates a local sum, then sums it over all procs
    returns the global sum; collective call (MPI_Allreduce on world)
    side effect: overwrites the class member volume
 ------------------------------------------------------------------------- */
 
 double PPPM::compute_qopt()
 {
   double qopt = 0.0;
   // box lengths: prd for orthogonal boxes, prd_lamda for triclinic
   double *prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
   
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd = prd[2];
   const double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u1, u2, sqk;
   double sum1,sum2,sum3,sum4,dot2;
 
   int k,l,m,nx,ny,nz;
   const int twoorder = 2*order;
 
   // kper,lper,mper = signed wavevector index:
   //   for an index i in 0..N-1, i - N*(2*i/N) is i in the lower half
   //   and i-N in the upper half (integer division)
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     const int mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       const int lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         const int kper = k - nx_pppm*(2*k/nx_pppm);
 
         sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
 
         // the zero wavevector contributes nothing
 
         if (sqk != 0.0) {
 
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
 
           // sum over a fixed range of -2..2 grid-period images per dimension
 
           for (nx = -2; nx <= 2; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*square(qx/g_ewald));
             argx = 0.5*qx*xprd/nx_pppm;
             wx = powsinxx(argx,twoorder);
             // from here on qx holds the squared component
             qx *= qx;
 
             for (ny = -2; ny <= 2; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*square(qy/g_ewald));
               argy = 0.5*qy*yprd/ny_pppm;
               wy = powsinxx(argy,twoorder);
               qy *= qy;
 
               for (nz = -2; nz <= 2; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*square(qz/g_ewald));
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 wz = powsinxx(argz,twoorder);
                 qz *= qz;
 
                 // dot2 = squared magnitude of the image wavevector
                 dot2 = qx+qy+qz;
                 u1   = sx*sy*sz;
                 u2   = wx*wy*wz;
                 sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
                 sum2 += u1 * u2 * MY_4PI;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
 
   // combine the partial sums from all procs
 
   double qopt_all;
   MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
   return qopt_all;
 }
 
 /* ----------------------------------------------------------------------
    estimate kspace force error for ik method
    h = grid spacing and prd = box length in one dimension
    natoms = global atom count
    uses the acons[order][] coefficient table, g_ewald and q2
 ------------------------------------------------------------------------- */
 
 double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
 {
   // polynomial in (h*g_ewald)^2 with coefficients acons[order][0..order-1]
 
   double series = 0.0;
   int term = 0;
   while (term < order) {
     series += acons[order][term] * pow(h*g_ewald,2.0*term);
     term++;
   }
 
   return q2 * pow(h*g_ewald,(double)order) *
     sqrt(g_ewald*prd*sqrt(MY_2PI)*series/natoms) / (prd*prd);
 }
 
 /* ----------------------------------------------------------------------
    adjust the g_ewald parameter to near its optimal value
    using a Newton-Raphson solver
 ------------------------------------------------------------------------- */
 
 void PPPM::adjust_gewald()
 {
   double dx;
 
   for (int i = 0; i < LARGE; i++) {
     dx = newton_raphson_f() / derivf();
     g_ewald -= dx;
     if (fabs(newton_raphson_f()) < SMALL) return;
   }
 
   char str[128];
   sprintf(str, "Could not compute g_ewald");
   error->all(FLERR, str);
 }
 
 /* ----------------------------------------------------------------------
  Calculate f(x) using Newton-Raphson solver
  ------------------------------------------------------------------------- */
 
 double PPPM::newton_raphson_f()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   bigint natoms = atom->natoms;
 
   double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
        sqrt(natoms*cutoff*xprd*yprd*zprd);
 
   double df_kspace = compute_df_kspace();
 
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
  Calculate numerical derivative f'(x) using forward difference
  [f(x + h) - f(x)] / h
  ------------------------------------------------------------------------- */
 
 double PPPM::derivf()
 {
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
 
   f1 = newton_raphson_f();
   g_ewald_old = g_ewald;
   g_ewald += h;
   f2 = newton_raphson_f();
   g_ewald = g_ewald_old;
   df = (f2 - f1)/h;
 
   return df;
 }
 
 /* ----------------------------------------------------------------------
    Calculate the final estimate of the accuracy
 ------------------------------------------------------------------------- */
 
 double PPPM::final_accuracy()
 {
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
 
   double df_kspace = compute_df_kspace();
   double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
   double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
   double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
   double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
    df_table*df_table);
 
   return estimated_accuracy;
 }
 
 /* ----------------------------------------------------------------------
    set local subset of PPPM/FFT grid that I own
    n xyz lo/hi in = 3d brick that I own (inclusive)
    n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
    n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
    also sets nlower,nupper,shift,shiftone,boxlo,ngrid,nfft,nfft_both
    requires nx_pppm,ny_pppm,nz_pppm to be set already
 ------------------------------------------------------------------------- */
 
 void PPPM::set_grid_local()
 {
   // global indices of PPPM grid range from 0 to N-1
   // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that I own without ghost cells
   // for slab PPPM, assign z grid as if it were not extended
 
   nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
   nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
 
   nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
   nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
 
   nzlo_in = static_cast<int>
       (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
   nzhi_in = static_cast<int>
       (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
 
   // nlower,nupper = stencil size for mapping particles to PPPM grid
 
   nlower = -(order-1)/2;
   nupper = order/2;
 
   // shift values for particle <-> grid mapping
   // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
   // odd order: shift has an extra 0.5 and shiftone is 0.0
   // even order: shift is OFFSET and shiftone is 0.5
 
   if (order % 2) shift = OFFSET + 0.5;
   else shift = OFFSET;
   if (order % 2) shiftone = 0.0;
   else shiftone = 0.5;
 
   // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that my particles can contribute charge to
   // effectively nlo_in,nhi_in + ghost cells
   // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
   //           position a particle in my box can be at
   // dist[3] = particle position bound = subbox + skin/2.0 + qdist
   //   qdist = offset due to TIP4P fictitious charge
   //   convert to triclinic if necessary
   // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
   // for slab PPPM, assign z grid as if it were not extended
 
   double *prd,*sublo,*subhi;
 
   // boxlo is a class member, reused later for particle <-> grid mapping
 
   if (triclinic == 0) {
     prd = domain->prd;
     boxlo = domain->boxlo;
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     prd = domain->prd_lamda;
     boxlo = domain->boxlo_lamda;
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // triclinic: cuthalf is divided by the box lengths in domain->prd,
   //   since sublo/subhi/boxlo are then the *_lamda values
 
   double dist[3];
   double cuthalf = 0.5*neighbor->skin + qdist;
   if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
   else {
     dist[0] = cuthalf/domain->prd[0];
     dist[1] = cuthalf/domain->prd[1];
     dist[2] = cuthalf/domain->prd[2];
   }
 
   int nlo,nhi;
 
   nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
                             nx_pppm/xprd + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
                             nx_pppm/xprd + shift) - OFFSET;
   nxlo_out = nlo + nlower;
   nxhi_out = nhi + nupper;
 
   nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
                             ny_pppm/yprd + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
                             ny_pppm/yprd + shift) - OFFSET;
   nylo_out = nlo + nlower;
   nyhi_out = nhi + nupper;
 
   nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
                             nz_pppm/zprd_slab + shift) - OFFSET;
   nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
                             nz_pppm/zprd_slab + shift) - OFFSET;
   nzlo_out = nlo + nlower;
   nzhi_out = nhi + nupper;
 
   // for slab PPPM, change the grid boundary for processors at +z end
   //   to include the empty volume between periodically repeating slabs
   // for slab PPPM, want charge data communicated from -z proc to +z proc,
   //   but not vice versa, also want field data communicated from +z proc to
   //   -z proc, but not vice versa
   // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
   // also ensure no other procs use ghost cells beyond +z limit
 
   if (slabflag) {
     if (comm->myloc[2] == comm->procgrid[2]-1)
       nzhi_in = nzhi_out = nz_pppm - 1;
     nzhi_out = MIN(nzhi_out,nz_pppm-1);
   }
     
   // decomposition of FFT mesh
   // global indices range from 0 to N-1
   // proc owns entire x-dimension, clumps of columns in y,z dimensions
   // npey_fft,npez_fft = # of procs in y,z dims
   // if nprocs is small enough, proc can own 1 or more entire xy planes,
   //   else proc owns 2d sub-blocks of yz plane
   // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
   // nlo_fft,nhi_fft = lower/upper limit of the section
   //   of the global FFT mesh that I own
 
   int npey_fft,npez_fft;
   if (nz_pppm >= nprocs) {
     npey_fft = 1;
     npez_fft = nprocs;
   } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
   int me_y = me % npey_fft;
   int me_z = me / npey_fft;
 
   nxlo_fft = 0;
   nxhi_fft = nx_pppm - 1;
   nylo_fft = me_y*ny_pppm/npey_fft;
   nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
   nzlo_fft = me_z*nz_pppm/npez_fft;
   nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
   // PPPM grid pts owned by this proc, including ghosts
 
   ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
     (nzhi_out-nzlo_out+1);
 
   // FFT grids owned by this proc, without ghosts
   // nfft = FFT points in FFT decomposition on this proc
   // nfft_brick = FFT points in 3d brick-decomposition on this proc
   // nfft_both = greater of 2 values
 
   nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
     (nzhi_fft-nzlo_fft+1);
   int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
     (nzhi_in-nzlo_in+1);
   nfft_both = MAX(nfft,nfft_brick);
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n)
    fills gf_b[0..order-1], then scales every entry by 1/(2*order-1)!
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_denom()
 {
   int k,l,m;
 
   for (l = 1; l < order; l++) gf_b[l] = 0.0;
   gf_b[0] = 1.0;
 
   // in-place recurrence; the inner loop runs downward so that
   // gf_b[l-1] still holds its value from the previous m
 
   for (m = 1; m < order; m++) {
     for (l = m; l > 0; l--)
       gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
     // l == 0 here, left over from the inner loop
     gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
   }
 
   // (2*order-1)! is accumulated in a double:
   //   13! (order = 7) already exceeds the range of a 32-bit int,
   //   while a double holds these factorials exactly
 
   double factorial = 1.0;
   for (k = 1; k < 2*order; k++) factorial *= k;
   const double gaminv = 1.0/factorial;
   for (l = 0; l < order; l++) gf_b[l] *= gaminv;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute modified (Hockney-Eastwood) Coulomb Green's function
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_ik()
 {
   const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd = prd[2];
   const double zprd_slab = zprd*slab_volfactor;
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   double snx,sny,snz;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,dot1,dot2;
   double numerator,denominator;
   double sqk;
 
   int k,l,m,n,nx,ny,nz,kper,lper,mper;
 
   const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                     pow(-log(EPS_HOC),0.25));
   const int twoorder = 2*order;
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     mper = m - nz_pppm*(2*m/nz_pppm);
     snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
       sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
         snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
 
         sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
 
         if (sqk != 0.0) {
           numerator = 12.5663706/sqk;
           denominator = gf_denom(snx,sny,snz);
           sum1 = 0.0;
 
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*square(qx/g_ewald));
             argx = 0.5*qx*xprd/nx_pppm;
             wx = powsinxx(argx,twoorder);
 
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*square(qy/g_ewald));
               argy = 0.5*qy*yprd/ny_pppm;
               wy = powsinxx(argy,twoorder);
 
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*square(qz/g_ewald));
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 wz = powsinxx(argz,twoorder);
 
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
               }
             }
           }
           greensfn[n++] = numerator*sum1/denominator;
         } else greensfn[n++] = 0.0;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute optimized Green's function for energy calculation
    fills greensfn[] for every FFT grid point owned by this proc and
      accumulates the six self-force coefficients sf_coeff[0..5]
      from sf_precoeff1..6, then sums them over all procs
    collective call (MPI_Allreduce on world)
    reads the class member volume, which is not set in this function
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_gf_ad()
 {
   // box lengths: prd for orthogonal boxes, prd_lamda for triclinic
 
   const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = prd[0];
   const double yprd = prd[1];
   const double zprd = prd[2];
   const double zprd_slab = zprd*slab_volfactor;
   const double unitkx = (MY_2PI/xprd);
   const double unitky = (MY_2PI/yprd);
   const double unitkz = (MY_2PI/zprd_slab);
 
   const int twoorder = 2*order;
 
   for (int ic = 0; ic < 6; ic++) sf_coeff[ic] = 0.0;
 
   // n = running index into greensfn[] and sf_precoeff*[]
 
   int n = 0;
   for (int m = nzlo_fft; m <= nzhi_fft; m++) {
     const int mper = m - nz_pppm*(2*m/nz_pppm);
     const double qz = unitkz*mper;
     const double snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
     const double sz = exp(-0.25*square(qz/g_ewald));
     const double argz = 0.5*qz*zprd_slab/nz_pppm;
     const double wz = powsinxx(argz,twoorder);
 
     for (int l = nylo_fft; l <= nyhi_fft; l++) {
       const int lper = l - ny_pppm*(2*l/ny_pppm);
       const double qy = unitky*lper;
       const double sny = square(sin(0.5*qy*yprd/ny_pppm));
       const double sy = exp(-0.25*square(qy/g_ewald));
       const double argy = 0.5*qy*yprd/ny_pppm;
       const double wy = powsinxx(argy,twoorder);
 
       for (int k = nxlo_fft; k <= nxhi_fft; k++) {
         const int kper = k - nx_pppm*(2*k/nx_pppm);
         const double qx = unitkx*kper;
         const double snx = square(sin(0.5*qx*xprd/nx_pppm));
         const double sx = exp(-0.25*square(qx/g_ewald));
         const double argx = 0.5*qx*xprd/nx_pppm;
         const double wx = powsinxx(argx,twoorder);
 
         const double sqk = qx*qx + qy*qy + qz*qz;
 
         // zero wavevector gets a zero Green's function entry
 
         if (sqk != 0.0) {
           const double numerator = MY_4PI/sqk;
           const double denominator = gf_denom(snx,sny,snz);
           greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
         } else {
           greensfn[n] = 0.0;
         }
 
         // every grid point, including the zero entry, is added to the sums
 
         sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
         sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
         sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
         sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
         sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
         sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
         n++;
       }
     }
   }
 
   // compute the coefficients for the self-force correction
   // one prefactor per dimension; odd-numbered coeffs get twice that factor
 
   const double prex = (MY_PI/volume) * (nx_pppm/xprd);
   const double prey = (MY_PI/volume) * (ny_pppm/yprd);
   const double prez = (MY_PI/volume) * (nz_pppm/zprd_slab);
 
   sf_coeff[0] *= prex;
   sf_coeff[1] *= prex*2;
   sf_coeff[2] *= prey;
   sf_coeff[3] *= prey*2;
   sf_coeff[4] *= prez;
   sf_coeff[5] *= prez*2;
 
   // communicate values with other procs
 
   double summed[6];
   MPI_Allreduce(sf_coeff,summed,6,MPI_DOUBLE,MPI_SUM,world);
   for (int ic = 0; ic < 6; ic++) sf_coeff[ic] = summed[ic];
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    fills sf_precoeff1..6[] for every FFT grid point owned by this proc,
      stored consecutively in z (outer), y, x (inner) loop order
    each entry is a sum over 5x5x5 grid-period images of products of
      powsinxx() weights
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_sf_precoeff()
 {
   int i,k,l,m,n;
   int nx,ny,nz,kper,lper,mper;
   double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
   double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
   double u0,u1,u2,u3,u4,u5,u6;
   double sum1,sum2,sum3,sum4,sum5,sum6;
 
   // kper,lper,mper = signed wavevector index:
   //   i - N*(2*i/N) is i in the lower half of 0..N-1 and i-N in the upper half
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
 
         sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
 
         // tabulate the 1d weights for the 5 images i = 0..4
         //   w*0[i] uses image offset i-2
         //   w*1[i] uses image offset i-1 (shifted by one grid period)
         //   w*2[i] uses image offset i   (shifted by two grid periods)
 
         for (i = 0; i < 5; i++) {
 
           qx0 = MY_2PI*(kper+nx_pppm*(i-2));
           qx1 = MY_2PI*(kper+nx_pppm*(i-1));
           qx2 = MY_2PI*(kper+nx_pppm*(i  ));
           wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
           wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
           wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
 
           qy0 = MY_2PI*(lper+ny_pppm*(i-2));
           qy1 = MY_2PI*(lper+ny_pppm*(i-1));
           qy2 = MY_2PI*(lper+ny_pppm*(i  ));
           wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
           wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
           wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
 
           qz0 = MY_2PI*(mper+nz_pppm*(i-2));
           qz1 = MY_2PI*(mper+nz_pppm*(i-1));
           qz2 = MY_2PI*(mper+nz_pppm*(i  ));
 
           wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
           wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
           wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
         }
 
         // u0 = unshifted weight product
         // u1,u2 = x weight shifted by one,two periods
         // u3,u4 = same for y; u5,u6 = same for z
 
         for (nx = 0; nx < 5; nx++) {
           for (ny = 0; ny < 5; ny++) {
             for (nz = 0; nz < 5; nz++) {
               u0 = wx0[nx]*wy0[ny]*wz0[nz];
               u1 = wx1[nx]*wy0[ny]*wz0[nz];
               u2 = wx2[nx]*wy0[ny]*wz0[nz];
               u3 = wx0[nx]*wy1[ny]*wz0[nz];
               u4 = wx0[nx]*wy2[ny]*wz0[nz];
               u5 = wx0[nx]*wy0[ny]*wz1[nz];
               u6 = wx0[nx]*wy0[ny]*wz2[nz];
 
               sum1 += u0*u1;
               sum2 += u0*u2;
               sum3 += u0*u3;
               sum4 += u0*u4;
               sum5 += u0*u5;
               sum6 += u0*u6;
             }
           }
         }
 
         // store values
         // n advances once per grid point, on the last store
 
         sf_precoeff1[n] = sum1;
         sf_precoeff2[n] = sum2;
         sf_precoeff3[n] = sum3;
         sf_precoeff4[n] = sum4;
         sf_precoeff5[n] = sum5;
         sf_precoeff6[n++] = sum6;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array
 ------------------------------------------------------------------------- */
 
 void PPPM::particle_map()
 {
   int nx,ny,nz;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   int flag = 0;
   for (int i = 0; i < nlocal; i++) {
 
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
     ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
     nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
 
     part2grid[i][0] = nx;
     part2grid[i][1] = ny;
     part2grid[i][2] = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
         ny+nlower < nylo_out || ny+nupper > nyhi_out ||
         nz+nlower < nzlo_out || nz+nupper > nzhi_out)
       flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid
 ------------------------------------------------------------------------- */
 
 void PPPM::make_rho()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     z0 = delvolinv * q[i];
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       y0 = z0*rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         x0 = y0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           density_brick[mz][my][mx] += x0*rho1d[0][l];
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    move density from the 3d brick decomposition to the FFT decomposition
    the inner (non-ghost) portion of density_brick is flattened into
      density_fft, which is then remapped in place
 ------------------------------------------------------------------------- */
 
 void PPPM::brick2fft()
 {
   // copy grabs inner portion of density from 3d brick
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   int count = 0;
   for (int kz = nzlo_in; kz <= nzhi_in; kz++) {
     for (int ky = nylo_in; ky <= nyhi_in; ky++) {
       for (int kx = nxlo_in; kx <= nxhi_in; kx++) {
         density_fft[count] = density_brick[kz][ky][kx];
         count++;
       }
     }
   }
 
   remap->perform(density_fft,density_fft,work1);
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver
    dispatches to the ad variant when differentiation_flag is 1,
      to the ik variant otherwise
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson()
 {
   if (differentiation_flag != 1) {
     poisson_ik();
     return;
   }
 
   poisson_ad();
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ik
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_ik()
 {
   int i,j,k,n;
   double eng;
 
   // transform charge density (r -> k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] = density_fft[i];
     work1[n++] = ZEROF;
   }
 
   fft1->compute(work1,work1,1);
 
   // global energy and virial contribution
 
   double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft; i++) {
         eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
         if (eflag_global) energy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nfft; i++) {
         energy +=
           s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] *= scaleinv * greensfn[i];
     work1[n++] *= scaleinv * greensfn[i];
   }
 
   // extra FFTs for per-atom energy/virial
 
   if (evflag_atom) poisson_peratom();
 
   // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
   // FFT leaves data in 3d brick decomposition
   // copy it into inner portion of vdx,vdy,vdz arrays
 
   // x direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkx[i]*work1[n+1];
         work2[n+1] = -fkx[i]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdx_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // y direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fky[j]*work1[n+1];
         work2[n+1] = -fky[j]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdy_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // z direction gradient
 
   n = 0;
   for (k = nzlo_fft; k <= nzhi_fft; k++)
     for (j = nylo_fft; j <= nyhi_fft; j++)
       for (i = nxlo_fft; i <= nxhi_fft; i++) {
         work2[n] = fkz[k]*work1[n+1];
         work2[n+1] = -fkz[k]*work1[n];
         n += 2;
       }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         vdz_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ad
    input: density_fft (nfft real values)
    output: u_brick on the inner portion of my brick
    accumulates into energy and virial[0..5] when eflag_global and/or
      vflag_global are set
    calls poisson_peratom() only when vflag_atom is set,
      since u_brick is always produced here
    work1 and work2 are used as interleaved-pair scratch arrays
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_ad()
 {
   int i,j,k,n;
   double eng;
 
   // transform charge density (r -> k)
   // each density value becomes one (value,ZEROF) pair in work1
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] = density_fft[i];
     work1[n++] = ZEROF;
   }
 
   fft1->compute(work1,work1,1);
 
   // global energy and virial contribution
   // grid-point count is formed in double precision:
   //   an integer product nx*ny*nz can overflow for large global grids
 
   double scaleinv = 1.0/(static_cast<double>(nx_pppm)*ny_pppm*nz_pppm);
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     if (vflag_global) {
       n = 0;
       for (i = 0; i < nfft; i++) {
         eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
         if (eflag_global) energy += eng;
         n += 2;
       }
     } else {
       n = 0;
       for (i = 0; i < nfft; i++) {
         energy +=
           s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
         n += 2;
       }
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by Green's function to get V(k)
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work1[n++] *= scaleinv * greensfn[i];
     work1[n++] *= scaleinv * greensfn[i];
   }
 
   // extra FFTs for per-atom energy/virial
 
   if (vflag_atom) poisson_peratom();
 
   // copy V(k) to work2 so work1 is left untouched by the transform
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n];
     work2[n+1] = work1[n+1];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   // keep only the first value of each pair as the potential on my brick
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         u_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for per-atom energy/virial
    reads work1, which the callers in this file have already scaled and
      multiplied by the Green's function
    work2 is reused as scratch for every transform, so the order of the
      fill / transform / copy-out steps below must not be changed
    fills u_brick (energy, ik differentiation only) and v0_brick..v5_brick
      (virial) on the inner portion of my brick
 ------------------------------------------------------------------------- */
 
 void PPPM::poisson_peratom()
 {
   int i,j,k,n;
 
   // energy
   // skipped when differentiation_flag == 1: poisson_ad() fills u_brick itself
 
   if (eflag_atom && differentiation_flag != 1) {
     n = 0;
     for (i = 0; i < nfft; i++) {
       work2[n] = work1[n];
       work2[n+1] = work1[n+1];
       n += 2;
     }
 
     fft2->compute(work2,work2,-1);
 
     // keep only the first value of each pair
 
     n = 0;
     for (k = nzlo_in; k <= nzhi_in; k++)
       for (j = nylo_in; j <= nyhi_in; j++)
         for (i = nxlo_in; i <= nxhi_in; i++) {
           u_brick[k][j][i] = work2[n];
           n += 2;
         }
   }
 
   // 6 components of virial in v0 thru v5
   // each component c: work2 = work1 * vg[i][c], transform, copy to vc_brick
 
   if (!vflag_atom) return;
 
   // component 0
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][0];
     work2[n+1] = work1[n+1]*vg[i][0];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v0_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // component 1
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][1];
     work2[n+1] = work1[n+1]*vg[i][1];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v1_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // component 2
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][2];
     work2[n+1] = work1[n+1]*vg[i][2];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v2_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // component 3
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][3];
     work2[n+1] = work1[n+1]*vg[i][3];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v3_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // component 4
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][4];
     work2[n+1] = work1[n+1]*vg[i][4];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v4_brick[k][j][i] = work2[n];
         n += 2;
       }
 
   // component 5
 
   n = 0;
   for (i = 0; i < nfft; i++) {
     work2[n] = work1[n]*vg[i][5];
     work2[n+1] = work1[n+1]*vg[i][5];
     n += 2;
   }
 
   fft2->compute(work2,work2,-1);
 
   n = 0;
   for (k = nzlo_in; k <= nzhi_in; k++)
     for (j = nylo_in; j <= nyhi_in; j++)
       for (i = nxlo_in; i <= nxhi_in; i++) {
         v5_brick[k][j][i] = work2[n];
         n += 2;
       }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
    dispatches to the ad variant when differentiation_flag is 1,
      to the ik variant otherwise
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce()
 {
   if (differentiation_flag != 1) {
     fieldforce_ik();
     return;
   }
 
   fieldforce_ad();
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         y0 = z0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           x0 = y0*rho1d[0][l];
           ekx -= x0*vdx_brick[mz][my][mx];
           eky -= x0*vdy_brick[mz][my][mx];
           ekz -= x0*vdz_brick[mz][my][mx];
         }
       }
     }
 
     // convert E-field to force
 
     const double qfactor = force->qqrd2e * scale * q[i];
     f[i][0] += qfactor*ekx;
     f[i][1] += qfactor*eky;
     if (slabflag != 2) f[i][2] += qfactor*ekz;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles for ad
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
 
   double hx_inv = nx_pppm/xprd;
   double hy_inv = ny_pppm/yprd;
   double hz_inv = nz_pppm/zprd;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
     compute_drho1d(dx,dy,dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
 
     // convert E-field to force and substract self forces
 
     const double qfactor = force->qqrd2e * scale;
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
     sf = sf_coeff[0]*sin(2*MY_PI*s1);
     sf += sf_coeff[1]*sin(4*MY_PI*s1);
     sf *= 2*q[i]*q[i];
     f[i][0] += qfactor*(ekx*q[i] - sf);
 
     sf = sf_coeff[2]*sin(2*MY_PI*s2);
     sf += sf_coeff[3]*sin(4*MY_PI*s2);
     sf *= 2*q[i]*q[i];
     f[i][1] += qfactor*(eky*q[i] - sf);
 
 
     sf = sf_coeff[4]*sin(2*MY_PI*s3);
     sf += sf_coeff[5]*sin(4*MY_PI*s3);
     sf *= 2*q[i]*q[i];
     if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get per-atom energy/virial
 ------------------------------------------------------------------------- */
 
 void PPPM::fieldforce_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz);
 
     u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         y0 = z0*rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           x0 = y0*rho1d[0][l];
           if (eflag_atom) u += x0*u_brick[mz][my][mx];
           if (vflag_atom) {
             v0 += x0*v0_brick[mz][my][mx];
             v1 += x0*v1_brick[mz][my][mx];
             v2 += x0*v2_brick[mz][my][mx];
             v3 += x0*v3_brick[mz][my][mx];
             v4 += x0*v4_brick[mz][my][mx];
             v5 += x0*v5_brick[mz][my][mx];
           }
         }
       }
     }
 
     if (eflag_atom) eatom[i] += q[i]*u;
     if (vflag_atom) {
       vatom[i][0] += q[i]*v0;
       vatom[i][1] += q[i]*v1;
       vatom[i][2] += q[i]*v2;
       vatom[i][3] += q[i]*v3;
       vatom[i][4] += q[i]*v4;
       vatom[i][5] += q[i]*v5;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack own values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   if (flag == FORWARD_IK) {
     FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
   } else if (flag == FORWARD_AD) {
     FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[i];
   } else if (flag == FORWARD_IK_PERATOM) {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
   } else if (flag == FORWARD_AD_PERATOM) {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
   } 
 }
 
 /* ----------------------------------------------------------------------
    map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
    every exact factorization ipx*ipy = nprocs is tried in order of
      increasing ipx
    *px,*py are written only when a factorization beats the current best
 ------------------------------------------------------------------------- */
 
 void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
 {
   // surf = surface area of largest proc sub-domain
   // a candidate wins on smaller surf, or on equal surf with larger box area
   //   (minimizes surface area and surface/volume ratio)
 
   int bestsurf = 2 * (nx + ny);
   int bestboxx = 0;
   int bestboxy = 0;
 
   for (int ipx = 1; ipx <= nprocs; ipx++) {
     if (nprocs % ipx != 0) continue;
 
     const int ipy = nprocs/ipx;
 
     // box edge = grid pts per proc, rounded up
 
     const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
     const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
     const int surf = boxx + boxy;
 
     const bool smaller = surf < bestsurf;
     const bool tie_but_bigger =
       surf == bestsurf && boxx*boxy > bestboxx*bestboxy;
 
     if (smaller || tie_but_bigger) {
       bestsurf = surf;
       bestboxx = boxx;
       bestboxy = boxy;
       *px = ipx;
       *py = ipy;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                          const FFT_SCALAR &dz)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-order)/2; k <= order/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = order-1; l >= 0; l--) {
       r1 = rho_coeff[l][k] + r1*dx;
       r2 = rho_coeff[l][k] + r2*dy;
       r3 = rho_coeff[l][k] + r3*dz;
     }
     rho1d[0][k] = r1;
     rho1d[1][k] = r2;
     rho1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into drho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                           const FFT_SCALAR &dz)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-order)/2; k <= order/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = order-2; l >= 0; l--) {
       r1 = drho_coeff[l][k] + r1*dx;
       r2 = drho_coeff[l][k] + r2*dy;
       r3 = drho_coeff[l][k] + r3*dz;
     }
     drho1d[0][k] = r1;
     drho1d[1][k] = r2;
     drho1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    generate coefficients for the weight function of order n
 
               (n-1)
   Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
            k=-(n-1)
   For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
       k is odd integers if n is even and even integers if n is odd
               ---
              | n-1
              | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
   wn(k,x) = <  l=0
              |
              |  0                       otherwise
               ---
   a coefficients are packed into the array rho_coeff to eliminate zeros
   rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
 
   also fills drho_coeff with the coefficients of the derivative:
   drho_coeff(l-1,m) = l*a(l,k) for l = 1..n-1
 ------------------------------------------------------------------------- */
 
 void PPPM::compute_rho_coeff()
 {
   int j,k,l,m;
   FFT_SCALAR s;
 
   // temporary table a[l][k], l = 0..order-1, k = -order..order
   // released at the end of this function
 
   FFT_SCALAR **a;
   memory->create2d_offset(a,order,-order,order,"pppm:a");
 
   for (k = -order; k <= order; k++)
     for (l = 0; l < order; l++)
       a[l][k] = 0.0;
 
   // build the table one order j at a time
   // entries for order j at index k are built from neighbors k-1 and k+1,
   //   which were set in earlier passes or are still zero
 
   a[0][0] = 1.0;
   for (j = 1; j < order; j++) {
     for (k = -j; k <= j; k += 2) {
       s = 0.0;
       for (l = 0; l < j; l++) {
         a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
 #ifdef FFT_SINGLE
         s += powf(0.5,(float) l+1) *
           (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
 #else
         s += pow(0.5,(double) l+1) *
           (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
 #endif
       }
       a[0][k] = s;
     }
   }
 
   // pack every other column of a into consecutive columns m,
   //   starting at m = (1-order)/2
 
   m = (1-order)/2;
   for (k = -(order-1); k < order; k += 2) {
     for (l = 0; l < order; l++)
       rho_coeff[l][m] = a[l][k];
     for (l = 1; l < order; l++)
       drho_coeff[l-1][m] = l*a[l][k];
     m++;
   }
 
   memory->destroy2d_offset(a,-order);
 }
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
    periodically repeating slabs.  Yields good approximation to 2D Ewald if
    adequate empty space is left between repeating slabs (J. Chem. Phys.
    111, 3155).  Slabs defined here to be parallel to the xy plane.
 ------------------------------------------------------------------------- */
 
 void PPPM::slabcorr()
 {
   // compute local contribution to global dipole moment
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   double dipole = 0.0;
   for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
 
   // sum local contributions to get global dipole moment
 
   double dipole_all;
   MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
 
   // compute corrections
 
   const double e_slabcorr = MY_2PI*dipole_all*dipole_all/volume;
   const double qscale = force->qqrd2e * scale;
 
   if (eflag_global) energy += qscale * e_slabcorr;
 
   // per-atom energy
 
   if (eflag_atom) {
     double efact = MY_2PI*dipole_all/volume;
     for (int i = 0; i < nlocal; i++) eatom[i] += qscale * q[i]*x[i][2]*efact;
   }
 
   // add on force corrections
 
   double ffact = -4.0*MY_PI*dipole_all/volume;
   double **f = atom->f;
 
   for (int i = 0; i < nlocal; i++) f[i][2] += qscale * q[i]*ffact;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 1d FFTs required for N timesteps
    time1d receives the elapsed wall time between the two barriers
    return value = number of FFTs counted per timestep:
      2 if differentiation_flag is set, else 4
 ------------------------------------------------------------------------- */
 
 int PPPM::timing_1d(int n, double &time1d)
 {
   // start from a zeroed work array
 
   const int nwork = 2*nfft_both;
   for (int iw = 0; iw < nwork; iw++) work1[iw] = ZEROF;
 
   MPI_Barrier(world);
   const double tstart = MPI_Wtime();
 
   // per step: one fft1 and one fft2 pass, plus two more fft2 passes
   //   unless differentiation_flag == 1
 
   for (int istep = 0; istep < n; istep++) {
     fft1->timing1d(work1,nfft_both,1);
     fft2->timing1d(work1,nfft_both,-1);
     if (differentiation_flag == 1) continue;
     fft2->timing1d(work1,nfft_both,-1);
     fft2->timing1d(work1,nfft_both,-1);
   }
 
   MPI_Barrier(world);
   const double tstop = MPI_Wtime();
   time1d = tstop - tstart;
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 3d FFTs required for N timesteps
    time3d receives the elapsed wall time between the two barriers
    return value = number of FFTs counted per timestep:
      2 if differentiation_flag is set, else 4
 ------------------------------------------------------------------------- */
 
 int PPPM::timing_3d(int n, double &time3d)
 {
   // start from a zeroed work array
 
   const int nwork = 2*nfft_both;
   for (int iw = 0; iw < nwork; iw++) work1[iw] = ZEROF;
 
   MPI_Barrier(world);
   const double tstart = MPI_Wtime();
 
   // per step: one fft1 and one fft2 transform, plus two more fft2
   //   transforms unless differentiation_flag == 1
 
   for (int istep = 0; istep < n; istep++) {
     fft1->compute(work1,work1,1);
     fft2->compute(work1,work1,-1);
     if (differentiation_flag == 1) continue;
     fft2->compute(work1,work1,-1);
     fft2->compute(work1,work1,-1);
   }
 
   MPI_Barrier(world);
   const double tstop = MPI_Wtime();
   time3d = tstop - tstart;
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local arrays
    returns a byte count built from nmax, the brick size (including ghost
      points), nfft_both, the optional per-atom and group arrays,
      and whatever cg reports
 ------------------------------------------------------------------------- */
 
 double PPPM::memory_usage()
 {
   // number of grid points in my brick, ghosts included
 
   const int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
     (nzhi_out-nzlo_out+1);
 
   // 3 values per particle slot
 
   double bytes = nmax*3 * sizeof(double);
 
   // brick-sized arrays: 2 with differentiation_flag == 1, else 4
 
   if (differentiation_flag != 1) {
     bytes += 4 * nbrick * sizeof(FFT_SCALAR);
   } else {
     bytes += 2 * nbrick * sizeof(FFT_SCALAR);
   }
 
   // FFT-sized arrays
 
   bytes += 6 * nfft_both * sizeof(double);
   bytes += nfft_both * sizeof(double);
   bytes += nfft_both*5 * sizeof(FFT_SCALAR);
 
   // optional per-atom bricks
 
   if (peratom_allocate_flag) {
     bytes += 6 * nbrick * sizeof(FFT_SCALAR);
   }
 
   // optional group/group arrays
 
   if (group_allocate_flag) {
     bytes += 2 * nbrick * sizeof(FFT_SCALAR);
     bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
   }
 
   bytes += cg->memory_usage();
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    group-group interactions
  ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    compute the PPPM total long-range force and energy for groups A and B
  ------------------------------------------------------------------------- */
 
 void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag)
 {
   if (slabflag)
     error->all(FLERR,"Cannot (yet) use K-space slab "
                "correction with compute group/group");
 
   if (!group_allocate_flag) {
     allocate_groups();
     group_allocate_flag = 1;
   }
 
   e2group = 0; //energy
   f2group[0] = 0; //force in x-direction
   f2group[1] = 0; //force in y-direction
   f2group[2] = 0; //force in z-direction
 
   // map my particle charge onto my local 3d density grid
 
   make_rho_groups(groupbit_A,groupbit_B,BA_flag);
 
   // all procs communicate density values from their ghost cells
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
   // temporarily store and switch pointers so we can
   //  use brick2fft() for groups A and B (without
   //  writing an additional function)
 
   FFT_SCALAR ***density_brick_real = density_brick;
   FFT_SCALAR *density_fft_real = density_fft;
 
   // group A
 
   density_brick = density_A_brick;
   density_fft = density_A_fft;
 
   brick2fft();
 
   // group B
 
   density_brick = density_B_brick;
   density_fft = density_B_fft;
 
   brick2fft();
 
   // switch back pointers
 
   density_brick = density_brick_real;
   density_fft = density_fft_real;
 
   // compute potential gradient on my FFT grid and
   //   portion of group-group energy/force on this proc's FFT grid
 
   poisson_groups(BA_flag);
 
   const double qscale = force->qqrd2e * scale;
 
   // total group A <--> group B energy
   // self and boundary correction terms are in compute_group_group.cpp
 
   double e2group_all;
   MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
   e2group = e2group_all;
 
   e2group *= qscale*0.5*volume;
 
   // total group A <--> group B force
 
   double f2group_all[3];
   MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
 
   for (int i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i];
 }
 
 /* ----------------------------------------------------------------------
  allocate group-group memory that depends on # of K-vectors and order
  ------------------------------------------------------------------------- */
 
 void PPPM::allocate_groups()
 {
   // group A: 3d brick indexed by the *_out bounds (z,y,x order)
   //   plus a 1d array of length nfft_both
 
   memory->create3d_offset(density_A_brick,
                           nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,
                           "pppm:density_A_brick");
   memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
 
   // group B: same shapes as group A
 
   memory->create3d_offset(density_B_brick,
                           nzlo_out,nzhi_out,nylo_out,nyhi_out,
                           nxlo_out,nxhi_out,
                           "pppm:density_B_brick");
   memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
 }
 
 /* ----------------------------------------------------------------------
  deallocate group-group memory that depends on # of K-vectors and order
  ------------------------------------------------------------------------- */
 
 void PPPM::deallocate_groups()
 {
   // group A arrays; the brick is released with the same lower
   //   offsets (z,y,x) that allocate_groups() used to create it
 
   memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy(density_A_fft);
 
   // group B arrays
 
   memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy(density_B_fft);
 }
 
 /* ----------------------------------------------------------------------
  create discretized "density" on section of global grid due to my particles
  density(x,y,z) = charge "density" at grid points of my 3d brick
  (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
  in global grid for group-group interactions
  ------------------------------------------------------------------------- */
 
 void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density arrays
 
   memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
          ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
 
   for (int i = 0; i < nlocal; i++) {
 
     if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
       if (BA_flag) continue;
 
     if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
 
       nx = part2grid[i][0];
       ny = part2grid[i][1];
       nz = part2grid[i][2];
       dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
       dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
       dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
       compute_rho1d(dx,dy,dz);
 
       z0 = delvolinv * q[i];
       for (n = nlower; n <= nupper; n++) {
         mz = n+nz;
         y0 = z0*rho1d[2][n];
         for (m = nlower; m <= nupper; m++) {
           my = m+ny;
           x0 = y0*rho1d[1][m];
           for (l = nlower; l <= nupper; l++) {
             mx = l+nx;
 
             // group A
 
             if (mask[i] & groupbit_A)
               density_A_brick[mz][my][mx] += x0*rho1d[0][l];
 
             // group B
 
             if (mask[i] & groupbit_B)
               density_B_brick[mz][my][mx] += x0*rho1d[0][l];
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for group-group interactions
  ------------------------------------------------------------------------- */
 
 void PPPM::poisson_groups(int BA_flag)
 {
   // reuse the already existing work buffers, one per group
 
   FFT_SCALAR *work_A = work1;
   FFT_SCALAR *work_B = work2;
 
   // transform charge density (r -> k)
   // each density value is stored at an even index, followed by a zero
 
   for (int i = 0; i < nfft; i++) {          // group A
     work_A[2*i] = density_A_fft[i];
     work_A[2*i+1] = ZEROF;
   }
   fft1->compute(work_A,work_A,1);
 
   for (int i = 0; i < nfft; i++) {          // group B
     work_B[2*i] = density_B_fft[i];
     work_B[2*i+1] = ZEROF;
   }
   fft1->compute(work_B,work_B,1);
 
   // group-group energy and force contribution,
   //  keep everything in reciprocal space so
   //  no inverse FFTs needed
 
   const double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
   const double s2 = scaleinv*scaleinv;
 
   // energy
 
   for (int i = 0; i < nfft; i++) {
     const int ire = 2*i;
     const int iim = ire+1;
     e2group += s2 * greensfn[i] *
       (work_A[ire]*work_B[ire] + work_A[iim]*work_B[iim]);
   }
 
   // with BA_flag set only the energy is accumulated
 
   if (BA_flag) return;
 
   // multiply by Green's function and s2
   //  (only for work_A so it is not squared below)
 
   for (int i = 0; i < nfft; i++) {
     const double gscaled = s2 * greensfn[i];
     work_A[2*i] *= gscaled;
     work_A[2*i+1] *= gscaled;
   }
 
   // force in x, y and z direction, accumulated in one sweep over my
   //   FFT grid points; the same cross term feeds all three components
 
   int n = 0;
   for (int k = nzlo_fft; k <= nzhi_fft; k++)
     for (int j = nylo_fft; j <= nyhi_fft; j++)
       for (int i = nxlo_fft; i <= nxhi_fft; i++) {
         const double partial_group =
           work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
         f2group[0] += fkx[i] * partial_group;
         f2group[1] += fky[j] * partial_group;
         f2group[2] += fkz[k] * partial_group;
         n += 2;
       }
 }
diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h
index 96aa00e4d..f60159d6d 100644
--- a/src/KSPACE/pppm.h
+++ b/src/KSPACE/pppm.h
@@ -1,326 +1,315 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(pppm,PPPM)
 
 #else
 
 #ifndef LMP_PPPM_H
 #define LMP_PPPM_H
 
 #include "lmptype.h"
 #include "mpi.h"
 
 #ifdef FFT_SINGLE
 typedef float FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_FLOAT
 #else
 typedef double FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_DOUBLE
 #endif
 
 #include "kspace.h"
 
 namespace LAMMPS_NS {
 
 // PPPM solver class, derived from KSpace and registered as kspace style
 // "pppm" through the KSpaceStyle(pppm,PPPM) entry in this header
 
 class PPPM : public KSpace {
  public:
   PPPM(class LAMMPS *, int, char **);
   virtual ~PPPM();
   virtual void init();
   virtual void setup();
   void setup_grid();
   virtual void compute(int, int);
 
   // perform and time FFTs for the given number of steps; the elapsed time
   // is returned through the double reference
 
   virtual int timing_1d(int, double &);
   virtual int timing_3d(int, double &);
 
   // memory usage of local arrays, in bytes
 
   virtual double memory_usage();
 
   // long-range energy/force between two groups given as group bitmasks;
   // results are left in e2group and f2group
 
   virtual void compute_group_group(int, int, int);
 
  protected:
   int me,nprocs;
   int nfactors;
   int *factors;
   double qsum,qsqsum,q2;
   double cutoff;
   double volume;
   double delxinv,delyinv,delzinv,delvolinv;
   double shift,shiftone;
   int peratom_allocate_flag;
 
   // grid index bounds per dimension
   // *_out bounds are used to size and index the 3d brick arrays,
   // *_fft bounds are used to loop over this proc's FFT grid points
   // NOTE(review): *_in and *_ghost presumably describe owned and ghost
   // extents - confirm against set_grid_local()
 
   int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
   int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
   int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
   int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
   int nlower,nupper;           // stencil loop bounds (see make_rho_groups)
   int ngrid,nfft,nfft_both;
 
   // 3d brick arrays, indexed [z][y][x]
 
   FFT_SCALAR ***density_brick;
   FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
   FFT_SCALAR ***u_brick;
   FFT_SCALAR ***v0_brick,***v1_brick,***v2_brick;
   FFT_SCALAR ***v3_brick,***v4_brick,***v5_brick;
   double *greensfn;
   double **vg;
   double *fkx,*fky,*fkz;
   FFT_SCALAR *density_fft;
   FFT_SCALAR *work1,*work2;    // FFT work buffers, 2 values per grid point
 
   double *gf_b;                // denominator expansion coeffs for gf_denom()
   FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
   double *sf_precoeff1, *sf_precoeff2, *sf_precoeff3;
   double *sf_precoeff4, *sf_precoeff5, *sf_precoeff6;
   double sf_coeff[6];          // coefficients for calculating ad self-forces
   double **acons;
 
   // group-group interactions
   // the group arrays are created by allocate_groups() the first time
   // compute_group_group() is called; group_allocate_flag records that
 
   int group_allocate_flag;
   FFT_SCALAR ***density_A_brick,***density_B_brick;
   FFT_SCALAR *density_A_fft,*density_B_fft;
 
   class FFT3d *fft1,*fft2;
   class Remap *remap;
   class CommGrid *cg;
   class CommGrid *cg_peratom;
 
   int **part2grid;             // storage for particle -> grid mapping
   int nmax;
 
   int triclinic;               // domain settings, orthog or triclinic
   double *boxlo;
                                // TIP4P settings
   int typeH,typeO;             // atom types of TIP4P water H and O atoms
   double qdist;                // distance from O site to negative charge
   double alpha;                // geometric factor
   
   void set_grid_global();
   void set_grid_local();
   void adjust_gewald();
   double newton_raphson_f();
   double derivf();
   double final_accuracy();
 
   virtual void allocate();
   virtual void allocate_peratom();
   virtual void deallocate();
   virtual void deallocate_peratom();
   int factorable(int);
   double compute_df_kspace();
   double estimate_ik_error(double, double, bigint);
   double compute_qopt();
   void compute_gf_denom();
   virtual void compute_gf_ik();
   virtual void compute_gf_ad();
   void compute_sf_precoeff();
   
   virtual void particle_map();
   virtual void make_rho();
   virtual void brick2fft();
   
   virtual void poisson();
   virtual void poisson_ik();
   virtual void poisson_ad();
   
   virtual void fieldforce();
   virtual void fieldforce_ik();
   virtual void fieldforce_ad();
   
   virtual void poisson_peratom();
   virtual void fieldforce_peratom();
   void procs2grid2d(int,int,int,int *, int*);
   void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
                      const FFT_SCALAR &);
   void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &,
                      const FFT_SCALAR &);
   void compute_rho_coeff();
   void slabcorr();
 
   // grid communication
 
   virtual void pack_forward(int, FFT_SCALAR *, int, int *);
   virtual void unpack_forward(int, FFT_SCALAR *, int, int *);
   virtual void pack_reverse(int, FFT_SCALAR *, int, int *);
   virtual void unpack_reverse(int, FFT_SCALAR *, int, int *);
 
   // group-group interactions
 
   virtual void allocate_groups();
   virtual void deallocate_groups();
   virtual void make_rho_groups(int, int, int);
   virtual void poisson_groups(int);
 
 /* ----------------------------------------------------------------------
    denominator for Hockney-Eastwood Green's function
      of x,y,z = sin(kx*deltax/2), etc
 
             inf                 n-1
    S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
            j=-inf               l=0
 
           = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
    gf_b = denominator expansion coeffs
 ------------------------------------------------------------------------- */
 
   inline double gf_denom(const double &x, const double &y,
                          const double &z) const {
     double sx,sy,sz;
     sz = sy = sx = 0.0;
     // Horner evaluation of the polynomial with coefficients
     // gf_b[0..order-1], once for each of the arguments x, y and z
     for (int l = order-1; l >= 0; l--) {
       sx = gf_b[l] + sx*x;
       sy = gf_b[l] + sy*y;
       sz = gf_b[l] + sz*z;
     }
     // return the square of the product of the three polynomial values
     double s = sx*sy*sz;
     return s*s;
   };
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot (yet) use PPPM with triclinic box
 
 This feature is not yet supported.
 
 E: Cannot use PPPM with 2d simulation
 
 The kspace style pppm cannot be used in 2d simulations.  You can use
 2d PPPM in a 3d simulation; see the kspace_modify command.
 
 E: Kspace style requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 E: Cannot use nonperiodic boundaries with PPPM
 
 For kspace style pppm, all 3 dimensions must have periodic boundaries
 unless you use the kspace_modify command to define a 2d slab with a
 non-periodic z dimension.
 
 E: Incorrect boundaries with slab PPPM
 
 Must have periodic x,y dimensions and non-periodic z dimension to use
 2d slab option with PPPM.
 
 E: PPPM order cannot be < 2 or > than %d
 
 This is a limitation of the PPPM implementation in LAMMPS.
 
 E: KSpace style is incompatible with Pair style
 
 Setting a kspace style requires that a pair style with a long-range
 Coulombic component be selected.
 
 E: Bond and angle potentials must be defined for TIP4P
 
 Cannot use TIP4P pair potential unless bond and angle potentials
 are defined.
 
 E: Bad TIP4P angle type for PPPM/TIP4P
 
 Specified angle type is not valid.
 
 E: Bad TIP4P bond type for PPPM/TIP4P
 
 Specified bond type is not valid.
 
 E: Cannot use kspace solver on system with no charge
 
 No atoms in system have a non-zero charge.
 
 W: System is not charge neutral, net charge = %g
 
 The total charge on all atoms on the system is not 0.0, which
 is not valid for Ewald or PPPM.
 
 W: Reducing PPPM order b/c stencil extends beyond nearest neighbor processor
 
-UNDOCUMENTED
+This may lead to a larger grid than desired.  See the kspace_modify overlap
+command to prevent changing of the PPPM order.
 
 E: PPPM order < minimum allowed order
 
-UNDOCUMENTED
+The default minimum order is 2.  This can be reset by the
+kspace_modify minorder command.
 
 E: PPPM grid stencil extends beyond nearest neighbor processor
 
-UNDOCUMENTED
+This is not allowed if the kspace_modify overlap setting is no.
 
 E: KSpace accuracy must be > 0
 
 The kspace accuracy designated in the input must be greater than zero.
 
-E: Could not compute grid size!
+E: Could not compute grid size
 
-UNDOCUMENTED
+The code is unable to compute a grid size consistent with the desired
+accuracy.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: PPPM grid is too large
 
 The global PPPM grid is larger than OFFSET in one or more dimensions.
 OFFSET is currently set to 4096.  You likely need to decrease the
 requested accuracy.
 
 E: Could not compute g_ewald
 
-UNDOCUMENTED
+The Newton-Raphson solver failed to converge to a good value for
+g_ewald.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: Out of range atoms - cannot compute PPPM
 
 One or more atoms are attempting to map their charge to a PPPM grid
 point that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
 E: Cannot (yet) use K-space slab correction with compute group/group
 
 This option is not yet supported.
 
-U: Reducing PPPM order b/c stencil extends beyond neighbor processor
-
-LAMMPS is attempting this in order to allow the simulation
-to run.  It should not effect the PPPM accuracy.
-
-U: PPPM order has been reduced to 0
-
-LAMMPS has attempted to reduce the PPPM order to enable the simulation
-to run, but can reduce the order no further.  Try increasing the
-accuracy of PPPM by reducing the tolerance size, thus inducing a
-larger PPPM grid.
-
-U: Cannot compute PPPM G
-
-LAMMPS failed to compute a valid approximation for the PPPM g_ewald
-factor that partitions the computation between real space and k-space.
-
 */
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 25a447ccb..e31514788 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -1,6855 +1,6860 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Rolf Isele-Holder (Aachen University)
                          Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "string.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "math.h"
 #include "pppm_disp.h"
 #include "math_const.h"
 #include "atom.h"
 #include "comm.h"
 #include "commgrid.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "domain.h"
 #include "fft3d_wrap.h"
 #include "remap_wrap.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXORDER   7
 #define OFFSET 16384
 #define SMALL 0.00001
 #define LARGE 10000.0
 #define EPS_HOC 1.0e-7
 
 enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
 enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A};
 enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
      FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
      FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A};
 
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 /* ---------------------------------------------------------------------- */
 
 PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
 {
   // at least one argument is required: arg[0] is parsed as the
   //   relative accuracy
 
   if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
 
   pppmflag = dispersionflag = 1;
   accuracy_relative = atof(arg[0]);
   
   // NOTE(review): presumably the prime factors allowed for the grid
   //   dimensions - confirm against factorable()
 
   nfactors = 3;
   factors = new int[nfactors];
   factors[0] = 2;
   factors[1] = 3;
   factors[2] = 5;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // every pointer member below starts out as NULL; nothing is allocated
   //   in the constructor except factors, so the delete [] / destroy calls
   //   in the destructor are safe even if init() was never run
 
   csumflag = 0;
   B = NULL;
   cii = NULL;
   csumi = NULL;
   peratom_allocate_flag = 0;
 
   // grid arrays without suffix
   // NOTE(review): presumably the Coulomb grid, with the _g and _a0.._a6
   //   sets below belonging to geometric and arithmetic dispersion mixing
   //   (cf. the FORWARD_*_G / FORWARD_*_A enums) - confirm
 
   density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
   density_fft = NULL;
   u_brick = v0_brick = v1_brick = v2_brick = v3_brick = 
     v4_brick = v5_brick = NULL;
 
   // grid arrays with suffix _g
 
   density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
   density_fft_g = NULL;
   u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = 
     v4_brick_g = v5_brick_g = NULL;
 
   // seven sets of grid arrays with suffixes _a0 ... _a6
 
   density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
   density_fft_a0 = NULL;
   u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = 
     v4_brick_a0 = v5_brick_a0 = NULL;
 
   density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
   density_fft_a1 = NULL;
   u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = 
     v4_brick_a1 = v5_brick_a1 = NULL;
 
   density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
   density_fft_a2 = NULL;
   u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = 
     v4_brick_a2 = v5_brick_a2 = NULL;
 
   density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
   density_fft_a3 = NULL;
   u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = 
     v4_brick_a3 = v5_brick_a3 = NULL;
 
   density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
   density_fft_a4 = NULL;
   u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = 
     v4_brick_a4 = v5_brick_a4 = NULL;
 
   density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
   density_fft_a5 = NULL;
   u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = 
     v4_brick_a5 = v5_brick_a5 = NULL;
 
   density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
   density_fft_a6 = NULL;
   u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = 
     v4_brick_a6 = v5_brick_a6 = NULL;
 
   // Green's function, work, virial and wave-vector arrays; each exists
   //   in a plain and a _6 variant
   // NOTE(review): _6 presumably marks the dispersion (order 6) quantities
   //   - confirm
 
   greensfn = NULL;
   greensfn_6 = NULL;
   work1 = work2 = NULL;
   work1_6 = work2_6 = NULL;
   vg = NULL;
   vg2 = NULL;
   vg_6 = NULL;
   vg2_6 = NULL;
   fkx = fky = fkz = NULL;
   fkx2 = fky2 = fkz2 = NULL;
   fkx_6 = fky_6 = fkz_6 = NULL;
   fkx2_6 = fky2_6 = fkz2_6 = NULL;
 
   // self-force precoefficient arrays
 
   sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = 
     sf_precoeff5 = sf_precoeff6 = NULL;
   sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = 
     sf_precoeff5_6 = sf_precoeff6_6 = NULL;
 
   // stencil coefficient arrays, FFT and remap objects
 
   gf_b = NULL;
   gf_b_6 = NULL;
   rho1d = rho_coeff = NULL;
   drho1d = drho_coeff = NULL;
   rho1d_6 = rho_coeff_6 = NULL;
   drho1d_6 = drho_coeff_6 = NULL;
   fft1 = fft2 = NULL;
   fft1_6 = fft2_6 = NULL;
   remap = NULL;
   remap_6 = NULL;
 
   // per-particle grid mapping, nothing allocated yet (nmax = 0)
 
   nmax = 0;
   part2grid = NULL;
   part2grid_6 = NULL;
 
   // communication buffers and bookkeeping arrays
 
   splitbuf1 = NULL;
   splitbuf2 = NULL;
   dict_send = NULL;
   dict_rec = NULL;
   com_each = NULL;
   com_order = NULL;
   split_1 = NULL;
   split_2 = NULL;
 
   // grid communication objects
 
   cg = NULL;
   cg_peratom = NULL;
   cg_6 = NULL;
   cg_peratom_6 = NULL;
 }
 
 /* ----------------------------------------------------------------------
    free all memory 
 ------------------------------------------------------------------------- */
 
 PPPMDisp::~PPPMDisp()
 {
   // arrays released with delete []
 
   delete [] csumi;
   delete [] cii;
   delete [] B;
   delete [] factors;
 
   // grid and per-atom grid storage
 
   deallocate();
   deallocate_peratom();
 
   // particle -> grid mappings
 
   memory->destroy(part2grid_6);
   memory->destroy(part2grid);
 
   // communication buffers and bookkeeping arrays
   // NOTE(review): split_1 and split_2 are set to NULL in the constructor
   //   but are not released here - confirm they are freed elsewhere
 
   memory->destroy(splitbuf2);
   memory->destroy(splitbuf1);
   memory->destroy(dict_rec);
   memory->destroy(dict_send);
   memory->destroy(com_each);
   memory->destroy(com_order);
 }
 
 /* ----------------------------------------------------------------------
    called once before run 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::init()
 {
   if (me == 0) {
     if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
     if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
   }
 
   if (domain->triclinic)
     error->all(FLERR,"Cannot (yet) use PPPMDisp with triclinic box");
   if (domain->dimension == 2)
     error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
 
   if (slabflag == 0 && domain->nonperiodic > 0)
     error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
   if (slabflag == 1) {
     if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
 	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
       error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
   }
  
   if (order > MAXORDER || order_6 > MAXORDER) {
     char str[128];
     sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
     error->all(FLERR,str);
   }
 
   // free all arrays previously allocated
 
   deallocate();
   deallocate_peratom(); 
   peratom_allocate_flag = 0;
 
 
   // set scale
 
   scale = 1.0;
 
   triclinic = domain->triclinic;
 
   // check whether cutoff and pair style are set
 
   pair_check();
 
   int tmp;
   Pair *pair = force->pair;
   int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
   double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
   double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
   if (!(ptr||*p_cutoff||*p_cutoff_lj)) 
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   cutoff = *p_cutoff;
   cutoff_lj = *p_cutoff_lj;
 
   double tmp2;
   MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); 
 
   // check out which types of potentials will have to be calculated
 
   int ewald_order = ptr ? *((int *) ptr) : 1<<1;
   int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
   memset(function, 0, EWALD_FUNCS*sizeof(int));
   for (int i=0; i<=EWALD_MAXORDER; ++i)			// transcribe order
     if (ewald_order&(1<<i)) {				// from pair_style
       int  k;
       char str[128];
       switch (i) {
 	case 1:
 	  k = 0; break;
 	case 6:
 	  if (ewald_mix==GEOMETRIC) { k = 1; break; }
 	  else if (ewald_mix==ARITHMETIC) { k = 2; break; }
-	  sprintf(str, "Unsupported mixing rule in kspace_style pppm/disp for pair_style %s", force->pair_style);
+	  sprintf(str,"Unsupported mixing rule in "
+		  "kspace_style pppm/disp for pair_style %s", force->pair_style);
 	  error->all(FLERR,str);
 	default:
-	  sprintf(str, "Unsupported order in kspace_style pppm/disp pair_style %s", force->pair_style);
+	  sprintf(str, "Unsupported order in "
+		  "kspace_style pppm/disp pair_style %s", force->pair_style);
 	  error->all(FLERR,str);
       }
       function[k] = 1;
     }
  
 
-  // warn, if function[0] is not set but charge attribute is set!
+  // warn, if function[0] is not set but charge attribute is set
+
   if (!function[0] && atom->q_flag && me == 0) {
     char str[128];
     sprintf(str, "Charges are set, but coulombic solver is not used");
     error->warning(FLERR, str);
   }
 
-  // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral  
+  // compute qsum & qsqsum, if function[0] is set
+  // print error if no charges are set or warn if not charge-neutral  
  
   if (function[0]) {
     if (!atom->q_flag) error->all(FLERR,"Kspace style with selected options requires atom attribute q");
  
     qsum = qsqsum = 0.0;
     for (int i = 0; i < atom->nlocal; i++) {
       qsum += atom->q[i];
       qsqsum += atom->q[i]*atom->q[i];
 
     }
 
     double tmp;
     MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     qsum = tmp;
     MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     qsqsum = tmp;
 
     if (qsqsum == 0.0)
       error->all(FLERR,"Cannot use kspace solver with selected options on system with no charge");
     if (fabs(qsum) > SMALL && me == 0) {
       char str[128];
       sprintf(str,"System is not charge neutral, net charge = %g",qsum);
       error->warning(FLERR,str);
     }
   }
 
   // if kspace is TIP4P, extract TIP4P params from pair style
   // bond/angle are not yet init(), so insure equilibrium request is valid
 
   qdist = 0.0;
  
   if (tip4pflag) {
     int itmp;
     double *p_qdist = (double *) force->pair->extract("qdist",itmp);
     int *p_typeO = (int *) force->pair->extract("typeO",itmp);
     int *p_typeH = (int *) force->pair->extract("typeH",itmp);
     int *p_typeA = (int *) force->pair->extract("typeA",itmp);
     int *p_typeB = (int *) force->pair->extract("typeB",itmp);
     if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
       error->all(FLERR,"KSpace style is incompatible with Pair style");
     qdist = *p_qdist;
     typeO = *p_typeO;
     typeH = *p_typeH;
     int typeA = *p_typeA;
     int typeB = *p_typeB;
 
     if (force->angle == NULL || force->bond == NULL)
       error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
     if (typeA < 1 || typeA > atom->nangletypes || 
 	force->angle->setflag[typeA] == 0)
       error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
     if (typeB < 1 || typeB > atom->nbondtypes || 
 	force->bond->setflag[typeB] == 0)
       error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
     double theta = force->angle->equilibrium_angle(typeA);
     double blen = force->bond->equilibrium_distance(typeB);
     alpha = qdist / (cos(0.5*theta) * blen);
   }
 
 
   // initialize the pair style to get the coefficients
   pair->init();
   init_coeffs();
 
   //if g_ewald and g_ewald_6 have not been specified, set some initial value
   //  to avoid problems when calculating the energies!
 
   if (!gewaldflag) g_ewald = 1;
   if (!gewaldflag_6) g_ewald_6 = 1;
 
   // set accuracy (force units) from accuracy_relative or accuracy_absolute
   
   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
   else accuracy = accuracy_relative * two_charge_force;
 
   int (*procneigh)[2] = comm->procneigh;
 
   int iteration = 0;
   if (function[0]) {
     CommGrid *cgtmp = NULL;
     while (order >= minorder) {
 
       if (iteration && me == 0)
           error->warning(FLERR,"Reducing PPPMDisp Coulomb order b/c stencil extends "
 			 "beyond neighbor processor.");
       iteration++;
 
       // set grid for dispersion interaction and coulomb interactions!
  
       set_grid();
 
       if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
       error->all(FLERR,"PPPMDisp Coulomb grid is too large");
 
       set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                          nxlo_fft, nylo_fft, nzlo_fft,
                          nxhi_fft, nyhi_fft, nzhi_fft,
                          nxlo_in, nylo_in, nzlo_in,
                          nxhi_in, nyhi_in, nzhi_in,
                          nxlo_out, nylo_out, nzlo_out,
                          nxhi_out, nyhi_out, nzhi_out,
                          nlower, nupper,
                          ngrid, nfft, nfft_both,
                          shift, shiftone, order);
 
       if (overlap_allowed) break;
 
       cgtmp = new CommGrid(lmp, world,1,1,
                            nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                            nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                            procneigh[0][0],procneigh[0][1],procneigh[1][0],
                            procneigh[1][1],procneigh[2][0],procneigh[2][1]);
       cgtmp->ghost_notify();
       if (!cgtmp->ghost_overlap()) break;
       delete cgtmp;
 
       order--;
     }
 
     if (order < minorder)
-      error->all(FLERR,"Coulomb PPPMDisp order has been reduced below minorder");
+      error->all(FLERR,"Coulomb PPPMDisp order < minimum allowed order");
     if (cgtmp) delete cgtmp;
 
     // adjust g_ewald
   
     if (!gewaldflag) adjust_gewald();
 
     // calculate the final accuracy
   
     double acc = final_accuracy();
   
     // print stats
 
     int ngrid_max,nfft_both_max,nbuf_max;
     MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
     MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
     if (me == 0) {
     #ifdef FFT_SINGLE
       const char fft_prec[] = "single";
     #else
       const char fft_prec[] = "double";
     #endif
   
       if (screen) {
         fprintf(screen,"  Coulomb G vector (1/distance)= %g\n",g_ewald);
         fprintf(screen,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
         fprintf(screen,"  Coulomb stencil order = %d\n",order);
         fprintf(screen,"  Coulomb estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(screen,"  Coulomb estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(screen,"  using %s precision FFTs\n",fft_prec);
         fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
 		ngrid_max, nfft_both_max);
       }
       if (logfile) {
         fprintf(logfile,"  Coulomb G vector (1/distance) = %g\n",g_ewald);
         fprintf(logfile,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
         fprintf(logfile,"  Coulomb stencil order = %d\n",order);
         fprintf(logfile,"  Coulomb estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(logfile,"  Coulomb estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
         fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
 		ngrid_max, nfft_both_max);
       }
     }
   }
 
   iteration = 0;
   if (function[1] + function[2]) {
     CommGrid *cgtmp = NULL;
     while (order_6 >= minorder) {
 
       if (iteration && me == 0)
           error->warning(FLERR,"Reducing PPPMDisp Dispersion order b/c stencil extends "
   		     "beyond neighbor processor");
       iteration++;
 
       set_grid_6();
    
       if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
       error->all(FLERR,"PPPMDisp Dispersion grid is too large");
 
       set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                          nxlo_in_6, nylo_in_6, nzlo_in_6,
                          nxhi_in_6, nyhi_in_6, nzhi_in_6,
                          nxlo_out_6, nylo_out_6, nzlo_out_6,
                          nxhi_out_6, nyhi_out_6, nzhi_out_6,
                          nlower_6, nupper_6,
                          ngrid_6, nfft_6, nfft_both_6,
                          shift_6, shiftone_6, order_6);
 
       if (overlap_allowed) break;
 
       cgtmp = new CommGrid(lmp,world,1,1,
                             nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                             nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                             procneigh[0][0],procneigh[0][1],procneigh[1][0],
                             procneigh[1][1],procneigh[2][0],procneigh[2][1]);
       cgtmp->ghost_notify();
       if (!cgtmp->ghost_overlap()) break;
       delete cgtmp;
       order_6--;
     }
 
     if (order_6 < minorder) error->all(FLERR,"Dispersion PPPMDisp order has been reduced below minorder");
     if (cgtmp) delete cgtmp;
 
     // adjust g_ewald_6
 
     if (!gewaldflag_6) adjust_gewald_6();
 
     // calculate the final accuracy
 
     double acc = final_accuracy_6();
 
 
     // print stats
 
     int ngrid_max,nfft_both_max,nbuf_max;
     MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
     MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
 
     if (me == 0) {
     #ifdef FFT_SINGLE
       const char fft_prec[] = "single";
     #else
       const char fft_prec[] = "double";
     #endif
   
       if (screen) {
         fprintf(screen,"  Dispersion G vector (1/distance)= %g\n",g_ewald_6);
         fprintf(screen,"  Dispersion grid = %d %d %d\n",nx_pppm_6,ny_pppm_6,nz_pppm_6);
         fprintf(screen,"  Dispersion stencil order = %d\n",order_6);
         fprintf(screen,"  Dispersion estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(screen,"  Dispersion estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(screen,"  using %s precision FFTs\n",fft_prec);
         fprintf(screen,"  3d grid and FFT values/proc dispersion = %d %d\n",
                           ngrid_max,nfft_both_max);
       }
       if (logfile) {
         fprintf(logfile,"  Dispersion G vector (1/distance) = %g\n",g_ewald_6);
         fprintf(logfile,"  Dispersion grid = %d %d %d\n",nx_pppm_6,ny_pppm_6,nz_pppm_6);
         fprintf(logfile,"  Dispersion stencil order = %d\n",order_6);
         fprintf(logfile,"  Dispersion estimated absolute RMS force accuracy = %g\n",
                 acc);
         fprintf(logfile,"  Disperion estimated relative force accuracy = %g\n",
                 acc/two_charge_force);
         fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
         fprintf(logfile,"  3d grid and FFT values/proc dispersion = %d %d\n",
                            ngrid_max,nfft_both_max);
       }
     }
   }
  
   // prepare the splitting of the Fourier Transformed vectors
 
   if (function[2]) prepare_splitting();
 
   // allocate K-space dependent memory
   allocate();
 
   // pre-compute Green's function denominator expansion
   // pre-compute 1d charge distribution coefficients
   if (function[0]) {
     compute_gf_denom(gf_b, order);
     compute_rho_coeff(rho_coeff, drho_coeff, order);
     cg->ghost_notify();
     cg->setup();
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                           nxlo_fft, nylo_fft, nzlo_fft, 
                           nxhi_fft, nyhi_fft, nzhi_fft,
                           sf_precoeff1, sf_precoeff2, sf_precoeff3,
                           sf_precoeff4, sf_precoeff5, sf_precoeff6);
   }
   if (function[1] + function[2]) {
     compute_gf_denom(gf_b_6, order_6);
     compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
     cg_6->ghost_notify();
     cg_6->setup();
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                           nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
                           nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                           sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                           sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
   }
 
 }
 
 /* ----------------------------------------------------------------------
    adjust PPPM coeffs, called initially and whenever volume has changed 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::setup()
 {
   double *prd;
 
   // volume-dependent factors
   // adjust z dimension for 2d slab PPPM
   // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
  // compute fkx,fky,fkz for my FFT grid pts
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   //compute the virial coefficients and green functions
   if (function[0]){
 
     delxinv = nx_pppm/xprd;
     delyinv = ny_pppm/yprd;
     delzinv = nz_pppm/zprd_slab;
 
     delvolinv = delxinv*delyinv*delzinv;
 
     double per;
     int i, j, k, n;
 
     for (i = nxlo_fft; i <= nxhi_fft; i++) {
       per = i - nx_pppm*(2*i/nx_pppm);
       fkx[i] = unitkx*per;
       j = (nx_pppm - i) % nx_pppm;
       per = j - nx_pppm*(2*j/nx_pppm);
       fkx2[i] = unitkx*per;
     }
 
     for (i = nylo_fft; i <= nyhi_fft; i++) {
       per = i - ny_pppm*(2*i/ny_pppm);
       fky[i] = unitky*per;
       j = (ny_pppm - i) % ny_pppm;
       per = j - ny_pppm*(2*j/ny_pppm);
       fky2[i] = unitky*per;
     }
 
     for (i = nzlo_fft; i <= nzhi_fft; i++) {
       per = i - nz_pppm*(2*i/nz_pppm);
       fkz[i] = unitkz*per;
       j = (nz_pppm - i) % nz_pppm;
       per = j - nz_pppm*(2*j/nz_pppm);
       fkz2[i] = unitkz*per;
     }
 
     double sqk,vterm;
     double gew2inv = 1/(g_ewald*g_ewald);
     n = 0;
     for (k = nzlo_fft; k <= nzhi_fft; k++) {
       for (j = nylo_fft; j <= nyhi_fft; j++) {
         for (i = nxlo_fft; i <= nxhi_fft; i++) {
 	  sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
 	  if (sqk == 0.0) {
 	    vg[n][0] = 0.0;
 	    vg[n][1] = 0.0;
 	    vg[n][2] = 0.0;
 	    vg[n][3] = 0.0;
 	    vg[n][4] = 0.0;
 	    vg[n][5] = 0.0;
 	  } else {
 	    vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
 	    vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
 	    vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
 	    vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
 	    vg[n][3] = vterm*fkx[i]*fky[j];
 	    vg[n][4] = vterm*fkx[i]*fkz[k];
 	    vg[n][5] = vterm*fky[j]*fkz[k];
             vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
             vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
             vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
   	  }
 	  n++;
         }
       }
     }
     compute_gf();
     if (differentiation_flag == 1) compute_sf_coeff();
   }
 
   if (function[1] + function[2]) {
     delxinv_6 = nx_pppm_6/xprd;
     delyinv_6 = ny_pppm_6/yprd;
     delzinv_6 = nz_pppm_6/zprd_slab;
     delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
 
     double per;
     int i, j, k, n;
     for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
       per = i - nx_pppm_6*(2*i/nx_pppm_6);
       fkx_6[i] = unitkx*per;
       j = (nx_pppm_6 - i) % nx_pppm_6;
       per = j - nx_pppm_6*(2*j/nx_pppm_6);
       fkx2_6[i] = unitkx*per;
     }
     for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
       per = i - ny_pppm_6*(2*i/ny_pppm_6);
       fky_6[i] = unitky*per;
       j = (ny_pppm_6 - i) % ny_pppm_6;
       per = j - ny_pppm_6*(2*j/ny_pppm_6);
       fky2_6[i] = unitky*per;
     }
     for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
       per = i - nz_pppm_6*(2*i/nz_pppm_6);
       fkz_6[i] = unitkz*per;
       j = (nz_pppm_6 - i) % nz_pppm_6;
       per = j - nz_pppm_6*(2*j/nz_pppm_6);
       fkz2_6[i] = unitkz*per;
     }
     double sqk,vterm;
     long double erft, expt,nom, denom;
     long double b, bs, bt;
     double rtpi = sqrt(MY_PI);
     double gewinv = 1/g_ewald_6;
     n = 0;
     for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
       for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
         for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
 	  sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
 	  if (sqk == 0.0) {
 	    vg_6[n][0] = 0.0;
 	    vg_6[n][1] = 0.0;
 	    vg_6[n][2] = 0.0;
 	    vg_6[n][3] = 0.0;
 	    vg_6[n][4] = 0.0;
 	    vg_6[n][5] = 0.0;
 	  } else {
             b = 0.5*sqrt(sqk)*gewinv;
             bs = b*b;
             bt = bs*b;
             erft = 2*bt*rtpi*erfc(b);
             expt = exp(-bs);
             nom = erft - 2*bs*expt;
             denom = nom + expt;
             if (denom == 0) vterm = 3.0/sqk;
             else vterm = 3.0*nom/(sqk*denom);
 	    vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
 	    vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
 	    vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
 	    vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
 	    vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
 	    vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
             vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
             vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
             vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
 	  }
 	  n++;
         }
       }
     }
     compute_gf_6();
     if (differentiation_flag == 1) compute_sf_coeff_6();
   }
 }
 
 /* ----------------------------------------------------------------------
    reset local grid arrays and communication stencils
    called by fix balance b/c it changed sizes of processor sub-domains
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::setup_grid()
 {
   // free all arrays previously allocated
 
   deallocate();
   deallocate_peratom();
   peratom_allocate_flag = 0;
 
   // reset portion of global grid that each proc owns
   if (function[0])
     set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                        nxlo_fft, nylo_fft, nzlo_fft,
                        nxhi_fft, nyhi_fft, nzhi_fft,
                        nxlo_in, nylo_in, nzlo_in,
                        nxhi_in, nyhi_in, nzhi_in,
                        nxlo_out, nylo_out, nzlo_out,
                        nxhi_out, nyhi_out, nzhi_out,
                        nlower, nupper,
                        ngrid, nfft, nfft_both,
                        shift, shiftone, order);
 
   if (function[1] + function[2])
     set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                        nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                        nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                        nxlo_in_6, nylo_in_6, nzlo_in_6,
                        nxhi_in_6, nyhi_in_6, nzhi_in_6,
                        nxlo_out_6, nylo_out_6, nzlo_out_6,
                        nxhi_out_6, nyhi_out_6, nzhi_out_6,
                        nlower_6, nupper_6,
                        ngrid_6, nfft_6, nfft_both_6,
                        shift_6, shiftone_6, order_6);
 
   // reallocate K-space dependent memory
   // check if grid communication is now overlapping if not allowed
   // don't invoke allocate_peratom(), compute() will allocate when needed
 
   allocate();
 
   if (function[0]) {
     cg->ghost_notify();
     if (overlap_allowed == 0 && cg->ghost_overlap())
       error->all(FLERR,"PPPM grid stencil extends "
                  "beyond nearest neighbor processor");
     cg->setup();
   }
   if (function[1] + function[2]) {
     cg_6->ghost_notify();
     if (overlap_allowed == 0 && cg_6->ghost_overlap())
       error->all(FLERR,"PPPM grid stencil extends "
                  "beyond nearest neighbor processor");
     cg_6->setup();
   }
 
   // pre-compute Green's function denomiator expansion
   // pre-compute 1d charge distribution coefficients
 
   if (function[0]) {
     compute_gf_denom(gf_b, order);
     compute_rho_coeff(rho_coeff, drho_coeff, order);
     if (differentiation_flag == 1) 
       compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                           nxlo_fft, nylo_fft, nzlo_fft, 
                           nxhi_fft, nyhi_fft, nzhi_fft,
                           sf_precoeff1, sf_precoeff2, sf_precoeff3,
                           sf_precoeff4, sf_precoeff5, sf_precoeff6);
   }
   if (function[1] + function[2]) {
     compute_gf_denom(gf_b_6, order_6);
     compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
     if (differentiation_flag == 1)
       compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                           nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
                           nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                           sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                           sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
   }
 
   // pre-compute volume-dependent coeffs
 
   setup();
 }
 
 /* ----------------------------------------------------------------------
    compute the PPPM long-range force, energy, virial 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute(int eflag, int vflag)
 {
   // One long-range evaluation. For each active part (function[0]: Coulomb,
   // function[1]: dispersion with geometric mixing, function[2]: dispersion
   // with arithmetic mixing) the sequence is
   //   map particles to grid -> build density -> reverse comm -> FFT/poisson
   //   solve -> forward comm -> interpolate forces (fieldforce_*)
   // followed by the global reduction of energy and virial, the per-atom
   // corrections and the optional slab correction.
   // eflag/vflag are passed on to ev_setup(); when both are zero all
   // energy/virial flags are cleared instead.
 
   int i;
   // set energy/virial accumulation flags, or clear them all if unused
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global = 
 	 eflag_atom = vflag_atom = 0;
   
   // per-atom grids are allocated lazily, the first time they are needed
 
   if (evflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
     if (function[0]) {
       cg_peratom->ghost_notify();
       cg_peratom->setup();
     }
     if (function[1] + function[2]) {
       cg_peratom_6->ghost_notify();
       cg_peratom_6->setup();
     }
     peratom_allocate_flag = 1;
   }
   
   // for triclinic boxes, convert atoms from box to lamda coords
   // (converted back at the end of this function)
 
   if (triclinic == 0) boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
   }
   // extend size of per-atom arrays if necessary
 
   if (atom->nlocal > nmax) {
 
     if (function[0]) memory->destroy(part2grid);
     if (function[1] + function[2]) memory->destroy(part2grid_6);
     nmax = atom->nmax;
     if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
     if (function[1] + function[2]) memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
   }
 
   // reset the accumulators: energy_1/virial_1 collect the Coulomb part,
   // energy_6/virial_6 the dispersion part
 
   energy = 0.0;
   energy_1 = 0.0;
   energy_6 = 0.0;
   if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
   // find grid points for all my particles
   // distribute particles' charges/dispersion coefficients on the grid
   // communication between processors and remapping to fft
   // Solution of poissons equation in k-space and backtransformation
   // communication between processors
   // calculation of forces
   if (function[0]) {
 
     // perform calculations for coulomb interactions only
     particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
                  nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
     make_rho_c();
     cg->reverse_comm(this,REVERSE_RHO);
     brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
 	      density_brick, density_fft, work1,remap); 
     // differentiation_flag == 1 selects the *_ad routines, otherwise *_ik
     if (differentiation_flag == 1) {
       poisson_ad(work1, work2, density_fft, fft1, fft2,
                  nx_pppm, ny_pppm, nz_pppm, nfft,
                  nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                  nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                  energy_1, greensfn, 
                  virial_1, vg,vg2,
                  u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
 
       cg->forward_comm(this,FORWARD_AD);
  
       fieldforce_c_ad(); 
 
       // NOTE(review): this per-atom comm is gated by vflag_atom, while the
       // ik branch below uses evflag_atom - confirm that this is intended
       if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
 
     } else {
       poisson_ik(work1, work2, density_fft, fft1, fft2,
                  nx_pppm, ny_pppm, nz_pppm, nfft,
                  nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                  nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                  energy_1, greensfn, 
 	         fkx, fky, fkz,fkx2, fky2, fkz2,
                  vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
                  u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
 
       cg->forward_comm(this, FORWARD_IK);
 
       fieldforce_c_ik(); 
 
       if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
     }
     if (evflag_atom) fieldforce_c_peratom();
   }
 
   if (function[1]) {
     // perform calculations for geometric mixing
     particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                  nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
     make_rho_g();
 
 
     cg_6->reverse_comm(this, REVERSE_RHO_G);
 
     brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
 	      density_brick_g, density_fft_g, work1_6,remap_6);
  
     if (differentiation_flag == 1) {
 
       poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
                  virial_6, vg_6, vg2_6,
                  u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
 
       cg_6->forward_comm(this,FORWARD_AD_G);
 
       fieldforce_g_ad();
 
       // NOTE(review): gated by vflag_atom here, but by evflag_atom in the
       // arithmetic-mixing ad branch further down - confirm
       if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
 
     } else {
       poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
 	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                  vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
                  u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
  
       cg_6->forward_comm(this,FORWARD_IK_G);
  
       fieldforce_g_ik();
 
 
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
     }
     if (evflag_atom) fieldforce_g_peratom();
   }
 
   if (function[2]) {
     //perform calculations for arithmetic mixing
     particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                  nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
     make_rho_a();
 
     cg_6->reverse_comm(this, REVERSE_RHO_A);
 
     brick2fft_a();
 
     // seven density grids a0..a6 are solved here: a3 on its own through
     // poisson_ad/poisson_ik, the others in pairs (a0,a6), (a1,a5), (a2,a4)
     // through poisson_2s_ad/poisson_2s_ik
 
     if ( differentiation_flag == 1) {
 
       poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
                  virial_6, vg_6, vg2_6,
                  u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
       poisson_2s_ad(density_fft_a0, density_fft_a6,
                     u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                     u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
       poisson_2s_ad(density_fft_a1, density_fft_a5,
                     u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                     u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
       poisson_2s_ad(density_fft_a2, density_fft_a4,
                     u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                     u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
 
       cg_6->forward_comm(this, FORWARD_AD_A);
 
       fieldforce_a_ad();
 
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
 
     }  else {
     
       poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                  nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                  nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                  nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                  energy_6, greensfn_6, 
 	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                  vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
                  u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
       poisson_2s_ik(density_fft_a0, density_fft_a6,
                     vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
                     vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
                     u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                     u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
       poisson_2s_ik(density_fft_a1, density_fft_a5,
                     vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
                     vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
                     u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                     u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
       poisson_2s_ik(density_fft_a2, density_fft_a4,
                     vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
                     vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
                     u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                     u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
 
       cg_6->forward_comm(this, FORWARD_IK_A);
 
       fieldforce_a_ik();
 
       if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
     }
     if (evflag_atom) fieldforce_a_peratom();
   }
 
   // sum energy across procs and add in volume-dependent term
   // only the Coulomb energy (energy_1) is multiplied by qscale
 
   const double qscale = force->qqrd2e * scale;
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy_1 = energy_all;
     MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
     energy_6 = energy_all;
    
     energy_1 *= 0.5*volume;
     energy_6 *= 0.5*volume;
     
     energy_1 -= g_ewald*qsqsum/MY_PIS +
       MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
     energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
       1.0/12.0*pow(g_ewald_6,6)*csum;
     energy_1 *= qscale;
   }
 
   // sum virial across procs
   // the Coulomb part is scaled by qscale, the dispersion part is not
 
   if (vflag_global) {
     double virial_all[6];
     MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
     MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
     for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
     // same csumij term as in energy_6 above, removed from the three
     // diagonal virial components
     if (function[1]+function[2]){
       double a =  MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
       virial[0] -= a;
       virial[1] -= a;
       virial[2] -= a;
     }
   }
 
   // per-atom energy corrections, mirroring the global terms above
 
   if (eflag_atom) {
     if (function[0]) {
       double *q = atom->q;
       for (i = 0; i < atom->nlocal; i++) {
         eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
       }
     }
     if (function[1] + function[2]) {
       int tmp;
       for (i = 0; i < atom->nlocal; i++) {
         // csumi and cii are indexed by atom type
         tmp = atom->type[i];
         eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
                       1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
       }
     }
   }
             
   // per-atom virial correction: only the three diagonal components
 
   if (vflag_atom) {
     if (function[1] + function[2]) {
       int tmp;
       for (i = 0; i < atom->nlocal; i++) {
         tmp = atom->type[i];
         for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
       }
     }
   }
 
 
   // 2d slab correction
   // the total energy is assembled only after slabcorr() has run
 
   if (slabflag) slabcorr(eflag);
   energy = energy_1 + energy_6;
 
   // convert atoms back from lamda to box coords
   
   if (triclinic) domain->lamda2x(atom->nlocal);
 }
 
 /* ----------------------------------------------------------------------
    initialize coefficients needed for the dispersion density on the grids
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::init_coeffs()				// local pair coeffs
 {
   // (Re)build B, the per-type coefficient table for the dispersion grids,
   // from data extracted from the pair style:
   //   function[1] (geometric):  n+1 entries, B[i] = sqrt(|b[i][i]|) with
   //                             b = extract("B")
   //   function[2] (arithmetic): 7 entries per type,
   //                             B[7*i+j] = 0.25*sqrt(epsilon[i][i])*c[j]*
   //                             sigma[i][i]^j for j = 0..6
   // If neither is set, B is left as NULL.
 
   int tmp;
   int n = atom->ntypes;
 
   // release the old table and clear the pointer: B is only re-allocated
   // in the branches below, and without the reset a call with neither
   // function[1] nor function[2] set would leave B dangling, so that the
   // next delete [] B would free the same array a second time
 
   delete [] B;
   B = NULL;
 
   if (function[1]) {					// geometric 1/r^6
     double **b = (double **) force->pair->extract("B",tmp);
     B = new double[n+1];
     for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
   }
   if (function[2]) {					// arithmetic 1/r^6
     //cannot use epsilon, because this has not been set yet
     double **epsilon = (double **) force->pair->extract("epsilon",tmp);  
     //cannot use sigma, because this has not been set yet
     double **sigma = (double **) force->pair->extract("sigma",tmp);  
     if (!(epsilon&&sigma))
-      error->all(FLERR,"epsilon or sigma reference not set by pair style in PPPMDisp");
+      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
     // bi walks through B while the table is filled, 7 values per type
     double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
     // c[j] = sqrt of the binomial coefficient (6 over j)
     double c[7] = {
       1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
     for (int i=0; i<=n; ++i) {
       eps_i = sqrt(epsilon[i][i]);
       sigma_i = sigma[i][i];
       sigma_n = 1.0;
       for (int j=0; j<7; ++j) {
         // sigma_n holds sigma_i^j at this point
 	*(bi++) = sigma_n*eps_i*c[j]*0.25;
         sigma_n *= sigma_i;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::allocate()
 {
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (function[0]) {
     memory->create(work1,2*nfft_both,"pppm/disp:work1");
     memory->create(work2,2*nfft_both,"pppm/disp:work2");
 
     memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
     memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
     memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
 
     memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
     memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
     memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
 
 
     memory->create(gf_b,order,"pppm/disp:gf_b");
     memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
     memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
     memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d");
     memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
 
     memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
     memory->create(vg,nfft_both,6,"pppm/disp:vg");
     memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
 
     memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:density_brick");
     if ( differentiation_flag == 1) {
       memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   		  	      nxlo_out,nxhi_out,"pppm/disp:u_brick");
       memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
       memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
       memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
       memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
       memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
       memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
 
     } else {
       memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			      nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
       memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			      nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
       memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			      nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
     }
     memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
 
     int tmp;
 
     fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     0,0,&tmp);
 
     fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
 		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
 		     0,0,&tmp);
 
     remap = new Remap(lmp,world,
 		      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
 		      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
 		      1,0,0,FFT_PRECISION);
 
   // create ghost grid object for rho and electric field communication
 
   if (differentiation_flag == 1)
     cg = new CommGrid(lmp,world,1,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   else
     cg = new CommGrid(lmp,world,3,1,
                       nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                       nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                       procneigh[0][0],procneigh[0][1],procneigh[1][0],
                       procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 
   if (function[1]) {
     memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
     memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
 
     memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
     memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
     memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
 
     memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
     memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
     memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
 
     memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
     memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
     memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
 
     memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
     memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
     memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
 
     memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
     if ( differentiation_flag == 1) {
       memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
 
       memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
       memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
       memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
       memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
       memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
       memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
 
     }  else {
       memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
       memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
       memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
     }
     memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
 
 
     int tmp;
 
     fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     0,0,&tmp);
 
     fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		     0,0,&tmp);
 
     remap_6 = new Remap(lmp,world,
 		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		      1,0,0,FFT_PRECISION);
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_6 = new CommGrid(lmp,world,1,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_6 = new CommGrid(lmp,world,3,1,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }
 
   if (function[2]) {
     memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
     memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
 
     memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
     memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
     memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
 
     memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
     memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
     memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
 
     memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
     memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
     memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
 
     memory->create(split_1,2*nfft_both_6 , "pppm/disp:split_1");
     memory->create(split_2,2*nfft_both_6 , "pppm/disp:split_2");
     memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
     memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
     memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
 
     memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
     memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
     memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
     memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
     memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
     memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
     memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
 
     memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
     memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
     memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
     memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
     memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
     memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
     memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
 
 
     if ( differentiation_flag == 1 ) {
       memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
       memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
       memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
       memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
       memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
       memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
       memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
 
       memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
       memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
       memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
       memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
       memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
       memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
 
     } else {
 
       memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
       memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
       memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
 
       memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
       memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
       memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
 
       memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
       memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
       memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
 
       memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
       memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
       memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
 
       memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
       memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
       memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
 
       memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
       memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
       memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
 
       memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
       memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
       memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
 			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
     }
 
 
 
     int tmp;
 
     fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     0,0,&tmp);
 
     fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
 		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		     0,0,&tmp);
 
     remap_6 = new Remap(lmp,world,
 		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
 		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
 		      1,0,0,FFT_PRECISION);
 
     // create ghost grid object for rho and electric field communication
 
 
     if (differentiation_flag == 1)
       cg_6 = new CommGrid(lmp,world,7,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_6 = new CommGrid(lmp,world,21,7,
                         nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                         nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
   }  
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order
    for per atom calculations 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::allocate_peratom()
 {
 
   int (*procneigh)[2] = comm->procneigh;
 
   if (function[0]) {
 
     if (differentiation_flag != 1)
       memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
     	                      nxlo_out,nxhi_out,"pppm/disp:u_brick");
 
     memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
 			    nxlo_out,nxhi_out,"pppm/disp:v0_brick");
     memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v1_brick");
     memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v2_brick");
     memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v3_brick");
     memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v4_brick");
     memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
   			    nxlo_out,nxhi_out,"pppm/disp:v5_brick");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom =
         new CommGrid(lmp,world,6,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom =
         new CommGrid(lmp,world,7,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }
 
 
   if (function[1]) {
 
     if ( differentiation_flag != 1 )
       memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
 
     memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
     memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
     memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
     memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
     memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
     memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
         new CommGrid(lmp,world,6,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
         new CommGrid(lmp,world,7,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }
 
   if (function[2]) {
    
     if ( differentiation_flag != 1 ) {
       memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
       memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
       memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
       memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
       memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
       memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
       memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
     }
 
     memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
     memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
     	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
     memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
     memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
     memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
     memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
 
     memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
     memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
    	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
     memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
     memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
     memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
     memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
 
     memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
     memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
     memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
     memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
     memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
     memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
 
     memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
     memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
     memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
     memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
     memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
     memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
 
     memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
     memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
     memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
     memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
     memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
     memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
 
     memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
     memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
     memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
     memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
     memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
     memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
 
     memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
     memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
     memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
     memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
     memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
     memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
   		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
 
     // create ghost grid object for rho and electric field communication
 
     if (differentiation_flag == 1)
       cg_peratom_6 =
         new CommGrid(lmp,world,42,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
     else
       cg_peratom_6 =
         new CommGrid(lmp,world,49,1,
                      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                      nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
 
   }  
 }
 
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::deallocate()
 {
   // Releases the arrays and helper objects that depend on the K-space grid
   // and the interpolation order, for all function[] branches at once.
   // Nothing here checks which branches were actually allocated, so every
   // pointer that was never allocated must be NULL on entry.
   // NOTE(review): confirm the constructor initializes all of them to NULL.
 
   // un-suffixed grid: density brick, field bricks, FFT-sized density
 
   memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
   memory->destroy(density_fft);
 
   // _g set (allocated under function[1])
 
   memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_g);
 
   // _a0 .. _a6 sets (allocated under function[2])
 
   memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a0);
 
   memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a1);
 
   memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a2);
 
   memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a3);
 
   memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a4);
 
   memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a5);
 
   memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
   memory->destroy(density_fft_a6);
 
   // sf_precoeff arrays (only allocated when differentiation_flag == 1)
 
   memory->destroy(sf_precoeff1);
   memory->destroy(sf_precoeff2);
   memory->destroy(sf_precoeff3);
   memory->destroy(sf_precoeff4);
   memory->destroy(sf_precoeff5);
   memory->destroy(sf_precoeff6);
 
   memory->destroy(sf_precoeff1_6);
   memory->destroy(sf_precoeff2_6);
   memory->destroy(sf_precoeff3_6);
   memory->destroy(sf_precoeff4_6);
   memory->destroy(sf_precoeff5_6);
   memory->destroy(sf_precoeff6_6);
 
   // FFT-sized work and coefficient arrays
 
   memory->destroy(greensfn);
   memory->destroy(greensfn_6);
   memory->destroy(work1);
   memory->destroy(work2);
   memory->destroy(work1_6);
   memory->destroy(work2_6);
   memory->destroy(vg);
   memory->destroy(vg2);
   memory->destroy(vg_6);
   memory->destroy(vg2_6);
 
   // offset 1d arrays: the offset passed must match the lower bound
   // used when the array was created
 
   memory->destroy1d_offset(fkx,nxlo_fft);
   memory->destroy1d_offset(fky,nylo_fft);
   memory->destroy1d_offset(fkz,nzlo_fft);
 
   memory->destroy1d_offset(fkx2,nxlo_fft);
   memory->destroy1d_offset(fky2,nylo_fft);
   memory->destroy1d_offset(fkz2,nzlo_fft);
 
   memory->destroy1d_offset(fkx_6,nxlo_fft_6);
   memory->destroy1d_offset(fky_6,nylo_fft_6);
   memory->destroy1d_offset(fkz_6,nzlo_fft_6);
 
   memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
   memory->destroy1d_offset(fky2_6,nylo_fft_6);
   memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
 
   memory->destroy(split_1);
   memory->destroy(split_2);
 
   // order-dependent arrays, un-suffixed then _6
 
   memory->destroy(gf_b);
   memory->destroy2d_offset(rho1d,-order/2);
   memory->destroy2d_offset(rho_coeff,(1-order)/2);
   memory->destroy2d_offset(drho1d,-order/2);
   memory->destroy2d_offset(drho_coeff, (1-order)/2);
 
   memory->destroy(gf_b_6);
   memory->destroy2d_offset(rho1d_6,-order_6/2);
   memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
   memory->destroy2d_offset(drho1d_6,-order_6/2);
   memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
 
   // FFT, remap and ghost-grid objects.  delete leaves the pointer value
   // untouched, and these objects are only re-created inside the
   // function[] branches of the allocation code, so reset each pointer:
   // otherwise a second deallocate() before the branch is re-allocated
   // would delete the same object twice.
   // NOTE(review): the memory->destroy*() calls above are assumed to reset
   // their array argument themselves -- confirm in memory.h.
 
   delete fft1;
   delete fft2;
   delete remap;
   delete cg;
   fft1 = fft2 = NULL;
   remap = NULL;
   cg = NULL;
 
   delete fft1_6;
   delete fft2_6;
   delete remap_6;
   delete cg_6;
   fft1_6 = fft2_6 = NULL;
   remap_6 = NULL;
   cg_6 = NULL;
 }
 
 
 /* ----------------------------------------------------------------------
    deallocate memory that depends on # of K-vectors and order
    for per atom calculations
    destroys the u/v0-v5 bricks indexed with the *_out offsets, the "_g"
    and "_a0".."_a6" bricks indexed with the *_out_6 offsets, and finally
    deletes the two per-atom cg objects
    NOTE(review): the offsets passed here presumably have to match the ones
    used when the bricks were allocated - confirm against the allocation code
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::deallocate_peratom()
 {
   // bricks that use the nxlo_out/nylo_out/nzlo_out offsets
 
   memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
   memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
 
   // "_g" bricks, all remaining bricks use the *_out_6 offsets
 
   memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   // "_a0" .. "_a6" bricks, one group of seven bricks per suffix
 
   memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  
   memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  
   memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
   memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
 
   // per-atom cg objects
 
   delete cg_peratom;
   delete cg_peratom_6;
 }
 
 /* ----------------------------------------------------------------------
    set size of FFT grid (nx,ny,nz_pppm) and g_ewald
    for Coulomb interactions
    g_ewald is only estimated here if gewaldflag is not set
    the grid size is only searched for if gridflag is not set
    in all cases the grid dimensions are finally increased until
    factorable() accepts them
    side effect: the search loop overwrites nxlo_fft..nzhi_fft
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_grid()
 {
   double q2 = qsqsum * force->qqrd2e / force->dielectric;
 
   // use xprd,yprd,zprd even if triclinic so grid size is the same
   // adjust z dimension for 2d slab PPPM
   // 3d PPPM just uses zprd since slab_volfactor = 1.0
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   
   // make initial g_ewald estimate
   // based on desired accuracy and real space cutoff
   // fluid-occupied volume used to estimate real-space error
   // zprd used rather than zprd_slab
 
   double h, h_x,h_y,h_z;
   bigint natoms = atom->natoms;
 
   if (!gewaldflag) {
     g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
     // the log() below requires an argument < 1.0
     if (g_ewald >= 1.0)  
       error->all(FLERR,"KSpace accuracy too large to estimate G vector");
     g_ewald = sqrt(-log(g_ewald)) / cutoff;
   } 
 
   // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
   // nz_pppm uses extended zprd_slab instead of zprd
   // reduce it until accuracy target is met
 
   if (!gridflag) {
     // start from a grid spacing of 4.0/g_ewald in all three directions
     h = h_x = h_y = h_z = 4.0/g_ewald;  
     int count = 0;
     while (1) {
       
       // set grid dimension
       nx_pppm = static_cast<int> (xprd/h_x);
       ny_pppm = static_cast<int> (yprd/h_y);
       nz_pppm = static_cast<int> (zprd_slab/h_z);
 
       // enforce at least 2 grid points per dimension
       if (nx_pppm <= 1) nx_pppm = 2;
       if (ny_pppm <= 1) ny_pppm = 2;
       if (nz_pppm <= 1) nz_pppm = 2;
 
       //set local grid dimension
       // compute_qopt() loops over nxlo_fft..nzhi_fft, so this proc's
       //   share of the trial grid has to be set before calling it
       int npey_fft,npez_fft;
       if (nz_pppm >= nprocs) {
         npey_fft = 1;
         npez_fft = nprocs;
       } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
 
       int me_y = me % npey_fft;
       int me_z = me / npey_fft;
 
       nxlo_fft = 0;
       nxhi_fft = nx_pppm - 1;
       nylo_fft = me_y*ny_pppm/npey_fft;
       nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
       nzlo_fft = me_z*nz_pppm/npez_fft;
       nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
 
       // qopt is already summed over all procs by compute_qopt()
       double qopt = compute_qopt();
    
       double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
 
       count++;
 
       // break loop if the accuracy has been reached or too many loops have been performed
       if (dfkspace <= accuracy) break;
-      if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction!");
+      if (count > 500) 
+	error->all(FLERR,"Could not compute grid size for Coulomb interaction");
       // shrink the grid spacing by 5% and try again
       h *= 0.95;
       h_x = h_y = h_z = h;
     }
   }
   
   // boost grid size until it is factorable
 
   while (!factorable(nx_pppm)) nx_pppm++;
   while (!factorable(ny_pppm)) ny_pppm++;
   while (!factorable(nz_pppm)) nz_pppm++;
 }
 
 /* ----------------------------------------------------------------------
    set the FFT parameters 
    inputs (only read):
      nx_p,ny_p,nz_p = global grid size
      ord = order, determines the stencil size and the shift values
    outputs (overwritten):
      n{x,y,z}{lo,hi}_f = section of the FFT mesh owned by this proc
      n{x,y,z}{lo,hi}_i = owned 3d sub-brick without ghost cells
      n{x,y,z}{lo,hi}_o = owned 3d sub-brick including ghost cells
      nlow,nupp = lower/upper stencil extent
      ng = # of grid points in the sub-brick including ghosts
      nf = # of FFT points in the FFT decomposition on this proc
      nfb = larger of nf and the # of points in the ghost-free sub-brick
      sft,sftone = shift values for particle <-> grid mapping
    also sets the class member boxlo
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
                                    int& nxlo_f,int& nylo_f,int& nzlo_f,
                                    int& nxhi_f,int& nyhi_f,int& nzhi_f,
                                    int& nxlo_i,int& nylo_i,int& nzlo_i,
                                    int& nxhi_i,int& nyhi_i,int& nzhi_i,
                                    int& nxlo_o,int& nylo_o,int& nzlo_o,
                                    int& nxhi_o,int& nyhi_o,int& nzhi_o,
 		                   int& nlow, int& nupp,
                                    int& ng, int& nf, int& nfb,
 		                   double& sft,double& sftone, int& ord)
 {
   // global indices of PPPM grid range from 0 to N-1
   // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that I own without ghost cells
   // for slab PPPM, assign z grid as if it were not extended
 
   nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
   nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
 
   nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
   nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
 
   nzlo_i = static_cast<int> 
       (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
   nzhi_i = static_cast<int> 
       (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
 
 
   // nlow,nupp = stencil size for mapping particles to PPPM grid
 
   nlow = -(ord-1)/2;
   nupp = ord/2;
 
   // sft values for particle <-> grid mapping
   // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
   // odd order: sft = OFFSET + 0.5, sftone = 0.0
   // even order: sft = OFFSET, sftone = 0.5
 
   if (ord % 2) sft = OFFSET + 0.5;
   else sft = OFFSET;
   if (ord % 2) sftone = 0.0;
   else sftone = 0.5;
 
   // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
   //   global PPPM grid that my particles can contribute charge to
   // effectively nlo_in,nhi_in + ghost cells
   // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
   //           position a particle in my box can be at
   // dist[3] = particle position bound = subbox + skin/2.0 + qdist
   //   qdist = offset due to TIP4P fictitious charge
   //   convert to triclinic if necessary
   // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
   // for slab PPPM, assign z grid as if it were not extended
 
   double *prd,*sublo,*subhi;
 
   // note: boxlo is a class member, not a local
 
   if (triclinic == 0) {
     prd = domain->prd;
     boxlo = domain->boxlo;
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     prd = domain->prd_lamda;
     boxlo = domain->boxlo_lamda;
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double dist[3];
   double cuthalf = 0.5*neighbor->skin + qdist;
   if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
   else {
     dist[0] = cuthalf/domain->prd[0];
     dist[1] = cuthalf/domain->prd[1];
     dist[2] = cuthalf/domain->prd[2];
   }
     
   int nlo,nhi;
     
   nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * 
                             nx_p/xprd + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * 
                             nx_p/xprd + sft) - OFFSET;
   nxlo_o = nlo + nlow;
   nxhi_o = nhi + nupp;
 
   nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * 
                             ny_p/yprd + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * 
                             ny_p/yprd + sft) - OFFSET;
   nylo_o = nlo + nlow;
   nyhi_o = nhi + nupp;
 
   nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * 
                             nz_p/zprd_slab + sft) - OFFSET;
   nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * 
                             nz_p/zprd_slab + sft) - OFFSET;
   nzlo_o = nlo + nlow;
   nzhi_o = nhi + nupp;
 
   // for slab PPPM, change the grid boundary for processors at +z end
   //   to include the empty volume between periodically repeating slabs
   // for slab PPPM, want charge data communicated from -z proc to +z proc,
   //   but not vice versa, also want field data communicated from +z proc to
   //   -z proc, but not vice versa
   // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
 
   if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
     nzhi_i = nz_p - 1;
     nzhi_o = nz_p - 1;
   }
   
   // decomposition of FFT mesh
   // global indices range from 0 to N-1
   // proc owns entire x-dimension, clump of columns in y,z dimensions
   // npey_fft,npez_fft = # of procs in y,z dims
   // if nprocs is small enough, proc can own 1 or more entire xy planes,
   //   else proc owns 2d sub-blocks of yz plane
   // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
   // nlo_fft,nhi_fft = lower/upper limit of the section
   //   of the global FFT mesh that I own
 
   int npey_fft,npez_fft;
   if (nz_p >= nprocs) {
     npey_fft = 1;
     npez_fft = nprocs;
   } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
 
   int me_y = me % npey_fft;
   int me_z = me / npey_fft;
 
   nxlo_f = 0;
   nxhi_f = nx_p - 1;
   nylo_f = me_y*ny_p/npey_fft;
   nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
   nzlo_f = me_z*nz_p/npez_fft;
   nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
 
   // PPPM grid for this proc, including ghosts
 
   ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
     (nzhi_o-nzlo_o+1);
 
   // FFT arrays on this proc, without ghosts
   // nfft = FFT points in FFT decomposition on this proc
   // nfft_brick = FFT points in 3d brick-decomposition on this proc
   // nfft_both = greater of 2 values
 
   nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
     (nzhi_f-nzlo_f+1);
   int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
     (nzhi_i-nzlo_i+1);
   nfb = MAX(nf,nfft_brick);
 
 }
 
 /* ----------------------------------------------------------------------
    test whether n can be reduced to 1 by repeatedly dividing out
    entries of the factors[] list (nfactors entries)
    return 1 if yes, 0 if no 
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::factorable(int n)
 {
   while (n > 1) {
 
     // find the first listed factor that divides what is left of n
 
     int which = 0;
     while (which < nfactors && n % factors[which] != 0) which++;
 
     // no listed factor divides the remainder
 
     if (which == nfactors) return 0;
 
     n /= factors[which];
   }
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
 ------------------------------------------------------------------------- */
 void PPPMDisp::adjust_gewald()
 {
   
   // Use Newton solver to find g_ewald
 
   double dx;
         
   // Begin algorithm
   
   for (int i = 0; i < LARGE; i++) {
     dx = f() / derivf(); 
     g_ewald -= dx; //Update g_ewald
     if (fabs(f()) < SMALL) return;
   }
    
   // Failed to converge
   
   char str[128];
   sprintf(str, "Could not compute g_ewald");
   error->all(FLERR, str);
 
 }
 
 /* ----------------------------------------------------------------------
  Calculate f(x)
  ------------------------------------------------------------------------- */
 
 double PPPMDisp::f()
 {
   double df_rspace, df_kspace;
   double q2 = qsqsum * force->qqrd2e / force->dielectric;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
 
   df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / 
        sqrt(natoms*cutoff*xprd*yprd*zprd);
    
   double qopt = compute_qopt();
   df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
    
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
  Calculate numerical derivative f'(x) using forward difference
  [f(x + h) - f(x)] / h
  ------------------------------------------------------------------------- */
             
 double PPPMDisp::derivf()
 {  
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
   
   f1 = f();
   g_ewald_old = g_ewald;
   g_ewald += h;
   f2 = f();
   g_ewald = g_ewald_old;
   df = (f2 - f1)/h;
   
   return df;
 } 
 
 /* ----------------------------------------------------------------------
    Calculate the final estimator for the accuracy
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::final_accuracy()
 {
   double df_rspace, df_kspace;
   double q2 = qsqsum * force->qqrd2e / force->dielectric;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / 
              sqrt(natoms*cutoff*xprd*yprd*zprd);
 
   double qopt = compute_qopt();
 
   df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
 
   double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
   return acc;
 }
 
 /* ----------------------------------------------------------------------
    Calculate the final estimator for the Dispersion accuracy
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::final_accuracy_6()
 {
   double df_rspace, df_kspace;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
   df_rspace = lj_rspace_error();
 
   double qopt = compute_qopt_6();
 
   df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
 
   double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
   return acc;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for Coulomb interactions
    picks the ad or ik variant depending on differentiation_flag
    and returns the sum of the per-proc values over all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt()
 {
   // contribution of the part of the FFT mesh owned by this proc
 
   double qopt_local =
     (differentiation_flag == 1) ? compute_qopt_ad() : compute_qopt_ik();
 
   double qopt_global;
   MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
   return qopt_global;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for Dispersion interactions
    picks the ad or ik variant depending on differentiation_flag
    and returns the sum of the per-proc values over all procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6()
 {
   // contribution of the part of the FFT mesh owned by this proc
 
   double qopt_local =
     (differentiation_flag == 1) ? compute_qopt_6_ad() : compute_qopt_6_ik();
 
   double qopt_global;
   MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
   return qopt_global;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ik differentiation scheme and Coulomb interaction
    loops over the FFT mesh points owned by this proc
    (nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft)
    for every point with non-zero wavevector, sums over the shifts
    nx,ny,nz = -2..2 of the wavevector by multiples of the grid size
    returns the local sum only, compute_qopt() adds up the procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_ik()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wavevector increment per grid index in each direction
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double sqk, u2;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,sum2, sum3,dot1,dot2;
 
   // the inner sums run over shifts -nb..nb in each direction
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     // integer division: indices in the upper half of the grid are
     //   shifted down by the grid size
     mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
       
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wavevector does not contribute
 
         if (sqk != 0.0) {
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*pow(qx/g_ewald,2.0));
             // wx = (sin(argx)/argx)^order, set to 1 for argx = 0
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*pow(qy/g_ewald,2.0));
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*pow(qz/g_ewald,2.0));
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order);
 
                 // dot1 = unshifted wavevector . shifted wavevector
                 // dot2 = squared length of the shifted wavevector
 
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                 sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
 		sum3 += u2;
               }
             }
           }
           // contribution of this mesh point: sum1 - sum2^2/(sum3^2*sqk)
 	  sum2 *= sum2;
 	  sum3 *= sum3*sqk;
           qopt += sum1 -sum2/sum3;
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ad differentiation scheme and Coulomb interaction
    loops over the FFT mesh points owned by this proc
    (nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft)
    for every point with non-zero wavevector, sums over the shifts
    nx,ny,nz = -2..2 of the wavevector by multiples of the grid size
    returns the local sum only, compute_qopt() adds up the procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_ad()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wavevector increment per grid index in each direction
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u2, sqk;
   double sum1,sum2,sum3,sum4,dot2;
 
   // the inner sums run over shifts -nb..nb in each direction
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     // integer division: indices in the upper half of the grid are
     //   shifted down by the grid size
     mper = m - nz_pppm*(2*m/nz_pppm);
 
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       lper = l - ny_pppm*(2*l/ny_pppm);
 
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         kper = k - nx_pppm*(2*k/nx_pppm);
       
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wavevector does not contribute
 
         if (sqk != 0.0) {
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm*nx);
             sx = exp(-0.25*pow(qx/g_ewald,2.0));
             // wx = (sin(argx)/argx)^order, set to 1 for argx = 0
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm*ny);
               sy = exp(-0.25*pow(qy/g_ewald,2.0));
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm*nz);
                 sz = exp(-0.25*pow(qz/g_ewald,2.0));
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order);
 
                 // dot2 = squared length of the shifted wavevector
 
                 dot2 = qx*qx+qy*qy+qz*qz;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                 sum2 += sx*sy*sz * u2*4.0*MY_PI;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           // contribution of this mesh point: sum1 - sum2^2/(sum3*sum4)
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ik differentiation scheme and Dispersion interaction
    loops over the FFT mesh points owned by this proc
    (nxlo_fft_6..nxhi_fft_6, nylo_fft_6..nyhi_fft_6, nzlo_fft_6..nzhi_fft_6)
    for every point with non-zero wavevector, sums over the shifts
    nx,ny,nz = -2..2 of the wavevector by multiples of the grid size
    returns the local sum only, compute_qopt_6() adds up the procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6_ik()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wavevector increment per grid index in each direction
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double sqk, u2;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double sum1,sum2, sum3;
   double dot1,dot2, rtdot2, term;
 
   // inv2ew = 1/(2*g_ewald_6)
 
   double inv2ew = 2*g_ewald_6;
   inv2ew = 1.0/inv2ew;
   double rtpi = sqrt(MY_PI);
 
   // the inner sums run over shifts -nb..nb in each direction
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     // integer division: indices in the upper half of the grid are
     //   shifted down by the grid size
     mper = m - nz_pppm_6*(2*m/nz_pppm_6);
 
     for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       lper = l - ny_pppm_6*(2*l/ny_pppm_6);
 
       for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         kper = k - nx_pppm_6*(2*k/nx_pppm_6);
       
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wavevector does not contribute
 
         if (sqk != 0.0) {
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm_6*nx);
             sx = exp(-qx*qx*inv2ew*inv2ew);
             // wx = (sin(argx)/argx)^order_6, set to 1 for argx = 0
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm_6;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm_6*ny);
               sy = exp(-qy*qy*inv2ew*inv2ew);
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm_6;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm_6*nz);
                 sz = exp(-qz*qz*inv2ew*inv2ew);
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm_6;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
 
                 // dot1 = unshifted wavevector . shifted wavevector
                 // dot2 = squared length of the shifted wavevector
 
                 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                 dot2 = qx*qx+qy*qy+qz*qz;
                 rtdot2 = sqrt(dot2);
                 term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
                   2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                 term *= g_ewald_6*g_ewald_6*g_ewald_6;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                 sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
                 sum3 += u2;
               }
             }
           }
           // contribution of this mesh point: sum1 - sum2^2/(sum3^2*sqk)
           sum2 *= sum2;
           sum3 *= sum3*sqk;
           qopt += sum1 -sum2/sum3;
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    Compute qopt for the ad differentiation scheme and Dispersion interaction
    loops over the FFT mesh points owned by this proc
    (nxlo_fft_6..nxhi_fft_6, nylo_fft_6..nyhi_fft_6, nzlo_fft_6..nzhi_fft_6)
    for every point with non-zero wavevector, sums over the shifts
    nx,ny,nz = -2..2 of the wavevector by multiples of the grid size
    returns the local sum only, compute_qopt_6() adds up the procs
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::compute_qopt_6_ad()
 {
   double qopt = 0.0;
   int k,l,m;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // wavevector increment per grid index in each direction
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
   double u2, sqk;
   double sum1,sum2,sum3,sum4;
   double dot2, rtdot2, term;
   // inv2ew = 1/(2*g_ewald_6)
   double inv2ew = 2*g_ewald_6;
   inv2ew = 1/inv2ew;
   double rtpi = sqrt(MY_PI);
 
   // the inner sums run over shifts -nb..nb in each direction
 
   int nbx = 2;
   int nby = 2;
   int nbz = 2;
 
   for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     // integer division: indices in the upper half of the grid are
     //   shifted down by the grid size
     mper = m - nz_pppm_6*(2*m/nz_pppm_6);
 
     for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       lper = l - ny_pppm_6*(2*l/ny_pppm_6);
 
       for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         kper = k - nx_pppm_6*(2*k/nx_pppm_6);
       
         sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
           pow(unitkz*mper,2.0);
 
         // the zero wavevector does not contribute
 
         if (sqk != 0.0) {
     
           sum1 = 0.0;
           sum2 = 0.0;
           sum3 = 0.0;
           sum4 = 0.0;
           for (nx = -nbx; nx <= nbx; nx++) {
             qx = unitkx*(kper+nx_pppm_6*nx);
             sx = exp(-qx*qx*inv2ew*inv2ew);
             // wx = (sin(argx)/argx)^order_6, set to 1 for argx = 0
             wx = 1.0;
             argx = 0.5*qx*xprd/nx_pppm_6;
             if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
             for (ny = -nby; ny <= nby; ny++) {
               qy = unitky*(lper+ny_pppm_6*ny);
               sy = exp(-qy*qy*inv2ew*inv2ew);
               wy = 1.0;
               argy = 0.5*qy*yprd/ny_pppm_6;
               if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
               for (nz = -nbz; nz <= nbz; nz++) {
                 qz = unitkz*(mper+nz_pppm_6*nz);
                 sz = exp(-qz*qz*inv2ew*inv2ew);
                 wz = 1.0;
                 argz = 0.5*qz*zprd_slab/nz_pppm_6;
                 if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
 
                 // dot2 = squared length of the shifted wavevector
 
                 dot2 = qx*qx+qy*qy+qz*qz;
                 rtdot2 = sqrt(dot2);
                 term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
 		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                 term *= g_ewald_6*g_ewald_6*g_ewald_6;
                 u2 =  pow(wx*wy*wz,2.0);
                 sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                 sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
                 sum3 += u2;
                 sum4 += dot2*u2;
               }
             }
           }
           // contribution of this mesh point: sum1 - sum2^2/(sum3*sum4)
           sum2 *= sum2;
           qopt += sum1 - sum2/(sum3*sum4);
         }
       }
     }
   }
   return qopt;
 }
 
 /* ----------------------------------------------------------------------
    set size of FFT grid  and g_ewald_6
    for Dispersion interactions
    each step is skipped if its flag is already set
    the grid dimensions are always increased until factorable() accepts them
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::set_grid_6()
 {
   // sums of the dispersion coefficients
 
   if (!csumflag) {
     calc_csum();
   }
 
   // initial g_ewald_6
 
   if (!gewaldflag_6) {
     set_init_g6();
   }
 
   // grid dimensions
 
   if (!gridflag_6) {
     set_n_pppm_6();
   }
 
   // boost grid size until it is factorable
 
   while (!factorable(nx_pppm_6)) {
     nx_pppm_6++;
   }
   while (!factorable(ny_pppm_6)) {
     ny_pppm_6++;
   }
   while (!factorable(nz_pppm_6)) {
     nz_pppm_6++;
   }
 }
 
 /* ----------------------------------------------------------------------
    Calculate the sum of the squared dispersion coefficients and other 
    related quantities required for the calculations
    sets csum, csumij, cii[], csumi[] (both re-allocated with ntypes+1
    entries) and csumflag
    collective: calls MPI_Allreduce twice, must be called by all procs
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::calc_csum()
 {
   csumij = 0.0;
   csum = 0.0;
 
   int ntypes = atom->ntypes;   
   int i,j,k;
 
   // (re)allocate and zero the per-type arrays
   // neach = # of local atoms of each type
 
   delete [] cii;
   cii = new double[ntypes +1];
   for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
   delete [] csumi; 
   csumi = new double[ntypes +1];
   for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; 
   int *neach = new int[ntypes+1];
   for (i = 0; i<=ntypes; i++) neach[i] = 0; 
 
   //the following variables are needed to distinguish between arithmetic
   //  and geometric mixing
   // they are initialized with the function[1] values so that they are
   //   never read uninitialized, function[2] overrides them
 
   double mix1 = 1;    // scales 20/16 to 4
   int mix2 = 0;       // shifts the value to the sigma^3 value
   int mix3 = 1;       // shifts the value to the right atom type
   if (function[2]) {
     mix1 = 64.0 / 20.0;
     mix2 = 3;
     mix3 = 7;
   }
   for (i = 1; i <= ntypes; i++) {
     cii[i] = mix1*B[mix3*i+mix2]*B[mix3*i+mix2];
   }
 
   // count local atoms per type and accumulate the local part of csum
 
   int tmp;
   for (i = 0; i < atom->nlocal; i++) {
     tmp = atom->type[i];
     neach[tmp]++;
     csum += mix1*B[mix3*tmp+mix2]*B[mix3*tmp+mix2];    
   }
 
   // sum csum over all procs
 
   double tmp2;
   MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
   csum = tmp2;
   csumflag = 1;
 
   // neach_all = # of atoms of each type summed over all procs
 
   int *neach_all = new int[ntypes+1];
   MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
 
   // compute csumij and csumi
   // the product of the two counts is formed in double precision,
   //   an int*int product would overflow for large atom counts
 
   double npairs;
   if (function[1]){
     for (i=1; i<=ntypes; i++) {
       for (j=1; j<=ntypes; j++) {
         npairs = static_cast<double> (neach_all[i])*neach_all[j];
         csumi[i] += neach_all[j]*B[i]*B[j];
         csumij += npairs*B[i]*B[j]; 
       }
     }
   } else {
     for (i=1; i<=ntypes; i++) {
       for (j=1; j<=ntypes; j++) {
         npairs = static_cast<double> (neach_all[i])*neach_all[j];
         for (k=0; k<=6; k++) {
           csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
           csumij += npairs*B[7*i + k]*B[7*(j+1)-k-1];
         }
       }
     }
   }    
 
   delete [] neach;
   delete [] neach_all;
 }
 
 /* ----------------------------------------------------------------------
    adjust g_ewald_6 to the new grid size
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::adjust_gewald_6()
 {
   // Use Newton solver to find g_ewald_6
   double dx;
 
   // Start loop
 
   for (int i = 0; i <  LARGE; i++) {
     dx = f_6() / derivf_6();
     g_ewald_6 -= dx; //update g_ewald_6
     if (fabs(f_6()) < SMALL) return;
   }
 
   // Failed to converge
 
   char str[128];
   sprintf(str, "Could not adjust g_ewald_6");
   error->all(FLERR, str);
 
 }
 
 /* ----------------------------------------------------------------------
  Calculate f(x) for Dispersion interaction
  ------------------------------------------------------------------------- */
 
 double PPPMDisp::f_6()
 {
   double df_rspace, df_kspace;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   bigint natoms = atom->natoms;
 
   df_rspace = lj_rspace_error();
    
   double qopt = compute_qopt_6();
   df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
    
   return df_rspace - df_kspace;
 }
 
 /* ----------------------------------------------------------------------
  Calculate numerical derivative f'(x) using forward difference
  [f(x + h) - f(x)] / h
  ------------------------------------------------------------------------- */
             
 double PPPMDisp::derivf_6()
 {  
   double h = 0.000001;  //Derivative step-size
   double df,f1,f2,g_ewald_old;
   
   f1 = f_6();
   g_ewald_old = g_ewald_6;
   g_ewald_6 += h;
   f2 = f_6();
   g_ewald_6 = g_ewald_old;
   df = (f2 - f1)/h;
   
   return df;
 } 
 
 
 /* ----------------------------------------------------------------------
    calculate an initial value for g_ewald_6
    based on the desired accuracy and the real space cutoff
    only the real space error estimate lj_rspace_error() is used
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::set_init_g6()
 {
   // df_real = real space error estimate minus requested accuracy
   // start with g_ewald_6 = 1/cutoff_lj
   // while df_real > 0, repeatedly multiply g_ewald_6 by 2
   // while df_real < 0, repeatedly divide g_ewald_6 by 2
   // then bisect between the last two values of g_ewald_6
 
   double df_real;
   double g_ewald_old;
   double gmin, gmax;
 
   g_ewald_6 = 1.0/cutoff_lj;
 
   // neither search loop below runs if df_real is exactly zero;
   // starting from the current value keeps the bracket well defined then
 
   g_ewald_old = g_ewald_6;
 
   df_real = lj_rspace_error() - accuracy;
   int counter = 0;
   if (df_real > 0) {
     while (df_real > 0 && counter < LARGE) {
       counter++;
       g_ewald_old = g_ewald_6;
       g_ewald_6 *= 2;
       df_real = lj_rspace_error() - accuracy;
     }
   }
 
   if (df_real < 0) {
     while (df_real < 0 && counter < LARGE) {
       counter++;
       g_ewald_old = g_ewald_6;
       g_ewald_6 *= 0.5;
       df_real = lj_rspace_error() - accuracy;
     }
   }
 
   if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
 
   // bisection on [gmin,gmax]: a negative df_real at the midpoint
   // moves the upper bound, anything else moves the lower bound
 
   gmin = MIN(g_ewald_6, g_ewald_old);
   gmax = MAX(g_ewald_6, g_ewald_old);
   g_ewald_6 = gmin + 0.5*(gmax-gmin);
   counter = 0;
   while (gmax-gmin > SMALL && counter < LARGE) {
     counter++;
     df_real = lj_rspace_error() - accuracy;
     if (df_real < 0) gmax = g_ewald_6;
     else gmin = g_ewald_6;
     g_ewald_6 = gmin + 0.5*(gmax-gmin);
   }
   if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
 
 }
 
 /* ----------------------------------------------------------------------
    calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::set_n_pppm_6()
 {
   bigint natoms = atom->natoms;
 
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   double h, h_x,h_y,h_z;
 
   // initial value for the grid spacing
   h = h_x = h_y = h_z = 4.0/g_ewald_6;
   // decrease grid spacing untill required precision is obtained
   int count = 0;
   while(1) {
   
     // set grid dimension
     nx_pppm_6 = static_cast<int> (xprd/h_x);
     ny_pppm_6 = static_cast<int> (yprd/h_y);
     nz_pppm_6 = static_cast<int> (zprd_slab/h_z);
 
     if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
     if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
     if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
 
     //set local grid dimension
     int npey_fft,npez_fft;
     if (nz_pppm_6 >= nprocs) {
       npey_fft = 1;
       npez_fft = nprocs;
     } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
 
     int me_y = me % npey_fft;
     int me_z = me / npey_fft;
 
     nxlo_fft_6 = 0;
     nxhi_fft_6 = nx_pppm_6 - 1;
     nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
     nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
     nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
     nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
 
     double qopt = compute_qopt_6();
  
     double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
 
     count++;
 
     // break loop if the accuracy has been reached or too many loops have been performed
     if (df_kspace <= accuracy) break;
     if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion!");
     h *= 0.95;
     h_x = h_y = h_z = h;
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate the real space error for dispersion interactions
    ---------------------------------------------------------------------- */
 
 double PPPMDisp::lj_rspace_error()
 {
   bigint natoms = atom->natoms;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double zprd_slab = zprd*slab_volfactor;
 
   double deltaf;
   double rgs = (cutoff_lj*g_ewald_6);
   rgs *= rgs;
   double rgs_inv = 1.0/rgs;
   deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
     exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
   return deltaf;
 }
 
 /* ----------------------------------------------------------------------
    make all preperations for later being able to rapidely split the
    fourier transformed vectors
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::prepare_splitting()
 {
   // allocate vectors
   // communication = stores how many points are exchanged with each processor
   // com_matrix, com_matrix_all = communication matrix between the processors
   // fftpoins stores the maximum and minimum value of the fft points of each proc
   int *communication;
   int **com_matrix;
   int **com_matrix_all;
   int **fftpoints;
  
   memory->create(communication, nprocs, "pppm/disp:communication");
   memory->create(com_matrix, nprocs, nprocs, "pppm/disp:com_matrix");
   memory->create(com_matrix_all, nprocs, nprocs, "pppm/disp:com_matrix_all");
   memory->create(fftpoints, nprocs, 4, "pppm/disp:fftpoints");
   memset(&(com_matrix[0][0]), 0, nprocs*nprocs*sizeof(int));
   memset(communication, 0, nprocs*sizeof(int));
  
   //// loop over all values of me to determine the fft_points
 
   int npey_fft,npez_fft;
   if (nz_pppm_6 >= nprocs) {
     npey_fft = 1;
     npez_fft = nprocs;
   } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
 
   int me_y = me % npey_fft;
   int me_z = me / npey_fft;
 
   int i,m,n;
   for (m = 0; m < nprocs; m++) {
     me_y = m % npey_fft;
     me_z = m / npey_fft;
     fftpoints[m][0] = me_y*ny_pppm_6/npey_fft;
     fftpoints[m][1] = (me_y+1)*ny_pppm_6/npey_fft - 1;
     fftpoints[m][2] = me_z*nz_pppm_6/npez_fft;
     fftpoints[m][3] = (me_z+1)*nz_pppm_6/npez_fft - 1;
   }
 
   //// loop over all local fft points to find out on which processor its counterpart is!
   int x1,y1,z1,x2,y2,z2;
   for (x1 = nxlo_fft_6; x1 <= nxhi_fft_6; x1++)
     for (y1 = nylo_fft_6; y1 <= nyhi_fft_6; y1++) {
       y2 = (ny_pppm_6 - y1) % ny_pppm_6;
       for (z1 = nzlo_fft_6; z1 <= nzhi_fft_6; z1++) {
         z2 = (nz_pppm_6 - z1) % nz_pppm_6;
         m = -1;
         while (1) {
           m++;
           if (y2 >= fftpoints[m][0] && y2 <= fftpoints[m][1] &&
               z2 >= fftpoints[m][2] && z2 <= fftpoints[m][3] ) break;
         }
         communication[m]++;
         com_matrix[m][me] = 1;
         com_matrix[me][m] = 1;
       }
     }
 
   //// set com_max and com_procs
   //// com_max = maximum amount of points that have to be communicated with a processor
   //// com_procs = number of processors with which has to be communicated
   com_max = 0;
   com_procs = 0;
   for (m = 0; m < nprocs; m++) {
     com_max = MAX(com_max, communication[m]);
     com_procs += com_matrix[me][m];
   }
   if (!com_matrix[me][me]) com_procs++;
  
   //// allocate further vectors
   memory->create(splitbuf1, com_procs, com_max*2, "pppm/disp:splitbuf1");
   memory->create(splitbuf2, com_procs, com_max*2, "pppm/disp:splitbuf2");
   memory->create(dict_send, nfft_6, 2, "pppm/disp:dict_send");
   memory->create(dict_rec,com_procs, com_max, "pppm/disp:dict_rec");
   memory->create(com_each, com_procs, "pppm/disp:com_each");
   memory->create(com_order, com_procs, "pppm/disp:com_order");
  
   //// exchange communication matrix between the procs
   if (nprocs > 1){
     for (m = 0; m < nprocs; m++) MPI_Allreduce(com_matrix[m],
       com_matrix_all[m], nprocs, MPI_INT, MPI_MAX, world);
   }
   //// determine the communication order!!!
 
   split_order(com_matrix_all);
  
   //// fill com_each
   for (i = 0; i < com_procs; i++) com_each[i] = 2*communication[com_order[i]];
  
   int *com_send;
   memory->create(com_send, com_procs, "pppm/disp:com_send");
   memset(com_send, 0, com_procs*sizeof(int));
   int **changelist;
   memory->create(changelist, nfft_6, 5, "pppm/disp:changelist");
   int whichproc;
  
   //// loop over mesh points to fill dict_send
   n = 0;
   for (z1 = nzlo_fft_6; z1 <= nzhi_fft_6; z1++) {
     z2 = (nz_pppm_6 - z1) % nz_pppm_6;
     for (y1 = nylo_fft_6; y1 <= nyhi_fft_6; y1++) {
       y2 = (ny_pppm_6 - y1) % ny_pppm_6;
       for (x1 = nxlo_fft_6; x1 <= nxhi_fft_6; x1++) {
         x2 = (nx_pppm_6 - x1) % nx_pppm_6;
         m = -1;
         while (1) {
           m++;
           if (y2 >= fftpoints[m][0] && y2 <= fftpoints[m][1] &&
               z2 >= fftpoints[m][2] && z2 <= fftpoints[m][3] ) break;
         }
         whichproc = -1;
         while (1) {
           whichproc++;
           if (m == com_order[whichproc]) break;
 	}
         dict_send[n][0] = whichproc;
         dict_send[n][1] = 2*com_send[whichproc]++;
         changelist[n][0] = x2;
         changelist[n][1] = y2;
         changelist[n][2] = z2;
         changelist[n][3] = n;;
         changelist[n++][4] = whichproc;
       }
     }
   }
 
   //// change the order of changelist
   int changed;
   int help;
   int j, k, l;
   for ( l = 0; l < 3; l++) {
     k = nfft_6;
     changed = 1;
     while (k > 1 && changed == 1) {
       changed = 0;
       for (i = 0; i < k-1; i++) {
         if (changelist[i][l] > changelist[i+1][l]){
           for (j = 0; j < 5; j++) {
             help = changelist[i][j];
             changelist[i][j] = changelist[i+1][j];
             changelist[i+1][j] = help;
 	  }
           changed = 1;
         }
       }
       k = k - 1;
     }
   }
 
   //// determine the values for dict_rec
   memset(com_send, 0, com_procs*sizeof(int));
   for (n = 0; n < nfft_6; n++) {
     whichproc = changelist[n][4];
     dict_rec[whichproc][com_send[whichproc]++] = 2*changelist[n][3];
   }
 
   memory->destroy(communication);
   memory->destroy(com_matrix);
   memory->destroy(com_matrix_all);
   memory->destroy(fftpoints);
   memory->destroy(com_send);
   memory->destroy(changelist);
 }
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) coulomb green function
    fills greensfn for my section of the FFT grid (x index fastest)
    and sets volume
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::compute_gf()
 {
   const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = box[0];
   const double yprd = box[1];
   const double zprd = box[2];
   const double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   const double unitkx = (2.0*MY_PI/xprd);
   const double unitky = (2.0*MY_PI/yprd);
   const double unitkz = (2.0*MY_PI/zprd_slab);
 
   int idx = 0;
   for (int iz = nzlo_fft; iz <= nzhi_fft; iz++) {
 
     // z dependent factors
 
     const int mper = iz - nz_pppm*(2*iz/nz_pppm);
     const double qz = unitkz*mper;
     const double argz = 0.5*qz*zprd_slab/nz_pppm;
     const double snz = sin(argz);
     const double snz2 = snz*snz;
     const double sz = exp(-0.25*pow(qz/g_ewald,2.0));
     double wz = 1.0;
     if (argz != 0.0) wz = pow(snz/argz,order);
     wz *= wz;
 
     for (int iy = nylo_fft; iy <= nyhi_fft; iy++) {
 
       // y dependent factors
 
       const int lper = iy - ny_pppm*(2*iy/ny_pppm);
       const double qy = unitky*lper;
       const double argy = 0.5*qy*yprd/ny_pppm;
       const double sny = sin(argy);
       const double sny2 = sny*sny;
       const double sy = exp(-0.25*pow(qy/g_ewald,2.0));
       double wy = 1.0;
       if (argy != 0.0) wy = pow(sny/argy,order);
       wy *= wy;
 
       for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
 
         // x dependent factors
 
         const int kper = ix - nx_pppm*(2*ix/nx_pppm);
         const double qx = unitkx*kper;
         const double argx = 0.5*qx*xprd/nx_pppm;
         const double snx = sin(argx);
         const double snx2 = snx*snx;
         const double sx = exp(-0.25*pow(qx/g_ewald,2.0));
         double wx = 1.0;
         if (argx != 0.0) wx = pow(snx/argx,order);
         wx *= wx;
 
         const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
         // the k = 0 entry is set to zero
 
         if (sqk == 0.0) greensfn[idx++] = 0.0;
         else {
           const double numerator = 4.0*MY_PI/sqk;
           const double denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
           greensfn[idx++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute self force precoefficients for ad-differentiation scheme
    nxp,nyp,nzp = global grid size, ord = exponent applied to sin(arg)/arg
    n[xyz]lo_ft:n[xyz]hi_ft = section of the FFT grid to work on
    sf_pre1 ... sf_pre6 = output arrays, one value per point of that
      section, stored with the x index varying fastest
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
                                     int nxlo_ft, int nylo_ft, int nzlo_ft,
                                     int nxhi_ft, int nyhi_ft, int nzhi_ft,
                                     double *sf_pre1, double *sf_pre2, double *sf_pre3,
                                     double *sf_pre4, double *sf_pre5, double *sf_pre6)
 {
 
   int i,k,l,m,n;
   double *prd;
 
   // volume-dependent factors
   // the z length is scaled by slab_volfactor
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double unitkx = (2.0*MY_PI/xprd);
   double unitky = (2.0*MY_PI/yprd);
   double unitkz = (2.0*MY_PI/zprd_slab);
 
   int nx,ny,nz,kper,lper,mper;
   double argx,argy,argz;
   double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
   double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
   double u0,u1,u2,u3,u4,u5,u6;
   double sum1,sum2,sum3,sum4,sum5,sum6;
 
   // images -nb..nb are used, the weight tables hold 2*nb+1 = 5 entries
 
   const int nb = 2;
 
   // w?0, w?1, w?2 = weights for image i, i+1 and i+2 of one direction
   // the z tables only depend on m and the y tables only on l, so they
   //   are built once per m and once per l instead of once per grid point
 
   n = 0;
   for (m = nzlo_ft; m <= nzhi_ft; m++) {
     mper = m - nzp*(2*m/nzp);
 
     for (i = -nb; i <= nb; i++) {
       qz0 = unitkz*(mper+nzp*i);
       qz1 = unitkz*(mper+nzp*(i+1));
       qz2 = unitkz*(mper+nzp*(i+2));
       wz0[i+nb] = 1.0;
       wz1[i+nb] = 1.0;
       wz2[i+nb] = 1.0;
       argz = 0.5*qz0*zprd_slab/nzp;
       if (argz != 0.0) wz0[i+nb] = pow(sin(argz)/argz,ord);
       argz = 0.5*qz1*zprd_slab/nzp;
       if (argz != 0.0) wz1[i+nb] = pow(sin(argz)/argz,ord);
       argz = 0.5*qz2*zprd_slab/nzp;
       if (argz != 0.0) wz2[i+nb] = pow(sin(argz)/argz,ord);
     }
 
     for (l = nylo_ft; l <= nyhi_ft; l++) {
       lper = l - nyp*(2*l/nyp);
 
       for (i = -nb; i <= nb; i++) {
         qy0 = unitky*(lper+nyp*i);
         qy1 = unitky*(lper+nyp*(i+1));
         qy2 = unitky*(lper+nyp*(i+2));
         wy0[i+nb] = 1.0;
         wy1[i+nb] = 1.0;
         wy2[i+nb] = 1.0;
         argy = 0.5*qy0*yprd/nyp;
         if (argy != 0.0) wy0[i+nb] = pow(sin(argy)/argy,ord);
         argy = 0.5*qy1*yprd/nyp;
         if (argy != 0.0) wy1[i+nb] = pow(sin(argy)/argy,ord);
         argy = 0.5*qy2*yprd/nyp;
         if (argy != 0.0) wy2[i+nb] = pow(sin(argy)/argy,ord);
       }
 
       for (k = nxlo_ft; k <= nxhi_ft; k++) {
         kper = k - nxp*(2*k/nxp);
 
         for (i = -nb; i <= nb; i++) {
           qx0 = unitkx*(kper+nxp*i);
           qx1 = unitkx*(kper+nxp*(i+1));
           qx2 = unitkx*(kper+nxp*(i+2));
           wx0[i+nb] = 1.0;
           wx1[i+nb] = 1.0;
           wx2[i+nb] = 1.0;
           argx = 0.5*qx0*xprd/nxp;
           if (argx != 0.0) wx0[i+nb] = pow(sin(argx)/argx,ord);
           argx = 0.5*qx1*xprd/nxp;
           if (argx != 0.0) wx1[i+nb] = pow(sin(argx)/argx,ord);
           argx = 0.5*qx2*xprd/nxp;
           if (argx != 0.0) wx2[i+nb] = pow(sin(argx)/argx,ord);
         }
 
         // sum the products of u0 with each shifted product u1..u6
         // over all 5x5x5 image combinations
 
         sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
         for (nx = 0; nx <= 4; nx++) {
           for (ny = 0; ny <= 4; ny++) {
             for (nz = 0; nz <= 4; nz++) {
               u0 = wx0[nx]*wy0[ny]*wz0[nz];
               u1 = wx1[nx]*wy0[ny]*wz0[nz];
               u2 = wx2[nx]*wy0[ny]*wz0[nz];
               u3 = wx0[nx]*wy1[ny]*wz0[nz];
               u4 = wx0[nx]*wy2[ny]*wz0[nz];
               u5 = wx0[nx]*wy0[ny]*wz1[nz];
               u6 = wx0[nx]*wy0[ny]*wz2[nz];
 
               sum1 += u0*u1;
               sum2 += u0*u2;
               sum3 += u0*u3;
               sum4 += u0*u4;
               sum5 += u0*u5;
               sum6 += u0*u6;
             }
           }
         }
 
         // store values
 
         sf_pre1[n] = sum1;
         sf_pre2[n] = sum2;
         sf_pre3[n] = sum3;
         sf_pre4[n] = sum4;
         sf_pre5[n] = sum5;
         sf_pre6[n] = sum6;
         n++;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) dispersion green function
    fills greensfn_6 for my section of the dispersion FFT grid
    (x index fastest)
    ---------------------------------------------------------------------- */
 
 void PPPMDisp::compute_gf_6()
 {
   // volume-dependent factors
   // the z length is scaled by slab_volfactor
 
   const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
 
   const double xprd = box[0];
   const double yprd = box[1];
   const double zprd = box[2];
   const double zprd_slab = zprd*slab_volfactor;
 
   const double unitkx = (2.0*MY_PI/xprd);
   const double unitky = (2.0*MY_PI/yprd);
   const double unitkz = (2.0*MY_PI/zprd_slab);
 
   // inv2ew = 1/(2*g_ewald_6)
 
   const double inv2ew = 1/(2*g_ewald_6);
   const double rtpi = sqrt(MY_PI);
 
   // prefactor shared by all grid points
 
   const double numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
 
   int idx = 0;
   for (int iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++) {
 
     // z dependent factors
 
     const int mper = iz - nz_pppm_6*(2*iz/nz_pppm_6);
     const double qz = unitkz*mper;
     const double snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
     const double snz2 = snz*snz;
     const double sz = exp(-qz*qz*inv2ew*inv2ew);
     const double argz = 0.5*qz*zprd_slab/nz_pppm_6;
     double wz = 1.0;
     if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
     wz *= wz;
 
     for (int iy = nylo_fft_6; iy <= nyhi_fft_6; iy++) {
 
       // y dependent factors
 
       const int lper = iy - ny_pppm_6*(2*iy/ny_pppm_6);
       const double qy = unitky*lper;
       const double sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
       const double sny2 = sny*sny;
       const double sy = exp(-qy*qy*inv2ew*inv2ew);
       const double argy = 0.5*qy*yprd/ny_pppm_6;
       double wy = 1.0;
       if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
       wy *= wy;
 
       for (int ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
 
         // x dependent factors
 
         const int kper = ix - nx_pppm_6*(2*ix/nx_pppm_6);
         const double qx = unitkx*kper;
         const double snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
         const double snx2 = snx*snx;
         const double sx = exp(-qx*qx*inv2ew*inv2ew);
         const double argx = 0.5*qx*xprd/nx_pppm_6;
         double wx = 1.0;
         if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
         wx *= wx;
 
         const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
         const double denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
         const double rtsqk = sqrt(sqk);
         const double term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
           2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
         greensfn_6[idx++] = numerator*term*wx*wy*wz/denominator;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    and Coulomb interaction
    fills sf_coeff[0..5] (summed over all procs) and sets volume
 ------------------------------------------------------------------------- */
 void PPPMDisp::compute_sf_coeff()
 {
   int i,k,l,m,n;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0;
 
   // sum precoefficient times green function over my FFT grid points
   // n is advanced in a statement of its own: reading n and applying n++
   //   inside one expression is unsequenced and therefore undefined
 
   n = 0;
   for (m = nzlo_fft; m <= nzhi_fft; m++) {
     for (l = nylo_fft; l <= nyhi_fft; l++) {
       for (k = nxlo_fft; k <= nxhi_fft; k++) {
         sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
         sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
         sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
         sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
         sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
         sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
         n++;
       }
     }
   }
 
   // Compute the coefficients for the self-force correction
   // prefactor per direction = MY_PI/volume * (grid size / box length),
   //   odd entries get an additional factor of 2
 
   double prex, prey, prez;
   prex = prey = prez = MY_PI/volume;
   prex *= nx_pppm/xprd;
   prey *= ny_pppm/yprd;
   prez *= nz_pppm/zprd_slab;
   sf_coeff[0] *= prex;
   sf_coeff[1] *= prex*2;
   sf_coeff[2] *= prey;
   sf_coeff[3] *= prey*2;
   sf_coeff[4] *= prez;
   sf_coeff[5] *= prez*2;
 
   // communicate values with other procs
 
   double tmp[6];
   MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
   for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
 }
 
 /* ----------------------------------------------------------------------
    compute self force coefficients for ad-differentiation scheme
    and Dispersion interaction
    fills sf_coeff_6[0..5] (summed over all procs) and sets volume
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_sf_coeff_6()
 {
   int i,k,l,m,n;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
   volume = xprd * yprd * zprd_slab;
 
   for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0;
 
   // sum precoefficient times green function over my FFT grid points
   // n is advanced in a statement of its own: reading n and applying n++
   //   inside one expression is unsequenced and therefore undefined
 
   n = 0;
   for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
     for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
       for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
         sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n];
         sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n];
         sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n];
         sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n];
         sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n];
         sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n];
         n++;
       }
     }
   }
 
   // perform multiplication with prefactors
   // prefactor per direction = MY_PI/volume * (grid size / box length),
   //   odd entries get an additional factor of 2
 
   double prex, prey, prez;
   prex = prey = prez = MY_PI/volume;
   prex *= nx_pppm_6/xprd;
   prey *= ny_pppm_6/yprd;
   prez *= nz_pppm_6/zprd_slab;
   sf_coeff_6[0] *= prex;
   sf_coeff_6[1] *= prex*2;
   sf_coeff_6[2] *= prey;
   sf_coeff_6[3] *= prey*2;
   sf_coeff_6[4] *= prez;
   sf_coeff_6[5] *= prez*2;
 
   // communicate values with other procs
 
   double tmp[6];
   MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world);
   for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n];
 
 }
 
 /* ----------------------------------------------------------------------
    denominator for Hockney-Eastwood Green's function
      of x,y,z = sin(kx*deltax/2), etc
 
             inf                 n-1
    S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
            j=-inf               l=0
 
           = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
    g_b = denominator expansion coeffs, ord = number of coeffs used
    returns the square of the product of the three polynomial values
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
 {
   // evaluate the polynomial with coeffs g_b[0..ord-1] at x, y and z,
   // starting from the highest coefficient
 
   double px = 0.0;
   double py = 0.0;
   double pz = 0.0;
 
   int l = ord;
   while (l-- > 0) {
     px = g_b[l] + px*x;
     py = g_b[l] + py*y;
     pz = g_b[l] + pz*z;
   }
 
   const double prod = px*py*pz;
   return prod*prod;
 }
 
 /* ----------------------------------------------------------------------
    pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_gf_denom(double* gf, int ord)
 {
   int k,l,m;
   
   for (l = 1; l < ord; l++) gf[l] = 0.0;
   gf[0] = 1.0;
   
   for (m = 1; m < ord; m++) {
     for (l = m; l > 0; l--) 
       gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
     gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
   }
 
   bigint ifact = 1;
   for (k = 1; k < 2*ord; k++) ifact *= k;
   double gaminv = 1.0/ifact;
   for (l = 0; l < ord; l++) gf[l] *= gaminv;
 }
 
 /* ----------------------------------------------------------------------
    ghost-swap to accumulate full density in brick decomposition 
    remap density from 3d brick decomposition to FFTdecomposition
    for coulomb interaction or dispersion interaction with geometric
    mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
                          int nxhi_i, int nyhi_i, int nzhi_i,
                          FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
                          LAMMPS_NS::Remap* rmp)
 {
   int n,ix,iy,iz;
 
   // copy grabs inner portion of density from 3d brick
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   n = 0;
   for (iz = nzlo_i; iz <= nzhi_i; iz++)
     for (iy = nylo_i; iy <= nyhi_i; iy++)
       for (ix = nxlo_i; ix <= nxhi_i; ix++)
 	dfft[n++] = dbrick[iz][iy][ix];
 
   rmp->perform(dfft,dfft,work);
 }
 
 
 /* ----------------------------------------------------------------------
    copy the inner section of the seven bricks density_brick_a0..a6
    into density_fft_a0..a6, then remap each of them in place with remap_6
    for dispersion with arithmetic mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::brick2fft_a()
 {
   // copy grabs inner portion of density from 3d brick, x index fastest
   // remap could be done as pre-stage of FFT,
   //   but this works optimally on only double values, not complex values
 
   int idx = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++, idx++) {
         density_fft_a0[idx] = density_brick_a0[iz][iy][ix];
         density_fft_a1[idx] = density_brick_a1[iz][iy][ix];
         density_fft_a2[idx] = density_brick_a2[iz][iy][ix];
         density_fft_a3[idx] = density_brick_a3[iz][iy][ix];
         density_fft_a4[idx] = density_brick_a4[iz][iy][ix];
         density_fft_a5[idx] = density_brick_a5[iz][iy][ix];
         density_fft_a6[idx] = density_brick_a6[iz][iy][ix];
       }
     }
   }
 
   // all seven remaps share the scratch array work1_6
 
   remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
   remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
   remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
   remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
   remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
   remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
   remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
 }
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::particle_map(double delx, double dely, double delz,
                              double sft, int** p2g, int nup, int nlow,
                              int nxlo, int nylo, int nzlo,
                              int nxhi, int nyhi, int nzhi)
 {
   int nx,ny,nz;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   int flag = 0;
   for (int i = 0; i < nlocal; i++) {
     
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
     ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
     nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
 
     p2g[i][0] = nx;
     p2g[i][1] = ny;
     p2g[i][2] = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlow < nxlo || nx+nup > nxhi ||
 	ny+nlow < nylo || ny+nup > nyhi ||
 	nz+nlow < nzlo || nz+nup > nzhi)
       flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
 }
 
 
 // forwards all arguments unchanged to particle_map()
 
 void PPPMDisp::particle_map_c(double delx, double dely, double delz,
                                double sft, int** p2g, int nup, int nlow,
                                int nxlo, int nylo, int nzlo,
                                int nxhi, int nyhi, int nzhi)
 {
   particle_map(delx, dely, delz, sft, p2g, nup, nlow,
                nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_c()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
 	 ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   double *q = atom->q;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     z0 = delvolinv * q[i];
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       y0 = z0*rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	x0 = y0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  density_brick[mz][my][mx] += x0*rho1d[0][l];
 	}
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = dispersion "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid --- geometric mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_g()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
 
   // clear 3d density array
 
   memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   int type;
   double **x = atom->x;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     type = atom->type[i];
     z0 = delvolinv_6 * B[type];
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       y0 = z0*rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	x0 = y0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
 	}
       }
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = dispersion "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid --- arithmetic mixing
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::make_rho_a()
 {
   int l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
 
   // clear 3d density array
 
   memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
   memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
 	 ngrid_6*sizeof(FFT_SCALAR));
 
   // loop over my particles, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   int type;
   double **x = atom->x;
   int nlocal = atom->nlocal;
   
   for (int i = 0; i < nlocal; i++) {
 
     //do the following for all 4 grids
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     type = atom->type[i];
     z0 = delvolinv_6;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       y0 = z0*rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	x0 = y0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           w = x0*rho1d_6[0][l];
 	  density_brick_a0[mz][my][mx] += w*B[7*type];
 	  density_brick_a1[mz][my][mx] += w*B[7*type+1];
 	  density_brick_a2[mz][my][mx] += w*B[7*type+2];
 	  density_brick_a3[mz][my][mx] += w*B[7*type+3];
 	  density_brick_a4[mz][my][mx] += w*B[7*type+4];
 	  density_brick_a5[mz][my][mx] += w*B[7*type+5];
 	  density_brick_a6[mz][my][mx] += w*B[7*type+6];
 	}
       }
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ik differentiation
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                            FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
                            int nx_p, int ny_p, int nz_p, int nft,
                            int nxlo_ft, int nylo_ft, int nzlo_ft,
                            int nxhi_ft, int nyhi_ft, int nzhi_ft,
                            int nxlo_i, int nylo_i, int nzlo_i,
                            int nxhi_i, int nyhi_i, int nzhi_i,
                            double& egy, double* gfn,
                            double* kx, double* ky, double* kz,
                            double* kx2, double* ky2, double* kz2,
                            FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
                            double* vir, double** vcoeff, double** vcoeff2,
                            FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                            FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 {
   // wk1,wk2       = complex work arrays, stored as interleaved (re,im) pairs
   // dfft          = nft real density values
   // ft1,ft2       = FFT objects used for the forward / backward transforms
   // n?_p          = grid dimensions, used only for the 1/(nx*ny*nz) scaling
   // n?lo_ft/hi_ft = index ranges used for the k-space loops
   // n?lo_i/hi_i   = index ranges of the bricks that receive the results
   // egy,vir       = energy / 6-component virial accumulators (added to)
   // gfn           = one factor per k-space point, multiplied into rho(k)
   // k?,k?2        = wavevector tables, combined as 0.5*(k-k2)
 
   // load the density into wk1 as complex values with zero imaginary part
   // then transform r -> k in place
 
   for (int ig = 0; ig < nft; ig++) {
     wk1[2*ig] = dfft[ig];
     wk1[2*ig+1] = ZEROF;
   }
 
   ft1->compute(wk1,wk1,1);
 
   // global energy and virial: each point contributes s2*gfn*|wk1|^2
   // the virial is accumulated only when vflag_global is set
   // the energy only when eflag_global is set
 
   const double scaleinv = 1.0/(nx_p*ny_p*nz_p);
   const double s2 = scaleinv*scaleinv;
 
   if (vflag_global) {
     for (int ig = 0; ig < nft; ig++) {
       const int n = 2*ig;
       const double eng = s2 * gfn[ig] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
       for (int iv = 0; iv < 6; iv++) vir[iv] += eng*vcoeff[ig][iv];
       if (eflag_global) egy += eng;
     }
   } else if (eflag_global) {
     for (int ig = 0; ig < nft; ig++) {
       const int n = 2*ig;
       egy += 
         s2 * gfn[ig] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by gfn to get V(k); re and im get the same factor
 
   for (int ig = 0; ig < nft; ig++) {
     const double vscale = scaleinv * gfn[ig];
     wk1[2*ig] *= vscale;
     wk1[2*ig+1] *= vscale;
   }
 
   // x and y gradients share one backward FFT
   // wk2 = (-i*gx*V) + i*(-i*gy*V) with gx = 0.5*(kx-kx2), gy = 0.5*(ky-ky2)
   // after the transform the first value of each pair goes to vx_brick,
   // the second to vy_brick
 
   int n = 0;
   for (int iz = nzlo_ft; iz <= nzhi_ft; iz++)
     for (int iy = nylo_ft; iy <= nyhi_ft; iy++) {
       const double gy = 0.5*(ky[iy]-ky2[iy]);
       for (int ix = nxlo_ft; ix <= nxhi_ft; ix++) {
         const double gx = 0.5*(kx[ix]-kx2[ix]);
         wk2[n] = gx*wk1[n+1] + gy*wk1[n];
         wk2[n+1] = -gx*wk1[n] + gy*wk1[n+1];
         n += 2;
       }
     }
 
   ft2->compute(wk2,wk2,-1);
 
   n = 0;
   for (int iz = nzlo_i; iz <= nzhi_i; iz++)
     for (int iy = nylo_i; iy <= nyhi_i; iy++)
       for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
         vx_brick[iz][iy][ix] = wk2[n];
         vy_brick[iz][iy][ix] = wk2[n+1];
         n += 2;
       }
 
   if (eflag_atom) {
 
     // z gradient and per-atom energy share one backward FFT
     // wk2 = (-i*gz*V) + i*V with gz = 0.5*(kz-kz2)
     // first value of each pair -> vz_brick, second -> u_pa
 
     n = 0;
     for (int iz = nzlo_ft; iz <= nzhi_ft; iz++) {
       const double gz = 0.5*(kz[iz]-kz2[iz]);
       for (int iy = nylo_ft; iy <= nyhi_ft; iy++)
         for (int ix = nxlo_ft; ix <= nxhi_ft; ix++) {
           wk2[n] = gz*wk1[n+1] - wk1[n+1];
           wk2[n+1] = -gz*wk1[n] + wk1[n];
           n += 2;
         }
     }
 
     ft2->compute(wk2,wk2,-1);
 
     n = 0;
     for (int iz = nzlo_i; iz <= nzhi_i; iz++)
       for (int iy = nylo_i; iy <= nyhi_i; iy++)
         for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
           vz_brick[iz][iy][ix] = wk2[n];
           u_pa[iz][iy][ix] = wk2[n+1];
           n += 2;
         }
 
   } else {
 
     // z gradient only: wk2 = -i*kz*V
     // this branch uses kz directly, not 0.5*(kz-kz2)
     // only the first value of each pair is kept after the transform
 
     n = 0;
     for (int iz = nzlo_ft; iz <= nzhi_ft; iz++)
       for (int iy = nylo_ft; iy <= nyhi_ft; iy++)
         for (int ix = nxlo_ft; ix <= nxhi_ft; ix++) {
           wk2[n] = kz[iz]*wk1[n+1];
           wk2[n+1] = -kz[iz]*wk1[n];
           n += 2;
         }
 
     ft2->compute(wk2,wk2,-1);
 
     n = 0;
     for (int iz = nzlo_i; iz <= nzhi_i; iz++)
       for (int iy = nylo_i; iy <= nyhi_i; iy++)
         for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
           vz_brick[iz][iy][ix] = wk2[n];
           n += 2;
         }
   }
 
   // per-atom virial grids are built from the scaled V(k) still held in wk1
 
   if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                   nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                   v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
 }
 
 /* ----------------------------------------------------------------------
    FFT-based Poisson solver for ad differentiation
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                            FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
                            int nx_p, int ny_p, int nz_p, int nft,
                            int nxlo_ft, int nylo_ft, int nzlo_ft,
                            int nxhi_ft, int nyhi_ft, int nzhi_ft,
                            int nxlo_i, int nylo_i, int nzlo_i,
                            int nxhi_i, int nyhi_i, int nzhi_i,
                            double& egy, double* gfn,
                            double* vir, double** vcoeff, double** vcoeff2,
                            FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                            FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 {
   // wk1,wk2       = complex work arrays, stored as interleaved (re,im) pairs
   // dfft          = nft real density values
   // ft1,ft2       = FFT objects used for the forward / backward transforms
   // n?_p          = grid dimensions, used only for the 1/(nx*ny*nz) scaling
   // n?lo_ft/hi_ft = index ranges used for the k-space copy loop
   // n?lo_i/hi_i   = index ranges of the brick that receives the result
   // egy,vir       = energy / 6-component virial accumulators (added to)
   // gfn           = one factor per k-space point, multiplied into rho(k)
   // u_pa          = output brick: first value of each transformed pair
 
   // load the density into wk1 as complex values with zero imaginary part
   // then transform r -> k in place
 
   for (int ig = 0; ig < nft; ig++) {
     wk1[2*ig] = dfft[ig];
     wk1[2*ig+1] = ZEROF;
   }
 
   ft1->compute(wk1,wk1,1);
 
   // global energy and virial: each point contributes s2*gfn*|wk1|^2
   // the virial is accumulated only when vflag_global is set
   // the energy only when eflag_global is set
 
   const double scaleinv = 1.0/(nx_p*ny_p*nz_p);
   const double s2 = scaleinv*scaleinv;
 
   if (vflag_global) {
     for (int ig = 0; ig < nft; ig++) {
       const int n = 2*ig;
       const double eng = s2 * gfn[ig] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
       for (int iv = 0; iv < 6; iv++) vir[iv] += eng*vcoeff[ig][iv];
       if (eflag_global) egy += eng;
     }
   } else if (eflag_global) {
     for (int ig = 0; ig < nft; ig++) {
       const int n = 2*ig;
       egy += 
         s2 * gfn[ig] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
     }
   }
 
   // scale by 1/total-grid-pts to get rho(k)
   // multiply by gfn to get V(k); re and im get the same factor
 
   for (int ig = 0; ig < nft; ig++) {
     const double vscale = scaleinv * gfn[ig];
     wk1[2*ig] *= vscale;
     wk1[2*ig+1] *= vscale;
   }
 
   // copy V(k) to wk2 so the backward FFT leaves wk1 untouched
   // the pair count is set by the n?lo_ft..n?hi_ft ranges
 
   int n = 0;
   for (int iz = nzlo_ft; iz <= nzhi_ft; iz++)
     for (int iy = nylo_ft; iy <= nyhi_ft; iy++)
       for (int ix = nxlo_ft; ix <= nxhi_ft; ix++) {
         wk2[n] = wk1[n];
         wk2[n+1] = wk1[n+1];
         n += 2;
       }
 
   ft2->compute(wk2,wk2,-1);
 
   // keep the first value of every pair, skip the second
 
   n = 0;
   for (int iz = nzlo_i; iz <= nzhi_i; iz++)
     for (int iy = nylo_i; iy <= nyhi_i; iy++)
       for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
         u_pa[iz][iy][ix] = wk2[n];
         n += 2;
       }
 
   // per-atom virial grids are built from the scaled V(k) still held in wk1
 
   if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                   nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                   v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
 }
 
 /* ----------------------------------------------------------------------
    Fourier Transform for per atom virial calculations
 ------------------------------------------------------------------------- */
 
 void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, 
                                  double** vcoeff, double** vcoeff2, int nft,
                                  int nxlo_i, int nylo_i, int nzlo_i,
                                  int nxhi_i, int nyhi_i, int nzhi_i,
                                  FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                                  FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
 {
   // wk1         = complex input (interleaved re,im), read only
   // wk2         = complex scratch array, overwritten by every pass
   // ft2         = FFT object used for the backward transforms
   // vcoeff      = per-point coefficients, columns 0,1,2 used
   // vcoeff2     = per-point coefficients, columns 0,1,2 used
   // n?lo_i/hi_i = index ranges of the output bricks
   // v0_pa..v5_pa = output bricks
 
   // three passes, each producing two output bricks from one backward FFT
   // a pass multiplies every complex wk1 value by (a + i*b), transforms,
   // then stores the first value of each pair in one brick and the second
   // value in the other
   //   pass 0: a = vcoeff[][0],  b = vcoeff[][1]   -> v0_pa, v1_pa
   //   pass 1: a = vcoeff[][2],  b = vcoeff2[][0]  -> v2_pa, v3_pa
   //   pass 2: a = vcoeff2[][1], b = vcoeff2[][2]  -> v4_pa, v5_pa
 
   double **a_table[3] = {vcoeff, vcoeff, vcoeff2};
   const int a_col[3] = {0, 2, 1};
   double **b_table[3] = {vcoeff, vcoeff2, vcoeff2};
   const int b_col[3] = {1, 0, 2};
   FFT_SCALAR ***first_out[3] = {v0_pa, v2_pa, v4_pa};
   FFT_SCALAR ***second_out[3] = {v1_pa, v3_pa, v5_pa};
 
   for (int pass = 0; pass < 3; pass++) {
     double **atab = a_table[pass];
     double **btab = b_table[pass];
     const int acol = a_col[pass];
     const int bcol = b_col[pass];
 
     for (int ig = 0; ig < nft; ig++) {
       const int re = 2*ig;
       const int im = re+1;
       const double a = atab[ig][acol];
       const double b = btab[ig][bcol];
       wk2[re] = wk1[re]*a - wk1[im]*b;
       wk2[im] = wk1[im]*a + wk1[re]*b;
     }
 
     ft2->compute(wk2,wk2,-1);
 
     FFT_SCALAR ***out_a = first_out[pass];
     FFT_SCALAR ***out_b = second_out[pass];
 
     int n = 0;
     for (int iz = nzlo_i; iz <= nzhi_i; iz++)
       for (int iy = nylo_i; iy <= nyhi_i; iy++)
         for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
           out_a[iz][iy][ix] = wk2[n];
           out_b[iz][iy][ix] = wk2[n+1];
           n += 2;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
                               FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                               FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                               FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                               FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 {
   // dfft_1,dfft_2 = two real densities, nfft_6 values each
   // v?brick_1/_2  = output gradient bricks for density 1 / density 2
   // u_pa_1/_2     = output per-atom energy bricks (written if eflag_atom)
   // v?_pa_1/_2    = per-atom virial bricks, passed on to poisson_2s_peratom()
 
   // pack both densities into one complex array:
   // density 1 as the first value of each pair, density 2 as the second
   // a single forward FFT then handles both
 
   for (int ig = 0; ig < nfft_6; ig++) {
     work1_6[2*ig] = dfft_1[ig];
     work1_6[2*ig+1] = dfft_2[ig];
   }
 
   fft1_6->compute(work1_6,work1_6,1);
 
   // global energy and virial are built from split_1 / split_2
   // split_fourier() fills both from work1_6
   // the virial is accumulated only when vflag_global is set
   // the energy only when eflag_global is set
 
   const double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
   const double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     split_fourier();
 
     if (vflag_global) {
       for (int ig = 0; ig < nfft_6; ig++) {
         const int n = 2*ig;
         const double eng = 2 * s2 * greensfn_6[ig] * (split_1[n]*split_2[n+1] + split_1[n+1]*split_2[n]);
         for (int iv = 0; iv < 6; iv++) virial_6[iv] += eng*vg_6[ig][iv];
         if (eflag_global) energy_6 += eng;
       }
     } else {
       for (int ig = 0; ig < nfft_6; ig++) {
         const int n = 2*ig;
         energy_6 += 
           2 * s2 * greensfn_6[ig] * (split_1[n]*split_2[n+1] + split_1[n+1]*split_2[n]);
       }
     }
   }
 
   // scale by 1/total-grid-pts and multiply by greensfn_6
   // both values of a pair get the same factor
 
   for (int ig = 0; ig < nfft_6; ig++) {
     work1_6[2*ig] *= scaleinv * greensfn_6[ig];
     work1_6[2*ig+1] *= scaleinv * greensfn_6[ig];
   }
 
   // gradients: one backward FFT per direction
   // work2_6 = -i*g*work1_6 with g = 0.5*(fk-fk2) of that direction
   // after each transform the first value of a pair belongs to density 1,
   // the second to density 2
 
   // x direction gradient
 
   int n = 0;
   for (int iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++)
     for (int iy = nylo_fft_6; iy <= nyhi_fft_6; iy++)
       for (int ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
         work2_6[n] = 0.5*(fkx_6[ix]-fkx2_6[ix])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkx_6[ix]-fkx2_6[ix])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
         vxbrick_1[iz][iy][ix] = work2_6[n];
         vxbrick_2[iz][iy][ix] = work2_6[n+1];
         n += 2;
       }
 
   // y direction gradient
 
   n = 0;
   for (int iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++)
     for (int iy = nylo_fft_6; iy <= nyhi_fft_6; iy++)
       for (int ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
         work2_6[n] = 0.5*(fky_6[iy]-fky2_6[iy])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fky_6[iy]-fky2_6[iy])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
         vybrick_1[iz][iy][ix] = work2_6[n];
         vybrick_2[iz][iy][ix] = work2_6[n+1];
         n += 2;
       }
 
   // z direction gradient
 
   n = 0;
   for (int iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++)
     for (int iy = nylo_fft_6; iy <= nyhi_fft_6; iy++)
       for (int ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
         work2_6[n] = 0.5*(fkz_6[iz]-fkz2_6[iz])*work1_6[n+1];
         work2_6[n+1] = -0.5*(fkz_6[iz]-fkz2_6[iz])*work1_6[n];
         n += 2;
       }
 
   fft2_6->compute(work2_6,work2_6,-1);
 
   n = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
         vzbrick_1[iz][iy][ix] = work2_6[n];
         vzbrick_2[iz][iy][ix] = work2_6[n+1];
         n += 2;
       }
 
   // per-atom energy: backward transform of the unmodified work1_6
 
   if (eflag_atom) {
     for (int ig = 0; ig < nfft_6; ig++) {
       work2_6[2*ig] = work1_6[2*ig];
       work2_6[2*ig+1] = work1_6[2*ig+1];
     }
 
     fft2_6->compute(work2_6,work2_6,-1); 
 
     n = 0;
     for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
       for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
         for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
           u_pa_1[iz][iy][ix] = work2_6[n];
           u_pa_2[iz][iy][ix] = work2_6[n+1];
           n += 2;
         }
   }
 
   // per-atom virial bricks for both densities
 
   if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                      v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
 }
 
 
 /* ----------------------------------------------------------------------
    Poisson solver for one mesh with 2 different dispersion densities 
    for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                               FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                               FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                               FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 {
   // dfft_1,dfft_2 = two real densities, nfft_6 values each
   // u_pa_1/_2     = output bricks for density 1 / density 2 (always written)
   // v?_pa_1/_2    = per-atom virial bricks, passed on to poisson_2s_peratom()
 
   // pack both densities into one complex array:
   // density 1 as the first value of each pair, density 2 as the second
   // a single forward FFT then handles both
 
   for (int ig = 0; ig < nfft_6; ig++) {
     work1_6[2*ig] = dfft_1[ig];
     work1_6[2*ig+1] = dfft_2[ig];
   }
 
   fft1_6->compute(work1_6,work1_6,1);
 
   // global energy and virial are built from split_1 / split_2
   // split_fourier() fills both from work1_6
   // the virial is accumulated only when vflag_global is set
   // the energy only when eflag_global is set
 
   const double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
   const double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
     split_fourier();
 
     if (vflag_global) {
       for (int ig = 0; ig < nfft_6; ig++) {
         const int n = 2*ig;
         const double eng = 2 * s2 * greensfn_6[ig] * (split_1[n]*split_2[n+1] + split_1[n+1]*split_2[n]);
         for (int iv = 0; iv < 6; iv++) virial_6[iv] += eng*vg_6[ig][iv];
         if (eflag_global) energy_6 += eng;
       }
     } else {
       for (int ig = 0; ig < nfft_6; ig++) {
         const int n = 2*ig;
         energy_6 += 
           2 * s2 * greensfn_6[ig] * (split_1[n]*split_2[n+1] + split_1[n+1]*split_2[n]);
       }
     }
   }
 
   // scale by 1/total-grid-pts and multiply by greensfn_6
   // both values of a pair get the same factor
 
   for (int ig = 0; ig < nfft_6; ig++) {
     work1_6[2*ig] *= scaleinv * greensfn_6[ig];
     work1_6[2*ig+1] *= scaleinv * greensfn_6[ig];
   }
 
   // backward transform of a copy, so work1_6 stays available for the
   // per-atom virial pass below
 
   for (int ig = 0; ig < nfft_6; ig++) {
     work2_6[2*ig] = work1_6[2*ig];
     work2_6[2*ig+1] = work1_6[2*ig+1];
   }
 
   fft2_6->compute(work2_6,work2_6,-1); 
 
   // first value of each pair belongs to density 1, second to density 2
 
   int n = 0;
   for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
     for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
       for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
         u_pa_1[iz][iy][ix] = work2_6[n];
         u_pa_2[iz][iy][ix] = work2_6[n+1];
         n += 2;
       }
 
   if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                      v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
 }
 
 /* ----------------------------------------------------------------------
    Fourier Transform for per atom virial calculations
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                                    FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                                    FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                                    FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
 {
   // v?_pa_1 = output bricks taking the first value of each transformed pair
   // v?_pa_2 = output bricks taking the second value of each transformed pair
 
   // six passes, one per virial term, each with its own backward FFT
   // pass t scales both values of every work1_6 pair by one real coefficient:
   //   t = 0,1,2 -> vg_6[][t]
   //   t = 3,4,5 -> vg2_6[][t-3]
   // work1_6 is only read, work2_6 is overwritten by every pass
 
   FFT_SCALAR ***out_1[6] = {v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1};
   FFT_SCALAR ***out_2[6] = {v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2};
 
   for (int term = 0; term < 6; term++) {
     const int col = term % 3;
 
     if (term < 3) {
       for (int ig = 0; ig < nfft_6; ig++) {
         work2_6[2*ig] = work1_6[2*ig]*vg_6[ig][col];
         work2_6[2*ig+1] = work1_6[2*ig+1]*vg_6[ig][col];
       }
     } else {
       for (int ig = 0; ig < nfft_6; ig++) {
         work2_6[2*ig] = work1_6[2*ig]*vg2_6[ig][col];
         work2_6[2*ig+1] = work1_6[2*ig+1]*vg2_6[ig][col];
       }
     }
 
     fft2_6->compute(work2_6,work2_6,-1); 
 
     FFT_SCALAR ***dst_1 = out_1[term];
     FFT_SCALAR ***dst_2 = out_2[term];
 
     int n = 0;
     for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
       for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++)
         for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
           dst_1[iz][iy][ix] = work2_6[n];
           dst_2[iz][iy][ix] = work2_6[n+1];
           n += 2;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    determine the order of communication between the procs when
    splitting the fourier transform
    ------------------------------------------------------------------------- */
 
 void PPPMDisp::split_order(int** com_matrix)
 {
   // com_matrix = nprocs x nprocs table, nonzero entry [i][j] means procs
   //              i and j still have to be paired
   //              it is consumed here: every entry is zero on return
   // result     = com_order[0..] for this proc (me): itself first, then its
   //              partners in the round in which each pairing was made
 
   com_order[0] = me;
 
   // a proc is never paired with itself
 
   for (int ip = 0; ip < nprocs; ip++) com_matrix[ip][ip] = 0;
 
   // busy[p]      = 1 if proc p already has a partner in the current round
   // act_point[p] = cursor of proc p into its row of com_matrix; it walks
   //                downwards and wraps from 0 to nprocs-1
 
   int *busy;
   int *act_point = 0;
   memory->create(busy, nprocs, "pppm/disp:busy");
   memory->create(act_point, nprocs, "pppm/disp:actpoint");
   memset(act_point, 0, nprocs*sizeof(int));
 
   int curr_order = 1;
   int sum;
 
   // one iteration = one communication round
   // repeat until no pairing is left in com_matrix
 
   do {
     memset(busy, 0, nprocs*sizeof(int));
 
     for (int ip = 0; ip < nprocs; ip++) {
       if (busy[ip]) continue;
 
       // advance the cursor at most 12 steps per round looking for a free
       // partner; a proc that finds none stays idle this round and resumes
       // from the same cursor position in the next one
 
       for (int step = 0; step < 12; step++) {
         act_point[ip]--;
         if (act_point[ip] == -1) act_point[ip] = nprocs-1;
 
         const int cand = act_point[ip];
         if (com_matrix[ip][cand] && !busy[cand]) {
           busy[ip] = busy[cand] = 1;
           com_matrix[ip][cand] = com_matrix[cand][ip] = 0;
           act_point[cand] = ip;
           break;
         }
       }
     }
 
     // if this proc was paired, its cursor points at the partner
 
     if (busy[me]) com_order[curr_order++] = act_point[me];
 
     // count the pairings that are still open
 
     sum = 0;
     for (int ip = 0; ip < nprocs; ip++)
       for (int jp = 0; jp < nprocs; jp++) sum += com_matrix[ip][jp];
   } while (sum != 0);
 
   memory->destroy(busy);
   memory->destroy(act_point);
 }
 
 /* ----------------------------------------------------------------------
    split the work vector into its real and imaginary parts
    ------------------------------------------------------------------------- */
 
 void PPPMDisp::split_fourier()
 {
   // fills split_1 and split_2 from work1_6 in two steps:
   //   1. a local half-weight term from each point itself
   //   2. a half-weight term from the point listed for it in dict_rec,
   //      which may live on this or on another proc
   // NOTE(review): dict_send/dict_rec presumably pair each k-point with its
   // mirror point -k - confirm where the dictionaries are built
 
   MPI_Request request;
   MPI_Status status;
 
   // step 1: local halves, and copy work1_6 into the send buffers
   // dict_send[ig][0] = send buffer, dict_send[ig][1] = position within it
 
   for (int ig = 0; ig < nfft_6; ig++) {
     const int re = 2*ig;
     const int im = re+1;
 
     split_1[re] = 0.5*work1_6[re];
     split_2[re] = 0.5*work1_6[re];
     splitbuf1[dict_send[ig][0]][dict_send[ig][1]] = work1_6[re];
 
     split_1[im] = -0.5*work1_6[im];
     split_2[im] = 0.5*work1_6[im];
     splitbuf1[dict_send[ig][0]][dict_send[ig][1]+1] = work1_6[im];
   }
 
   // buffer 0 is the "exchange" with this proc itself: plain copy
 
   for (int iv = 0; iv < com_each[0]; iv++) splitbuf2[0][iv] = splitbuf1[0][iv];
 
   // buffers 1..com_procs-1 are swapped with the partners in com_order
   // one partner at a time: post the receive, send, wait for the receive
 
   for (int ic = 1; ic < com_procs; ic++) {
     MPI_Irecv(splitbuf2[ic],com_each[ic],MPI_FFT_SCALAR,com_order[ic],0,world,&request);
     MPI_Send(splitbuf1[ic],com_each[ic],MPI_FFT_SCALAR,com_order[ic],0,world);
     MPI_Wait(&request,&status);
   }
 
   // step 2: fold the received pairs in
   // dict_rec[ic][ip] = position in split_1/split_2 of pair ip of buffer ic
   // the first value of a pair is added to split_1 and subtracted from
   // split_2, the second value is added to both
 
   for (int ic = 0; ic < com_procs; ic++) {
     const int npairs = com_each[ic]/2;
     for (int ip = 0; ip < npairs; ip++) {
       const int dst = dict_rec[ic][ip];
       const int src = 2*ip;
 
       split_1[dst] += 0.5*splitbuf2[ic][src];
       split_2[dst] -= 0.5*splitbuf2[ic][src];
       split_1[dst+1] += 0.5*splitbuf2[ic][src+1];
       split_2[dst+1] += 0.5*splitbuf2[ic][src+1];
     }
   }
 }
  
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles 
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	y0 = z0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d[0][l];
 	  ekx -= x0*vdx_brick[mz][my][mx];
 	  eky -= x0*vdy_brick[mz][my][mx];
 	  ekz -= x0*vdz_brick[mz][my][mx];
 	}
       }
     }
 
     // convert E-field to force
 
     const double qfactor = force->qqrd2e * scale * q[i];
     f[i][0] += qfactor*ekx;
     f[i][1] += qfactor*eky;
     if (slabflag != 2) f[i][2] += qfactor*ekz;
   }
 }
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
    for ad scheme 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
 
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm/xprd;
   double hy_inv = ny_pppm/yprd;
   double hz_inv = nz_pppm/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
   double **f = atom->f;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
     compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       for (m = nlower; m <= nupper; m++) {
         my = m+ny;
         for (l = nlower; l <= nupper; l++) {
           mx = l+nx;
           ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
           ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
     // convert E-field to force and substract self forces
     const double qfactor = force->qqrd2e * scale;
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
     sf = sf_coeff[0]*sin(2*MY_PI*s1);
     sf += sf_coeff[1]*sin(4*MY_PI*s1);
     sf *= 2*q[i]*q[i];
     f[i][0] += qfactor*(ekx*q[i] - sf);
 
     sf = sf_coeff[2]*sin(2*MY_PI*s2);
     sf += sf_coeff[3]*sin(4*MY_PI*s2);
     sf *= 2*q[i]*q[i];
     f[i][1] += qfactor*(eky*q[i] - sf);
 
 
     sf = sf_coeff[4]*sin(2*MY_PI*s3);
     sf += sf_coeff[5]*sin(4*MY_PI*s3);
     sf *= 2*q[i]*q[i];
     if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get per-atom energy/virial for my particles
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_c_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   double *q = atom->q;
   double **x = atom->x;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
     dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
     dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
     dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
     compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
 
     u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower; n <= nupper; n++) {
       mz = n+nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
 	my = m+ny;
 	y0 = z0*rho1d[1][m];
 	for (l = nlower; l <= nupper; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d[0][l];
 	  if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];	
 	  if (vflag_atom) {
             v0 += x0*v0_brick[mz][my][mx];
             v1 += x0*v1_brick[mz][my][mx];
             v2 += x0*v2_brick[mz][my][mx];
             v3 += x0*v3_brick[mz][my][mx];
             v4 += x0*v4_brick[mz][my][mx];
             v5 += x0*v5_brick[mz][my][mx];
           }
 	}
       }
     }
 
     // convert E-field to force
 
     const double qfactor = 0.5*force->qqrd2e * scale * q[i];
 
     if (eflag_atom) eatom[i] += u_pa*qfactor;
     if (vflag_atom) {
       vatom[i][0] += v0*qfactor;
       vatom[i][1] += v1*qfactor;
       vatom[i][2] += v2*qfactor;
       vatom[i][3] += v3*qfactor;
       vatom[i][4] += v4*qfactor;
       vatom[i][5] += v5*qfactor;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx,eky,ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
 	  ekx -= x0*vdx_brick_g[mz][my][mx];
 	  eky -= x0*vdy_brick_g[mz][my][mx];
 	  ekz -= x0*vdz_brick_g[mz][my][mx];
 	}
       }
     }
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type];
     f[i][0] += lj*ekx;
     f[i][1] += lj*eky;
     if (slabflag != 2) f[i][2] += lj*ekz;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz;
   FFT_SCALAR ekx,eky,ekz;
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm_6/xprd;
   double hy_inv = ny_pppm_6/yprd;
   double hz_inv = nz_pppm_6/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
  
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
 
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       for (m = nlower_6; m <= nupper_6; m++) {
         my = m+ny;
         for (l = nlower_6; l <= nupper_6; l++) {
           mx = l+nx;
           ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
           eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
           ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
         }
       }
     }
     ekx *= hx_inv;
     eky *= hy_inv;
     ekz *= hz_inv;
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type];
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
 
     sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
     sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
     sf *= 2*lj*lj;
     f[i][0] += ekx*lj - sf;
 
     sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
     sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
     sf *= 2*lj*lj;
     f[i][1] += eky*lj - sf;
 
 
     sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
     sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
     sf *= 2*lj*lj;
     if (slabflag != 2) f[i][2] += ekz*lj - sf;
 
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for geometric mixing rule for per atom quantities
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_g_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   int type;
   double lj;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
 	  if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];	
 	  if (vflag_atom) {
             v0 += x0*v0_brick_g[mz][my][mx];
             v1 += x0*v1_brick_g[mz][my][mx];
             v2 += x0*v2_brick_g[mz][my][mx];
             v3 += x0*v3_brick_g[mz][my][mx];
             v4 += x0*v4_brick_g[mz][my][mx];
             v5 += x0*v5_brick_g[mz][my][mx];
           }
 	}
       }
     }
 
     // convert E-field to force
     type = atom->type[i];
     lj = B[type]*0.5;
 
     if (eflag_atom) eatom[i] += u_pa*lj;
     if (vflag_atom) {
       vatom[i][0] += v0*lj;
       vatom[i][1] += v1*lj;
       vatom[i][2] += v2*lj;
       vatom[i][3] += v3*lj;
       vatom[i][4] += v4*lj;
       vatom[i][5] += v5*lj;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule and ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_ik()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
   FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
   FFT_SCALAR ekx6, eky6, ekz6;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     ekx0 = eky0 = ekz0 = ZEROF;
     ekx1 = eky1 = ekz1 = ZEROF;
     ekx2 = eky2 = ekz2 = ZEROF;
     ekx3 = eky3 = ekz3 = ZEROF;
     ekx4 = eky4 = ekz4 = ZEROF;
     ekx5 = eky5 = ekz5 = ZEROF;
     ekx6 = eky6 = ekz6 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
 	  ekx0 -= x0*vdx_brick_a0[mz][my][mx];
 	  eky0 -= x0*vdy_brick_a0[mz][my][mx];
 	  ekz0 -= x0*vdz_brick_a0[mz][my][mx];
 	  ekx1 -= x0*vdx_brick_a1[mz][my][mx];
 	  eky1 -= x0*vdy_brick_a1[mz][my][mx];
 	  ekz1 -= x0*vdz_brick_a1[mz][my][mx];
           ekx2 -= x0*vdx_brick_a2[mz][my][mx];
 	  eky2 -= x0*vdy_brick_a2[mz][my][mx];
 	  ekz2 -= x0*vdz_brick_a2[mz][my][mx];
 	  ekx3 -= x0*vdx_brick_a3[mz][my][mx];
 	  eky3 -= x0*vdy_brick_a3[mz][my][mx];
 	  ekz3 -= x0*vdz_brick_a3[mz][my][mx];
 	  ekx4 -= x0*vdx_brick_a4[mz][my][mx];
 	  eky4 -= x0*vdy_brick_a4[mz][my][mx];
 	  ekz4 -= x0*vdz_brick_a4[mz][my][mx];
           ekx5 -= x0*vdx_brick_a5[mz][my][mx];
 	  eky5 -= x0*vdy_brick_a5[mz][my][mx];
 	  ekz5 -= x0*vdz_brick_a5[mz][my][mx];
           ekx6 -= x0*vdx_brick_a6[mz][my][mx];
 	  eky6 -= x0*vdy_brick_a6[mz][my][mx];
 	  ekz6 -= x0*vdz_brick_a6[mz][my][mx];
 	}
       }
     }
     // convert D-field to force
     type = atom->type[i];
     lj0 = B[7*type+6];
     lj1 = B[7*type+5];
     lj2 = B[7*type+4];
     lj3 = B[7*type+3];
     lj4 = B[7*type+2];
     lj5 = B[7*type+1];
     lj6 = B[7*type];
     f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
     f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
     if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule for the ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_ad()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
   FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
   FFT_SCALAR ekx6, eky6, ekz6;
 
   double s1,s2,s3;
   double sf = 0.0;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   double hx_inv = nx_pppm_6/xprd;
   double hy_inv = ny_pppm_6/yprd;
   double hz_inv = nz_pppm_6/zprd_slab;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   double **f = atom->f;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
     compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
 
     ekx0 = eky0 = ekz0 = ZEROF;
     ekx1 = eky1 = ekz1 = ZEROF;
     ekx2 = eky2 = ekz2 = ZEROF;
     ekx3 = eky3 = ekz3 = ZEROF;
     ekx4 = eky4 = ekz4 = ZEROF;
     ekx5 = eky5 = ekz5 = ZEROF;
     ekx6 = eky6 = ekz6 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
           x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
           y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
           z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
 
           ekx0 += x0*u_brick_a0[mz][my][mx];
           eky0 += y0*u_brick_a0[mz][my][mx];
           ekz0 += z0*u_brick_a0[mz][my][mx];
 
           ekx1 += x0*u_brick_a1[mz][my][mx];
           eky1 += y0*u_brick_a1[mz][my][mx];
           ekz1 += z0*u_brick_a1[mz][my][mx];
 
           ekx2 += x0*u_brick_a2[mz][my][mx];
           eky2 += y0*u_brick_a2[mz][my][mx];
           ekz2 += z0*u_brick_a2[mz][my][mx];
 
           ekx3 += x0*u_brick_a3[mz][my][mx];
           eky3 += y0*u_brick_a3[mz][my][mx];
           ekz3 += z0*u_brick_a3[mz][my][mx];
 
           ekx4 += x0*u_brick_a4[mz][my][mx];
           eky4 += y0*u_brick_a4[mz][my][mx];
           ekz4 += z0*u_brick_a4[mz][my][mx];
 
           ekx5 += x0*u_brick_a5[mz][my][mx];
           eky5 += y0*u_brick_a5[mz][my][mx];
           ekz5 += z0*u_brick_a5[mz][my][mx];
 
           ekx6 += x0*u_brick_a6[mz][my][mx];
           eky6 += y0*u_brick_a6[mz][my][mx];
           ekz6 += z0*u_brick_a6[mz][my][mx];
 	}
       }
     }
 
     ekx0 *= hx_inv;
     eky0 *= hy_inv;
     ekz0 *= hz_inv;
 
     ekx1 *= hx_inv;
     eky1 *= hy_inv;
     ekz1 *= hz_inv;
 
     ekx2 *= hx_inv;
     eky2 *= hy_inv;
     ekz2 *= hz_inv;
 
     ekx3 *= hx_inv;
     eky3 *= hy_inv;
     ekz3 *= hz_inv;
 
     ekx4 *= hx_inv;
     eky4 *= hy_inv;
     ekz4 *= hz_inv;
 
     ekx5 *= hx_inv;
     eky5 *= hy_inv;
     ekz5 *= hz_inv;
 
     ekx6 *= hx_inv;
     eky6 *= hy_inv;
     ekz6 *= hz_inv;
 
     // convert D-field to force
     type = atom->type[i];
     lj0 = B[7*type+6];
     lj1 = B[7*type+5];
     lj2 = B[7*type+4];
     lj3 = B[7*type+3];
     lj4 = B[7*type+2];
     lj5 = B[7*type+1];
     lj6 = B[7*type];
 
     s1 = x[i][0]*hx_inv;
     s2 = x[i][1]*hy_inv;
     s3 = x[i][2]*hz_inv;
 
     sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
     sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
 
     sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
     sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
 
     sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
     sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
     sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
     if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
   }
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for arithmetic mixing rule for per atom quantities
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::fieldforce_a_peratom()
 {
   int i,l,m,n,nx,ny,nz,mx,my,mz;
   FFT_SCALAR dx,dy,dz,x0,y0,z0;
   FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
   FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
   FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
   FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
   FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
   FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
   FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of dispersion field on particle
 
   double **x = atom->x;
   int type;
   double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++) {
 
     nx = part2grid_6[i][0];
     ny = part2grid_6[i][1];
     nz = part2grid_6[i][2];
     dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
     dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
     dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
     compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
 
     u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
     u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
     u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
     u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
     u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
     u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
     u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
     for (n = nlower_6; n <= nupper_6; n++) {
       mz = n+nz;
       z0 = rho1d_6[2][n];
       for (m = nlower_6; m <= nupper_6; m++) {
 	my = m+ny;
 	y0 = z0*rho1d_6[1][m];
 	for (l = nlower_6; l <= nupper_6; l++) {
 	  mx = l+nx;
 	  x0 = y0*rho1d_6[0][l];
           if (eflag_atom) {
             u_pa0 += x0*u_brick_a0[mz][my][mx];
             u_pa1 += x0*u_brick_a1[mz][my][mx];
             u_pa2 += x0*u_brick_a2[mz][my][mx];
             u_pa3 += x0*u_brick_a3[mz][my][mx];
             u_pa4 += x0*u_brick_a4[mz][my][mx];
             u_pa5 += x0*u_brick_a5[mz][my][mx];
             u_pa6 += x0*u_brick_a6[mz][my][mx];
 	  }
           if (vflag_atom) {
             v00 += x0*v0_brick_a0[mz][my][mx];
             v10 += x0*v1_brick_a0[mz][my][mx];
             v20 += x0*v2_brick_a0[mz][my][mx];
             v30 += x0*v3_brick_a0[mz][my][mx];
             v40 += x0*v4_brick_a0[mz][my][mx];
             v50 += x0*v5_brick_a0[mz][my][mx];
             v01 += x0*v0_brick_a1[mz][my][mx];
             v11 += x0*v1_brick_a1[mz][my][mx];
             v21 += x0*v2_brick_a1[mz][my][mx];
             v31 += x0*v3_brick_a1[mz][my][mx];
             v41 += x0*v4_brick_a1[mz][my][mx];
             v51 += x0*v5_brick_a1[mz][my][mx];
             v02 += x0*v0_brick_a2[mz][my][mx];
             v12 += x0*v1_brick_a2[mz][my][mx];
             v22 += x0*v2_brick_a2[mz][my][mx];
             v32 += x0*v3_brick_a2[mz][my][mx];
             v42 += x0*v4_brick_a2[mz][my][mx];
             v52 += x0*v5_brick_a2[mz][my][mx];
             v03 += x0*v0_brick_a3[mz][my][mx];
             v13 += x0*v1_brick_a3[mz][my][mx];
             v23 += x0*v2_brick_a3[mz][my][mx];
             v33 += x0*v3_brick_a3[mz][my][mx];
             v43 += x0*v4_brick_a3[mz][my][mx];
             v53 += x0*v5_brick_a3[mz][my][mx];
             v04 += x0*v0_brick_a4[mz][my][mx];
             v14 += x0*v1_brick_a4[mz][my][mx];
             v24 += x0*v2_brick_a4[mz][my][mx];
             v34 += x0*v3_brick_a4[mz][my][mx];
             v44 += x0*v4_brick_a4[mz][my][mx];
             v54 += x0*v5_brick_a4[mz][my][mx];
             v05 += x0*v0_brick_a5[mz][my][mx];
             v15 += x0*v1_brick_a5[mz][my][mx];
             v25 += x0*v2_brick_a5[mz][my][mx];
             v35 += x0*v3_brick_a5[mz][my][mx];
             v45 += x0*v4_brick_a5[mz][my][mx];
             v55 += x0*v5_brick_a5[mz][my][mx];
             v06 += x0*v0_brick_a6[mz][my][mx];
             v16 += x0*v1_brick_a6[mz][my][mx];
             v26 += x0*v2_brick_a6[mz][my][mx];
             v36 += x0*v3_brick_a6[mz][my][mx];
             v46 += x0*v4_brick_a6[mz][my][mx];
             v56 += x0*v5_brick_a6[mz][my][mx];
           }
 	}
       }
     }
     // convert D-field to force
     type = atom->type[i];
     lj0 = B[7*type+6]*0.5;
     lj1 = B[7*type+5]*0.5;
     lj2 = B[7*type+4]*0.5;
     lj3 = B[7*type+3]*0.5;
     lj4 = B[7*type+2]*0.5;
     lj5 = B[7*type+1]*0.5;
     lj6 = B[7*type]*0.5;
 
  
     if (eflag_atom) 
       eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + 
         u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
     if (vflag_atom) {
       vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + 
         v04*lj4 + v05*lj5 + v06*lj6;
       vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + 
         v14*lj4 + v15*lj5 + v16*lj6;
       vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + 
         v24*lj4 + v25*lj5 + v26*lj6;
       vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + 
         v34*lj4 + v35*lj5 + v36*lj6;
       vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + 
         v44*lj4 + v45*lj5 + v46*lj6;
       vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + 
         v54*lj4 + v55*lj5 + v56*lj6;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    pack values to buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   switch (flag) {
 
   // Coulomb interactions
 
   case FORWARD_IK: {
     FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
     break;
   }
 
   case FORWARD_AD: {
     FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
     break;
   }
 
   case FORWARD_IK_PERATOM: {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM: {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
     break;
   }
 
   // Dispersion interactions, geometric mixing
 
   case FORWARD_IK_G: {
     FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc[list[i]];
       buf[n++] = ysrc[list[i]];
       buf[n++] = zsrc[list[i]];
     }
     break;
   }
 
   case FORWARD_AD_G: {
     FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
     break;
   }
 
   case FORWARD_IK_PERATOM_G: {
     FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) buf[n++] = esrc[list[i]];
       if (vflag_atom) {
         buf[n++] = v0src[list[i]];
         buf[n++] = v1src[list[i]];
         buf[n++] = v2src[list[i]];
         buf[n++] = v3src[list[i]];
         buf[n++] = v4src[list[i]];
         buf[n++] = v5src[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_G: {
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src[list[i]];
       buf[n++] = v1src[list[i]];
       buf[n++] = v2src[list[i]];
       buf[n++] = v3src[list[i]];
       buf[n++] = v4src[list[i]];
       buf[n++] = v5src[list[i]];
     }
     break;
   }
 
   // Dispersion interactions, arithmetic mixing
 
   case FORWARD_IK_A: {
     FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = xsrc0[list[i]];
       buf[n++] = ysrc0[list[i]];
       buf[n++] = zsrc0[list[i]];
 
       buf[n++] = xsrc1[list[i]];
       buf[n++] = ysrc1[list[i]];
       buf[n++] = zsrc1[list[i]];
 
       buf[n++] = xsrc2[list[i]];
       buf[n++] = ysrc2[list[i]];
       buf[n++] = zsrc2[list[i]];
 
       buf[n++] = xsrc3[list[i]];
       buf[n++] = ysrc3[list[i]];
       buf[n++] = zsrc3[list[i]];
 
       buf[n++] = xsrc4[list[i]];
       buf[n++] = ysrc4[list[i]];
       buf[n++] = zsrc4[list[i]];
 
       buf[n++] = xsrc5[list[i]];
       buf[n++] = ysrc5[list[i]];
       buf[n++] = zsrc5[list[i]];
 
       buf[n++] = xsrc6[list[i]];
       buf[n++] = ysrc6[list[i]];
       buf[n++] = zsrc6[list[i]];
     }
     break;
   }
 
   case FORWARD_AD_A: {
     FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = src0[list[i]];
       buf[n++] = src1[list[i]];
       buf[n++] = src2[list[i]];
       buf[n++] = src3[list[i]];
       buf[n++] = src4[list[i]];
       buf[n++] = src5[list[i]];
       buf[n++] = src6[list[i]];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_A: {
     FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) {
         buf[n++] = esrc0[list[i]];
         buf[n++] = esrc1[list[i]];
         buf[n++] = esrc2[list[i]];
         buf[n++] = esrc3[list[i]];
         buf[n++] = esrc4[list[i]];
         buf[n++] = esrc5[list[i]];
         buf[n++] = esrc6[list[i]];
       }
       if (vflag_atom) {
         buf[n++] = v0src0[list[i]];
         buf[n++] = v1src0[list[i]];
         buf[n++] = v2src0[list[i]];
         buf[n++] = v3src0[list[i]];
         buf[n++] = v4src0[list[i]];
         buf[n++] = v5src0[list[i]];
 
         buf[n++] = v0src1[list[i]];
         buf[n++] = v1src1[list[i]];
         buf[n++] = v2src1[list[i]];
         buf[n++] = v3src1[list[i]];
         buf[n++] = v4src1[list[i]];
         buf[n++] = v5src1[list[i]];
 
         buf[n++] = v0src2[list[i]];
         buf[n++] = v1src2[list[i]];
         buf[n++] = v2src2[list[i]];
         buf[n++] = v3src2[list[i]];
         buf[n++] = v4src2[list[i]];
         buf[n++] = v5src2[list[i]];
 
         buf[n++] = v0src3[list[i]];
         buf[n++] = v1src3[list[i]];
         buf[n++] = v2src3[list[i]];
         buf[n++] = v3src3[list[i]];
         buf[n++] = v4src3[list[i]];
         buf[n++] = v5src3[list[i]];
 
         buf[n++] = v0src4[list[i]];
         buf[n++] = v1src4[list[i]];
         buf[n++] = v2src4[list[i]];
         buf[n++] = v3src4[list[i]];
         buf[n++] = v4src4[list[i]];
         buf[n++] = v5src4[list[i]];
 
         buf[n++] = v0src5[list[i]];
         buf[n++] = v1src5[list[i]];
         buf[n++] = v2src5[list[i]];
         buf[n++] = v3src5[list[i]];
         buf[n++] = v4src5[list[i]];
         buf[n++] = v5src5[list[i]];
 
         buf[n++] = v0src6[list[i]];
         buf[n++] = v1src6[list[i]];
         buf[n++] = v2src6[list[i]];
         buf[n++] = v3src6[list[i]];
         buf[n++] = v4src6[list[i]];
         buf[n++] = v5src6[list[i]];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_A: {
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       buf[n++] = v0src0[list[i]];
       buf[n++] = v1src0[list[i]];
       buf[n++] = v2src0[list[i]];
       buf[n++] = v3src0[list[i]];
       buf[n++] = v4src0[list[i]];
       buf[n++] = v5src0[list[i]];
 
       buf[n++] = v0src1[list[i]];
       buf[n++] = v1src1[list[i]];
       buf[n++] = v2src1[list[i]];
       buf[n++] = v3src1[list[i]];
       buf[n++] = v4src1[list[i]];
       buf[n++] = v5src1[list[i]];
 
       buf[n++] = v0src2[list[i]];
       buf[n++] = v1src2[list[i]];
       buf[n++] = v2src2[list[i]];
       buf[n++] = v3src2[list[i]];
       buf[n++] = v4src2[list[i]];
       buf[n++] = v5src2[list[i]];
 
       buf[n++] = v0src3[list[i]];
       buf[n++] = v1src3[list[i]];
       buf[n++] = v2src3[list[i]];
       buf[n++] = v3src3[list[i]];
       buf[n++] = v4src3[list[i]];
       buf[n++] = v5src3[list[i]];
 
       buf[n++] = v0src4[list[i]];
       buf[n++] = v1src4[list[i]];
       buf[n++] = v2src4[list[i]];
       buf[n++] = v3src4[list[i]];
       buf[n++] = v4src4[list[i]];
       buf[n++] = v5src4[list[i]];
 
       buf[n++] = v0src5[list[i]];
       buf[n++] = v1src5[list[i]];
       buf[n++] = v2src5[list[i]];
       buf[n++] = v3src5[list[i]];
       buf[n++] = v4src5[list[i]];
       buf[n++] = v5src5[list[i]];
 
       buf[n++] = v0src6[list[i]];
       buf[n++] = v1src6[list[i]];
       buf[n++] = v2src6[list[i]];
       buf[n++] = v3src6[list[i]];
       buf[n++] = v4src6[list[i]];
       buf[n++] = v5src6[list[i]];
     }
     break;
   }
 
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's own values from buf and set own ghost values
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   switch (flag) {
 
   // Coulomb interactions
 
   case FORWARD_IK: {
     FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD: {
     FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[n++];
     break;
   }
 
   case FORWARD_IK_PERATOM: {
     FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM: {
     FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
     FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
     break;
   }
 
   // Disperion interactions, geometric mixing
 
   case FORWARD_IK_G: {
     FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       xdest[list[i]] = buf[n++];
       ydest[list[i]] = buf[n++];
       zdest[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD_G: {
     FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] = buf[n++];
     break;
   }
 
   case FORWARD_IK_PERATOM_G: {
     FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) esrc[list[i]] = buf[n++];
       if (vflag_atom) {
         v0src[list[i]] = buf[n++];
         v1src[list[i]] = buf[n++];
         v2src[list[i]] = buf[n++];
         v3src[list[i]] = buf[n++];
         v4src[list[i]] = buf[n++];
         v5src[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_G: {
     FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       v0src[list[i]] = buf[n++];
       v1src[list[i]] = buf[n++];
       v2src[list[i]] = buf[n++];
       v3src[list[i]] = buf[n++];
       v4src[list[i]] = buf[n++];
       v5src[list[i]] = buf[n++];
     }
     break;
   }
 
   // Disperion interactions, arithmetic mixing
 
   case FORWARD_IK_A: {
     FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       xdest0[list[i]] = buf[n++];
       ydest0[list[i]] = buf[n++];
       zdest0[list[i]] = buf[n++];
 
       xdest1[list[i]] = buf[n++];
       ydest1[list[i]] = buf[n++];
       zdest1[list[i]] = buf[n++];
 
       xdest2[list[i]] = buf[n++];
       ydest2[list[i]] = buf[n++];
       zdest2[list[i]] = buf[n++];
 
       xdest3[list[i]] = buf[n++];
       ydest3[list[i]] = buf[n++];
       zdest3[list[i]] = buf[n++];
 
       xdest4[list[i]] = buf[n++];
       ydest4[list[i]] = buf[n++];
       zdest4[list[i]] = buf[n++];
 
       xdest5[list[i]] = buf[n++];
       ydest5[list[i]] = buf[n++];
       zdest5[list[i]] = buf[n++];
 
       xdest6[list[i]] = buf[n++];
       ydest6[list[i]] = buf[n++];
       zdest6[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_AD_A: {
     FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       dest0[list[i]] = buf[n++];
       dest1[list[i]] = buf[n++];
       dest2[list[i]] = buf[n++];
       dest3[list[i]] = buf[n++];
       dest4[list[i]] = buf[n++];
       dest5[list[i]] = buf[n++];
       dest6[list[i]] = buf[n++];
     }
     break;
   }
 
   case FORWARD_IK_PERATOM_A: {
     FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       if (eflag_atom) {
         esrc0[list[i]] = buf[n++];
         esrc1[list[i]] = buf[n++];
         esrc2[list[i]] = buf[n++];
         esrc3[list[i]] = buf[n++];
         esrc4[list[i]] = buf[n++];
         esrc5[list[i]] = buf[n++];
         esrc6[list[i]] = buf[n++];
       }
       if (vflag_atom) {
         v0src0[list[i]] = buf[n++];
         v1src0[list[i]] = buf[n++];
         v2src0[list[i]] = buf[n++];
         v3src0[list[i]] = buf[n++];
         v4src0[list[i]] = buf[n++];
         v5src0[list[i]] = buf[n++];
 
         v0src1[list[i]] = buf[n++];
         v1src1[list[i]] = buf[n++];
         v2src1[list[i]] = buf[n++];
         v3src1[list[i]] = buf[n++];
         v4src1[list[i]] = buf[n++];
         v5src1[list[i]] = buf[n++];
 
         v0src2[list[i]] = buf[n++];
         v1src2[list[i]] = buf[n++];
         v2src2[list[i]] = buf[n++];
         v3src2[list[i]] = buf[n++];
         v4src2[list[i]] = buf[n++];
         v5src2[list[i]] = buf[n++];
 
         v0src3[list[i]] = buf[n++];
         v1src3[list[i]] = buf[n++];
         v2src3[list[i]] = buf[n++];
         v3src3[list[i]] = buf[n++];
         v4src3[list[i]] = buf[n++];
         v5src3[list[i]] = buf[n++];
 
         v0src4[list[i]] = buf[n++];
         v1src4[list[i]] = buf[n++];
         v2src4[list[i]] = buf[n++];
         v3src4[list[i]] = buf[n++];
         v4src4[list[i]] = buf[n++];
         v5src4[list[i]] = buf[n++];
 
         v0src5[list[i]] = buf[n++];
         v1src5[list[i]] = buf[n++];
         v2src5[list[i]] = buf[n++];
         v3src5[list[i]] = buf[n++];
         v4src5[list[i]] = buf[n++];
         v5src5[list[i]] = buf[n++];
 
         v0src6[list[i]] = buf[n++];
         v1src6[list[i]] = buf[n++];
         v2src6[list[i]] = buf[n++];
         v3src6[list[i]] = buf[n++];
         v4src6[list[i]] = buf[n++];
         v5src6[list[i]] = buf[n++];
       }
     }
     break;
   }
 
   case FORWARD_AD_PERATOM_A: {
     FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
 
     for (int i = 0; i < nlist; i++) {
       v0src0[list[i]] = buf[n++];
       v1src0[list[i]] = buf[n++];
       v2src0[list[i]] = buf[n++];
       v3src0[list[i]] = buf[n++];
       v4src0[list[i]] = buf[n++];
       v5src0[list[i]] = buf[n++];
 
       v0src1[list[i]] = buf[n++];
       v1src1[list[i]] = buf[n++];
       v2src1[list[i]] = buf[n++];
       v3src1[list[i]] = buf[n++];
       v4src1[list[i]] = buf[n++];
       v5src1[list[i]] = buf[n++];
 
       v0src2[list[i]] = buf[n++];
       v1src2[list[i]] = buf[n++];
       v2src2[list[i]] = buf[n++];
       v3src2[list[i]] = buf[n++];
       v4src2[list[i]] = buf[n++];
       v5src2[list[i]] = buf[n++];
 
       v0src3[list[i]] = buf[n++];
       v1src3[list[i]] = buf[n++];
       v2src3[list[i]] = buf[n++];
       v3src3[list[i]] = buf[n++];
       v4src3[list[i]] = buf[n++];
       v5src3[list[i]] = buf[n++];
 
       v0src4[list[i]] = buf[n++];
       v1src4[list[i]] = buf[n++];
       v2src4[list[i]] = buf[n++];
       v3src4[list[i]] = buf[n++];
       v4src4[list[i]] = buf[n++];
       v5src4[list[i]] = buf[n++];
 
       v0src5[list[i]] = buf[n++];
       v1src5[list[i]] = buf[n++];
       v2src5[list[i]] = buf[n++];
       v3src5[list[i]] = buf[n++];
       v4src5[list[i]] = buf[n++];
       v5src5[list[i]] = buf[n++];
 
       v0src6[list[i]] = buf[n++];
       v1src6[list[i]] = buf[n++];
       v2src6[list[i]] = buf[n++];
       v3src6[list[i]] = buf[n++];
       v4src6[list[i]] = buf[n++];
       v5src6[list[i]] = buf[n++];
     }
     break;
   }
 
   }
 }
 
 /* ----------------------------------------------------------------------
    pack ghost values into buf to send to another proc
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   //Coulomb interactions
 
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
 
   //Dispersion interactions, geometric mixing
 
   } else if (flag == REVERSE_RHO_G) {
     FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       buf[i] = src[list[i]];
 
   //Dispersion interactions, arithmetic mixing
 
   } else if (flag == REVERSE_RHO_A) {
     FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       buf[n++] = src0[list[i]];
       buf[n++] = src1[list[i]];
       buf[n++] = src2[list[i]];
       buf[n++] = src3[list[i]];
       buf[n++] = src4[list[i]];
       buf[n++] = src5[list[i]];
       buf[n++] = src6[list[i]];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack another proc's ghost values from buf and add to own values
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
 {
   int n = 0;
 
   //Coulomb interactions
 
   if (flag == REVERSE_RHO) {
     FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
 
   //Dispersion interactions, geometric mixing
 
   } else if (flag == REVERSE_RHO_G) {
     FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++)
       dest[list[i]] += buf[i];
 
   //Dispersion interactions, arithmetic mixing
 
   } else if (flag == REVERSE_RHO_A) {
     FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
     FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
     for (int i = 0; i < nlist; i++) {
       dest0[list[i]] += buf[n++];
       dest1[list[i]] += buf[n++];
       dest2[list[i]] += buf[n++];
       dest3[list[i]] += buf[n++];
       dest4[list[i]] += buf[n++];
       dest5[list[i]] += buf[n++];
       dest6[list[i]] += buf[n++];
     }
   } 
 }
 
 /* ----------------------------------------------------------------------
    map nprocs to NX by NY grid as PX by PY procs - return optimal px,py 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
 {
   // try every divisor ipx of nprocs, paired with ipy = nprocs/ipx
   // boxx,boxy = per-proc extent in each dimension, rounded up
   // keep the split with the smallest boxx+boxy
   // on a tie, keep the split whose boxx*boxy is larger
 
   int bestsurf = 2 * (nx + ny);
   int bestarea = 0;
 
   for (int ipx = 1; ipx <= nprocs; ipx++) {
     if (nprocs % ipx != 0) continue;
 
     const int ipy = nprocs/ipx;
     const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
     const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
     const int surf = boxx + boxy;
     const int area = boxx*boxy;
 
     // reject candidates that are not an improvement
 
     if (surf > bestsurf) continue;
     if (surf == bestsurf && area <= bestarea) continue;
 
     bestsurf = surf;
     bestarea = area;
     *px = ipx;
     *py = ipy;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
 			      const FFT_SCALAR &dz, int ord, 
                              FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-ord)/2; k <= ord/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = ord-1; l >= 0; l--) {
       r1 = rho_c[l][k] + r1*dx;
       r2 = rho_c[l][k] + r2*dy;
       r3 = rho_c[l][k] + r3*dz;
     }
     r1d[0][k] = r1;
     r1d[1][k] = r2;
     r1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into drho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                           const FFT_SCALAR &dz, int ord, 
                               FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
 {
   int k,l;
   FFT_SCALAR r1,r2,r3;
 
   for (k = (1-ord)/2; k <= ord/2; k++) {
     r1 = r2 = r3 = ZEROF;
 
     for (l = ord-2; l >= 0; l--) {
       r1 = drho_c[l][k] + r1*dx;
       r2 = drho_c[l][k] + r2*dy;
       r3 = drho_c[l][k] + r3*dz;
     }
     dr1d[0][k] = r1;
     dr1d[1][k] = r2;
     dr1d[2][k] = r3;
   }
 }
 
 /* ----------------------------------------------------------------------
    generate coefficients for the weight function of order n
 
               (n-1)
   Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
            k=-(n-1)
   For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
       k is odd integers if n is even and even integers if n is odd
               ---
              | n-1
              | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
   wn(k,x) = <  l=0
              |
              |  0                       otherwise
               ---
    a coefficients are packed into the array rho_coeff to eliminate zeros
    rho_coeff(l,((k+mod(n+1,2))/2)) = a(l,k)
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, 
                                  int ord)
 {
   // coeff  = output, filled as coeff[l][m] for l in [0,ord)
   // dcoeff = output, filled as dcoeff[l-1][m] = l*a[l][k] for l in [1,ord)
   // m runs upward from (1-ord)/2, one column per k = -(ord-1),...,ord-1 step 2
   // this is the same [l][k] layout that compute_rho1d()/compute_drho1d() index
 
   int j,k,l,m;
   FFT_SCALAR s;
 
   // scratch table a[l][k], indexed below with l in [0,ord) and k in [-ord,ord]
 
   FFT_SCALAR **a;
   memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
 
   for (k = -ord; k <= ord; k++) 
     for (l = 0; l < ord; l++)
       a[l][k] = 0.0;
         
   // build the table one order at a time
   // pass j writes columns k = -j,-j+2,...,j and reads only columns k-1 and k+1
 
   a[0][0] = 1.0;
   for (j = 1; j < ord; j++) {
     for (k = -j; k <= j; k += 2) {
       s = 0.0;
       for (l = 0; l < j; l++) {
 	a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
 #ifdef FFT_SINGLE
 	s += powf(0.5,(float) l+1) *
 	  (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
 #else
 	s += pow(0.5,(double) l+1) * 
 	  (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
 #endif
       }
       // constant term is set last, from the sum accumulated over l
       a[0][k] = s;
     }
   }
 
   // pack every other column of a into consecutive columns m of the outputs
 
   m = (1-ord)/2;
   for (k = -(ord-1); k < ord; k += 2) {
     for (l = 0; l < ord; l++)
       coeff[l][m] = a[l][k];
     for (l = 1; l < ord; l++)
       dcoeff[l-1][m] = l*a[l][k];
     m++;
   }
 
   memory->destroy2d_offset(a,-ord);
 }
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
    periodically repeating slabs.  Yields good approximation to 2D Ewald if 
    adequate empty space is left between repeating slabs (J. Chem. Phys. 
    111, 3155).  Slabs defined here to be parallel to the xy plane. 
 ------------------------------------------------------------------------- */
 
 void PPPMDisp::slabcorr(int eflag)
 {
   // the eflag argument is not read here
   // the eflag_global and eflag_atom members decide which energies to tally
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   const int nlocal = atom->nlocal;
 
   // this proc's contribution to the z dipole moment: sum of q*z over owned atoms
 
   double dipole_local = 0.0;
   for (int i = 0; i < nlocal; i++) dipole_local += q[i]*x[i][2];
 
   // global dipole moment = sum over all procs
 
   double dipole_all;
   MPI_Allreduce(&dipole_local,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
 
   const double qscale = force->qqrd2e * scale;
 
   // global energy correction
 
   if (eflag_global) {
     const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
     energy_1 += qscale * e_slabcorr;
   }
 
   // per-atom energy correction
 
   if (eflag_atom) {
     const double efact = 2.0*MY_PI*dipole_all/volume;
     for (int i = 0; i < nlocal; i++) eatom[i] += qscale * q[i]*x[i][2]*efact;
   }
 
   // force correction, z component only
 
   const double ffact = -4.0*MY_PI*dipole_all/volume;
   for (int i = 0; i < nlocal; i++) f[i][2] += qscale * q[i]*ffact;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 1d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::timing_1d(int n, double &time1d)
 {
   // weight applied to the _6 grid timing: 4 if function[2] is set, else 1
   const int mixing = function[2] ? 4 : 1;
   const int use_6 = function[1] + function[2];
   // fft2 passes per step: 1 when differentiation_flag == 1, else 3
   const int nback = (differentiation_flag != 1) ? 3 : 1;
   double tstart,tstop;
 
   // zero the work arrays that will be transformed
 
   if (function[0])
     for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
   if (use_6)
     for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
 
   // time n steps on the fft1/fft2 grid
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (function[0]) {
     for (int step = 0; step < n; step++) {
       fft1->timing1d(work1,nfft_both,1);
       for (int b = 0; b < nback; b++) fft2->timing1d(work1,nfft_both,-1);
     }
   }
 
   MPI_Barrier(world);
   tstop = MPI_Wtime();
   time1d = tstop - tstart;
 
   // time n steps on the fft1_6/fft2_6 grid
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (use_6) {
     for (int step = 0; step < n; step++) {
       fft1_6->timing1d(work1_6,nfft_both_6,1);
       for (int b = 0; b < nback; b++) fft2_6->timing1d(work1_6,nfft_both_6,-1);
     }
   }
 
   MPI_Barrier(world);
   tstop = MPI_Wtime();
   time1d += (tstop - tstart)*mixing;
 
   // return 2 if differentiation_flag is set, else 4
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    perform and time the 3d FFTs required for N timesteps
 ------------------------------------------------------------------------- */
 
 int PPPMDisp::timing_3d(int n, double &time3d)
 {
   // weight applied to the _6 grid timing: 4 if function[2] is set, else 1
   const int mixing = function[2] ? 4 : 1;
   const int use_6 = function[1] + function[2];
   // fft2 passes per step: 1 when differentiation_flag == 1, else 3
   const int nback = (differentiation_flag != 1) ? 3 : 1;
   double tstart,tstop;
 
   // zero the work arrays that will be transformed
 
   if (function[0])
     for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
   if (use_6)
     for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
 
   // time n steps of in-place 3d FFTs on the fft1/fft2 grid
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (function[0]) {
     for (int step = 0; step < n; step++) {
       fft1->compute(work1,work1,1);
       for (int b = 0; b < nback; b++) fft2->compute(work1,work1,-1);
     }
   }
 
   MPI_Barrier(world);
   tstop = MPI_Wtime();
   time3d = tstop - tstart;
 
   // time n steps of in-place 3d FFTs on the fft1_6/fft2_6 grid
 
   MPI_Barrier(world);
   tstart = MPI_Wtime();
 
   if (use_6) {
     for (int step = 0; step < n; step++) {
       fft1_6->compute(work1_6,work1_6,1);
       for (int b = 0; b < nback; b++) fft2_6->compute(work1_6,work1_6,-1);
     }
   }
 
   MPI_Barrier(world);
   tstop = MPI_Wtime();
   time3d += (tstop - tstart) * mixing;
 
   // return 2 if differentiation_flag is set, else 4
 
   return differentiation_flag ? 2 : 4;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local arrays 
 ------------------------------------------------------------------------- */
 
 double PPPMDisp::memory_usage()
 {
   // brick counts per density brick
   // diff = 3 and per = 7 when differentiation_flag is unset, else 1 and 6
   // per drops to 0 when evflag_atom is unset
   // mixing = 7 when function[2] is set, else 1
 
   const int diff = differentiation_flag ? 1 : 3;
   int per = differentiation_flag ? 6 : 7;
   if (!evflag_atom) per = 0;
   const int mixing = function[2] ? 7 : 1;
 
   double bytes = nmax*3 * sizeof(double);
 
   // arrays on the unsuffixed grid
 
   if (function[0]) {
     const int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
       (nzhi_out-nzlo_out+1);
     bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR);   // brick memory
     bytes += 6 * nfft_both * sizeof(double);                   // vg
     bytes += nfft_both * sizeof(double);                       // greensfn
     bytes += nfft_both * 3 * sizeof(FFT_SCALAR);   // density_FFT, work1, work2
     bytes += cg->memory_usage();
   }
 
   // arrays on the _6 grid
 
   if (function[1] + function[2]) {
     const int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) *
       (nzhi_out_6-nzlo_out_6+1);
     // density_brick + vd_brick + per atom bricks
     bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR) * mixing;
     bytes += 6 * nfft_both_6 * sizeof(double);                 // vg
     bytes += nfft_both_6 * sizeof(double);                     // greensfn
     // density_FFT, work1, work2
     bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR);
     bytes += cg_6->memory_usage();
   }
 
   return bytes;
 }
diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h
index f68f4bd71..89437a109 100755
--- a/src/KSPACE/pppm_disp.h
+++ b/src/KSPACE/pppm_disp.h
@@ -1,602 +1,520 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(pppm/disp,PPPMDisp)
 
 #else
 
 #ifndef LMP_PPPM_DISP_H
 #define LMP_PPPM_DISP_H
 
 #include "lmptype.h"
 #include "mpi.h"
 
 #ifdef FFT_SINGLE
 typedef float FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_FLOAT
 #else
 typedef double FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_DOUBLE
 #endif
 
 #include "kspace.h"
 
 namespace LAMMPS_NS {
 
-
 #define EWALD_MAXORDER	6
 #define EWALD_FUNCS	3
 
 // KSpace style registered as pppm/disp.
 // Members without a suffix and members with the _6 suffix form two parallel
 // sets of grid bounds, FFT objects and work arrays.  The density_brick_g and
 // density_brick_a0..a6 grids (dispersion with geometric / arithmetic mixing)
 // are addressed with the _6 bounds.
 class PPPMDisp : public KSpace {
  public:
   PPPMDisp(class LAMMPS *, int, char **);
   virtual ~PPPMDisp();
   virtual void init();
   virtual void setup();
   void setup_grid();
   virtual void compute(int, int);
   // both timing routines return 2 if differentiation_flag is set, else 4
   virtual int timing_1d(int, double &);
   virtual int timing_3d(int, double &);
   virtual double memory_usage();
 
  protected:
 
 /* ----------------------------------------------------------------------
 Variables needed for calculating the 1/r and 1/r^6 potential
 ------------------------------------------------------------------------- */
 
   // function[0] gates work on the unsuffixed grid, function[1] and
   // function[2] gate work on the _6 grid (see timing_*() and memory_usage())
   int function[EWALD_FUNCS];
 
   int me,nprocs;
   int nfactors;
   int *factors;
   double qsum,qsqsum;
   double csumij;
   double csum;
   double *csumi;  //needed as correction term for per atom calculations!
   double *cii;
   int csumflag;
   double cutoff, cutoff_lj;
   double volume;
   double *B;
   double virial_1[6], virial_6[6];
   double sf_coeff[6], sf_coeff_6[6];
   int peratom_allocate_flag;
 
 
   double delxinv,delyinv,delzinv,delvolinv;
   double delxinv_6,delyinv_6,delzinv_6,delvolinv_6;
     
   double shift,shiftone;
   int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
   int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
   int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
   int nlower,nupper;
   int ngrid,nfft,nfft_both;
 
   double shift_6,shiftone_6;
   int nxlo_in_6,nylo_in_6,nzlo_in_6,nxhi_in_6,nyhi_in_6,nzhi_in_6;
   int nxlo_out_6,nylo_out_6,nzlo_out_6,nxhi_out_6,nyhi_out_6,nzhi_out_6;
   int nxlo_fft_6,nylo_fft_6,nzlo_fft_6,nxhi_fft_6,nyhi_fft_6,nzhi_fft_6;
   int nlower_6,nupper_6;
   int ngrid_6,nfft_6,nfft_both_6;
 
   //// variables needed for splitting the fourier transformed
   int com_max, com_procs;
   FFT_SCALAR **splitbuf1, **splitbuf2;
   int **dict_send, **dict_rec;
   int *com_each, *com_order;
   FFT_SCALAR *split_1, *split_2;
 
-
   //// the following variables are needed for every structure factor
   FFT_SCALAR ***density_brick;
   FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
   FFT_SCALAR *density_fft;
   FFT_SCALAR ***u_brick;
   FFT_SCALAR ***v0_brick,***v1_brick,***v2_brick,***v3_brick,***v4_brick,***v5_brick;
 
   FFT_SCALAR ***density_brick_g;
   FFT_SCALAR ***vdx_brick_g,***vdy_brick_g,***vdz_brick_g;
   FFT_SCALAR *density_fft_g;
   FFT_SCALAR ***u_brick_g;
   FFT_SCALAR ***v0_brick_g,***v1_brick_g,***v2_brick_g,***v3_brick_g,***v4_brick_g,***v5_brick_g;
 
   FFT_SCALAR ***density_brick_a0;
   FFT_SCALAR ***vdx_brick_a0,***vdy_brick_a0,***vdz_brick_a0;
   FFT_SCALAR *density_fft_a0;
   FFT_SCALAR ***u_brick_a0;
   FFT_SCALAR ***v0_brick_a0,***v1_brick_a0,***v2_brick_a0,***v3_brick_a0,***v4_brick_a0,***v5_brick_a0;
 
   FFT_SCALAR ***density_brick_a1;
   FFT_SCALAR ***vdx_brick_a1,***vdy_brick_a1,***vdz_brick_a1;
   FFT_SCALAR *density_fft_a1;
   FFT_SCALAR ***u_brick_a1;
   FFT_SCALAR ***v0_brick_a1,***v1_brick_a1,***v2_brick_a1,***v3_brick_a1,***v4_brick_a1,***v5_brick_a1;
 
   FFT_SCALAR ***density_brick_a2;
   FFT_SCALAR ***vdx_brick_a2,***vdy_brick_a2,***vdz_brick_a2;
   FFT_SCALAR *density_fft_a2;
   FFT_SCALAR ***u_brick_a2;
   FFT_SCALAR ***v0_brick_a2,***v1_brick_a2,***v2_brick_a2,***v3_brick_a2,***v4_brick_a2,***v5_brick_a2;
 
   FFT_SCALAR ***density_brick_a3;
   FFT_SCALAR ***vdx_brick_a3,***vdy_brick_a3,***vdz_brick_a3;
   FFT_SCALAR *density_fft_a3;
   FFT_SCALAR ***u_brick_a3;
   FFT_SCALAR ***v0_brick_a3,***v1_brick_a3,***v2_brick_a3,***v3_brick_a3,***v4_brick_a3,***v5_brick_a3;
 
   FFT_SCALAR ***density_brick_a4;
   FFT_SCALAR ***vdx_brick_a4,***vdy_brick_a4,***vdz_brick_a4;
   FFT_SCALAR *density_fft_a4;
   FFT_SCALAR ***u_brick_a4;
   FFT_SCALAR ***v0_brick_a4,***v1_brick_a4,***v2_brick_a4,***v3_brick_a4,***v4_brick_a4,***v5_brick_a4;
 
   FFT_SCALAR ***density_brick_a5;
   FFT_SCALAR ***vdx_brick_a5,***vdy_brick_a5,***vdz_brick_a5;
   FFT_SCALAR *density_fft_a5;
   FFT_SCALAR ***u_brick_a5;
   FFT_SCALAR ***v0_brick_a5,***v1_brick_a5,***v2_brick_a5,***v3_brick_a5,***v4_brick_a5,***v5_brick_a5;
 
   FFT_SCALAR ***density_brick_a6;
   FFT_SCALAR ***vdx_brick_a6,***vdy_brick_a6,***vdz_brick_a6;
   FFT_SCALAR *density_fft_a6;
   FFT_SCALAR ***u_brick_a6;
   FFT_SCALAR ***v0_brick_a6,***v1_brick_a6,***v2_brick_a6,***v3_brick_a6,***v4_brick_a6,***v5_brick_a6;
 
   //// needed for each interaction type
   double *greensfn;
   double **vg;
   double **vg2;
 
   double *greensfn_6;
   double **vg_6;
   double **vg2_6;
 
   double *fkx,*fky,*fkz;
   double *fkx2, *fky2, *fkz2;
   double *fkx_6, *fky_6, *fkz_6;
   double *fkx2_6, *fky2_6, *fkz2_6;
   double *gf_b;
   double *gf_b_6;
 
   double *sf_precoeff1, *sf_precoeff2, *sf_precoeff3, *sf_precoeff4, 
     *sf_precoeff5, *sf_precoeff6;
   double *sf_precoeff1_6, *sf_precoeff2_6, *sf_precoeff3_6, 
     *sf_precoeff4_6, *sf_precoeff5_6, *sf_precoeff6_6;
   FFT_SCALAR **rho1d,**rho_coeff;
   FFT_SCALAR **drho1d, **drho_coeff;
   FFT_SCALAR **rho1d_6, **rho_coeff_6;
   FFT_SCALAR **drho1d_6, **drho_coeff_6;
   FFT_SCALAR *work1,*work2;
   FFT_SCALAR *work1_6, *work2_6;
 
-
   class FFT3d *fft1,*fft2 ;
   class FFT3d *fft1_6, *fft2_6;
   class Remap *remap;
   class Remap *remap_6;
   class CommGrid *cg;
   class CommGrid *cg_peratom;
   class CommGrid *cg_6;
   class CommGrid *cg_peratom_6;
 
   int **part2grid;             // storage for particle -> grid mapping
   int **part2grid_6;
   int nmax;
 
   int triclinic;               // domain settings, orthog or triclinic
   double *boxlo;
                                // TIP4P settings
   int typeH,typeO;             // atom types of TIP4P water H and O atoms
   double qdist;                // distance from O site to negative charge
   double alpha;                // geometric factor
 
   void init_coeffs();	
 
   void set_grid();
   void set_grid_6();
   void set_init_g6();
   void set_fft_parameters(int&, int&, int&, int&, int&,int&,
                           int&, int&,int&, int&, int&,int&,
                           int&, int&,int&, int&, int&,int&,
                           int&, int&,int&, int&, int&,
 			  int&, int&, int&,
 		          double&, double&, int&);
   void set_n_pppm_6();
   void adjust_gewald();
   void adjust_gewald_6();
   double f();
   double derivf();
   double f_6();
   double derivf_6();
   double final_accuracy();
   double final_accuracy_6();
   double lj_rspace_error();
   double compute_qopt();
   double compute_qopt_6();
   double compute_qopt_ik();
   double compute_qopt_ad();
   double compute_qopt_6_ik();
   double compute_qopt_6_ad();
 
   void calc_csum();
   void prepare_splitting();
 
   virtual void allocate();
   virtual void allocate_peratom();
   virtual void deallocate();
   virtual void deallocate_peratom();
   int factorable(int);
   double rms(double, double, bigint, double, double **);
   double diffpr(double, double, double, double, double **);
   void compute_gf_denom(double*, int);
   double gf_denom(double, double, double, double*, int);
   
 
   void compute_sf_precoeff(int, int, int, int, 
                            int, int, int,
                            int, int, int,
                            double*, double*, double*,
                            double*, double*, double*);
   void compute_gf();
   void compute_sf_coeff();
   void compute_gf_6();
   void compute_sf_coeff_6();
 
 
   virtual void particle_map(double, double, double,
                              double, int **, int, int,
                              int, int, int,
                              int, int, int);
   virtual void particle_map_c(double, double, double,
 			      double, int **, int, int,
                               int, int, int,
                               int, int, int );
   virtual void make_rho_c();
   virtual void make_rho_g();
   virtual void make_rho_a();
 
   virtual void brick2fft(int, int, int, int, int, int,
 			 FFT_SCALAR ***, FFT_SCALAR *, FFT_SCALAR *,
                          LAMMPS_NS::Remap *);
   virtual void brick2fft_a();
 
   virtual void poisson_ik(FFT_SCALAR *, FFT_SCALAR *,
 		          FFT_SCALAR *, LAMMPS_NS::FFT3d *,LAMMPS_NS::FFT3d *, 
                           int, int, int, int, int, int, int,
 		          int, int, int, int, int, int,
                           int, int, int, double&, double *,
                           double *, double *, double *,
                           double *, double *, double *,
 		          FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, double *, double **, double **,
                           FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
                           FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
 
   virtual void poisson_ad(FFT_SCALAR*, FFT_SCALAR*,
                           FFT_SCALAR*, LAMMPS_NS::FFT3d*,LAMMPS_NS::FFT3d*, 
                           int, int, int, int,
                           int, int, int, int, int, int,
                           int, int, int, int, int, int,
                           double&, double*,
                           double*, double**, double**,
                           FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
                           FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***);
 
   virtual void poisson_peratom(FFT_SCALAR*, FFT_SCALAR*, LAMMPS_NS::FFT3d*, 
                                double**, double**, int,
                                int, int, int, int, int, int,
                                FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
                                FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***);
   virtual void poisson_2s_ik(FFT_SCALAR *, FFT_SCALAR *,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
 			     FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
 			     FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
   virtual void poisson_2s_ad(FFT_SCALAR *, FFT_SCALAR *,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
 			     FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
                              FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
 			     FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
 
   virtual void poisson_2s_peratom(FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
 				  FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
                                   FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
                                   FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***);
 
 
   virtual void fieldforce_c_ik();
   virtual void fieldforce_c_ad();
   virtual void fieldforce_c_peratom();
   virtual void fieldforce_g_ik();
   virtual void fieldforce_g_ad();
   virtual void fieldforce_g_peratom();
   virtual void fieldforce_a_ik();
   virtual void fieldforce_a_ad();
   virtual void fieldforce_a_peratom();
   // procs2grid2d writes the chosen px,py through its two pointer arguments
   void procs2grid2d(int,int,int,int *, int*);
   void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, 
 		     const FFT_SCALAR &, int, FFT_SCALAR **, FFT_SCALAR **);
   void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &, 
 		      const FFT_SCALAR &, int, FFT_SCALAR **, FFT_SCALAR **);
   void compute_rho_coeff(FFT_SCALAR **,FFT_SCALAR **, int);
   void slabcorr(int);
   void split_fourier();
   void split_order(int **);
 
   // grid communication
   // arguments: flag selecting the grid(s), buffer, list length, list of offsets
 
   void pack_forward(int, FFT_SCALAR *, int, int *);
   void unpack_forward(int, FFT_SCALAR *, int, int *);
   void pack_reverse(int, FFT_SCALAR *, int, int *);
   void unpack_reverse(int, FFT_SCALAR *, int, int *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot (yet) use PPPMDisp with triclinic box
 
-UNDOCUMENTED
+This feature is not yet supported.
 
 E: Cannot use PPPMDisp with 2d simulation
 
-UNDOCUMENTED
+The kspace style pppm/disp cannot be used in 2d simulations.  You can
+use 2d PPPM in a 3d simulation; see the kspace_modify command.
 
 E: Cannot use nonperiodic boundaries with PPPMDisp
 
-UNDOCUMENTED
+For kspace style pppm/disp, all 3 dimensions must have periodic
+boundaries unless you use the kspace_modify command to define a 2d
+slab with a non-periodic z dimension.
 
 E: Incorrect boundaries with slab PPPMDisp
 
-UNDOCUMENTED
+Must have periodic x,y dimensions and non-periodic z dimension to use
+2d slab option with PPPM.
 
 E: PPPMDisp coulomb order cannot be greater than %d
 
-UNDOCUMENTED
+This is a limitation of the PPPM implementation in LAMMPS.
 
 E: KSpace style is incompatible with Pair style
 
 Setting a kspace style requires that a pair style with a long-range
 Coulombic and Dispersion component be selected.
 
 E: Unsupported mixing rule in kspace_style pppm/disp for pair_style %s
 
-UNDOCUMENTED
+Only geometric mixing is supported.
 
 E: Unsupported order in kspace_style pppm/disp pair_style %s
 
-UNDOCUMENTED
+Only 1/r^6 dispersion terms are supported.
 
 W: Charges are set, but coulombic solver is not used
 
-UNDOCUMENTED
+The atom style supports charge, but this KSpace style does not include
+long-range Coulombics.
 
 E: Kspace style with selected options requires atom attribute q
 
 The atom style defined does not have these attributes.
 Change the atom style or switch off the coulomb solver.
 
 E: Cannot use kspace solver with selected options on system with no charge
 
 No atoms in system have a non-zero charge. Change charges or change 
 options of the kspace solver/pair style.
 
 W: System is not charge neutral, net charge = %g
 
 The total charge on all atoms on the system is not 0.0, which
 is not valid for Ewald or PPPM coulombic solvers.
 
 E: Bond and angle potentials must be defined for TIP4P
 
 Cannot use TIP4P pair potential unless bond and angle potentials
 are defined.
 
 E: Bad TIP4P angle type for PPPMDisp/TIP4P
 
-UNDOCUMENTED
+Specified angle type is not valid.
 
 E: Bad TIP4P bond type for PPPMDisp/TIP4P
 
-UNDOCUMENTED
+Specified bond type is not valid.
 
 W: Reducing PPPMDisp Coulomb order b/c stencil extends beyond neighbor processor.
 
-UNDOCUMENTED
+This may lead to a larger grid than desired.  See the kspace_modify overlap
+command to prevent changing of the PPPM order.
 
 E: PPPMDisp Coulomb grid is too large
 
-UNDOCUMENTED
+The global PPPM grid is larger than OFFSET in one or more dimensions.
+OFFSET is currently set to 4096.  You likely need to decrease the
+requested accuracy.
 
-E: Coulomb PPPMDisp order has been reduced below minorder
+E: Coulomb PPPMDisp order < minimum allowed order
 
-UNDOCUMENTED
+The default minimum order is 2.  This can be reset by the
+kspace_modify minorder command.
 
 W: Reducing PPPMDisp Dispersion order b/c stencil extends beyond neighbor processor
 
-UNDOCUMENTED
+This may lead to a larger grid than desired.  See the kspace_modify overlap
+command to prevent changing of the PPPM order.
 
 E: PPPMDisp Dispersion grid is too large
 
-UNDOCUMENTED
+The global dispersion grid is larger than OFFSET in one or more
+dimensions.  OFFSET is currently set to 4096.  You likely need to
+decrease the requested accuracy.
 
 E: Dispersion PPPMDisp order has been reduced below minorder
 
-UNDOCUMENTED
+This may lead to a larger grid than desired.  See the kspace_modify overlap
+command to prevent changing of the dispersion order.
 
 E: PPPM grid stencil extends beyond nearest neighbor processor
 
-UNDOCUMENTED
+This is not allowed if the kspace_modify overlap setting is no.
 
-E: epsilon or sigma reference not set by pair style in PPPMDisp
+E: Epsilon or sigma reference not set by pair style in PPPMDisp
 
-UNDOCUMENTED
+The pair style is not providing the needed epsilon or sigma values.
 
 E: KSpace accuracy too large to estimate G vector
 
-UNDOCUMENTED
+Reduce the accuracy request or specify g_ewald explicitly
+via the kspace_modify command.
 
-E: Could not compute grid size for Coulomb interaction!
+E: Could not compute grid size for Coulomb interaction
 
-UNDOCUMENTED
+The code is unable to compute a grid size consistent with the desired
+accuracy.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: Could not compute g_ewald
 
-UNDOCUMENTED
+The Newton-Raphson solver failed to converge to a good value for
+g_ewald.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: Could not adjust g_ewald_6
 
-UNDOCUMENTED
+The Newton-Raphson solver failed to converge to a good value for
+g_ewald_6.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: Cannot compute initial g_ewald_disp
 
 LAMMPS failed to compute an initial guess for the PPPM_disp g_ewald_6
 factor that partitions the computation between real space and k-space
 for Dispersion interactions.
 
-E: Could not compute grid size for Dispersion!
+E: Could not compute grid size for dispersion
 
-UNDOCUMENTED
+The code is unable to compute a grid size consistent with the desired
+accuracy.  This error should not occur for typical problems.  Please
+send an email to the developers.
 
 E: Out of range atoms - cannot compute PPPMDisp
 
-UNDOCUMENTED
-
-U: Cannot (yet) use PPPM_disp with triclinic box
-
-This feature is not yet supported.
-
-U: Cannot use PPPM_disp with 2d simulation
-
-The kspace style pppm_disp cannot be used in 2d simulations.  You can use
-2d PPPM_disp in a 3d simulation; see the kspace_modify command.
-
-U: Cannot use nonperiodic boundaries with PPPM_disp
-
-For kspace style pppm_disp, all 3 dimensions must have periodic boundaries
-unless you use the kspace_modify command to define a 2d slab with a
-non-periodic z dimension.
-
-U: Incorrect boundaries with slab PPPM_disp
-
-Must have periodic x,y dimensions and non-periodic z dimension to use
-2d slab option with PPPM_disp.
-
-U: PPPM_disp coulomb order cannot be greater than %d
-
-Self-explanatory.
-
-U: PPPM_disp dispersion order cannot be greater than %d
-
-Self-explanatory.
-
-U: Unsupported mixing rule in kspace_style pppm_disp for pair_style %s
-
-PPPM_disp requires arithemtic or geometric mixing rules.
-
-U: Unsupported order in kspace_style pppm_disp pair_style %s
-
-PPPM_disp only works for 1/r and 1/r^6 potentials
-
-U: Charges are set, but coulombic long-range solver is not used.
-
-Charges have been specified, however, calculations are performed
-as if they were zero.
-
-U: Bad TIP4P angle type for PPPM_disp/TIP4P
-
-Specified angle type is not valid.
-
-U: Bad TIP4P bond type for PPPM_disp/TIP4P
-
-Specified bond type is not valid.
-
-U: Reducing PPPM_disp Coulomb order b/c stencil extends beyond neighbor processor
-
-LAMMPS is attempting this in order to allow the simulation
-to run.  It should not effect the PPPM_disp accuracy.
-
-U: Reducing PPPM_disp Dispersion order b/c stencil extends beyond neighbor processor
-
-LAMMPS is attempting this in order to allow the simulation
-to run.  It should not effect the PPPM_disp accuracy.
-
-U: PPPM_disp Coulomb grid is too large
-
-The global PPPM_disp grid for Coulomb interactions is larger than OFFSET in one or more dimensions.
-OFFSET is currently set to 16384.  You likely need to decrease the
-requested precision.
-
-U: PPPM_grid Dispersion grid is too large
-
-One of the PPPM_disp grids for Dispersion interactions is larger than OFFSET in one or more dimensions.
-OFFSET is currently set to 16384.  You likely need to decrease the
-requested precision.
-
-U: Coulomb PPPM_disp order has been reduced to 0
-
-LAMMPS has attempted to reduce the PPPM_disp coulomb order to enable the simulation
-to run, but can reduce the order no further.  Try increasing the
-accuracy of PPPM_disp coulomb by reducing the tolerance size, thus inducing a 
-larger PPPM_disp coulomb grid.
-
-U: Dispersion PPPM_disp order has been reduced to 0
-
-LAMMPS has attempted to reduce the PPPM_disp dispersion order to enable the simulation
-to run, but can reduce the order no further.  Try increasing the
-accuracy of PPPM_disp dispersion by reducing the tolerance size, thus inducing a 
-larger PPPM_disp dispersion grid.
-
-U: Cannot compute PPPM_disp g_ewald
-
-LAMMPS failed to compute a valid approximation for the PPPM_disp g_ewald
-factor that partitions the computation between real space and k-space
-for Coulomb interactions.
-
-U: Cannot compute final g_ewald_disp
-
-LAMMPS failed to compute a final value for the PPPM_disp g_ewald_6
-factor that partitions the computation between real space and k-space
-for Disptersion interactions.
-
-U: Out of range atoms - cannot compute PPPM_disp
-
-One or more atoms are attempting to map their charge to a PPPM_disp grid
+One or more atoms are attempting to map their charge to a PPPM grid
 point that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
 */
diff --git a/src/KSPACE/pppm_disp_tip4p.h b/src/KSPACE/pppm_disp_tip4p.h
index 9acdfb002..d08b82b0e 100755
--- a/src/KSPACE/pppm_disp_tip4p.h
+++ b/src/KSPACE/pppm_disp_tip4p.h
@@ -1,87 +1,83 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(pppm/disp/tip4p,PPPMDispTIP4P)
 
 #else
 
 #ifndef LMP_PPPM_DISP_TIP4P_H
 #define LMP_PPPM_DISP_TIP4P_H
 
 #include "pppm_disp.h"
 
 namespace LAMMPS_NS {
 
+// K-space solver class registered above as style pppm/disp/tip4p.
+// Derives from PPPMDisp and declares its own init() plus the charge-mapping
+// and field/force routines listed under "protected"
+// (presumably overriding the PPPMDisp versions -- confirm in pppm_disp.h).
 class PPPMDispTIP4P : public PPPMDisp {
  public:
   PPPMDispTIP4P(class LAMMPS *, int, char **);
   virtual ~PPPMDispTIP4P () {};
   void init();
 
  protected:
   virtual void particle_map_c(double, double, double,
                               double, int **, int, int,
                               int, int, int, int, int, int);
   virtual void make_rho_c();
   virtual void fieldforce_c_ik();
   virtual void fieldforce_c_ad();
   virtual void fieldforce_c_peratom();
 
  private:
+  // NOTE(review): presumably locates the TIP4P M site of a water molecule
+  // for a given atom; implementation is not visible here -- confirm
   void find_M(int, int &, int &, double *); 
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Kspace style pppm/disp/tip4p requires newton on
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Out of range atoms - cannot compute PPPM
 
 One or more atoms are attempting to map their charge to a PPPM grid
 point that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
 E: TIP4P hydrogen is missing
 
 The TIP4P pairwise computation failed to find the correct H atom
 within a water molecule.
 
 E: TIP4P hydrogen has incorrect atom type
 
 The TIP4P pairwise computation found an H atom whose type does not
 agree with the specified H type.
 
-U: Kspace style pppm/tip4p requires newton on
-
-UNDOCUMENTED
-
 */
diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp
index 31d7278f8..c6184b79f 100644
--- a/src/MC/fix_gcmc.cpp
+++ b/src/MC/fix_gcmc.cpp
@@ -1,1301 +1,1302 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_gcmc.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "atom_vec_hybrid.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "comm.h"
 #include "group.h"
 #include "domain.h"
 #include "region.h"
 #include "random_park.h"
 #include "force.h"
 #include "pair.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include <iostream>
 
 using namespace std;
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
+  // parse the 8 required args (arg[3]..arg[10]), set defaults, read the
+  // optional keywords, validate the optional region and estimate its
+  // volume, then zero all counters and pointers owned by this fix
+
-  if (narg < 11) error->all(FLERR,"Illegal fix GCMC command");
+  if (narg < 11) error->all(FLERR,"Illegal fix gcmc command");
 
   vector_flag = 1;
   size_vector = 8;
   global_freq = 1;
   extvector = 0;
   restart_global = 1;
   time_depend = 1;
 
   // required args
 
   nevery = atoi(arg[3]);
   nexchanges = atoi(arg[4]);
   nmcmoves = atoi(arg[5]);
   ngcmc_type = atoi(arg[6]);
   seed = atoi(arg[7]);
   reservoir_temperature = atof(arg[8]);
   chemical_potential = atof(arg[9]);
   displace = atof(arg[10]);
 
-  if (nexchanges < 0) error->all(FLERR,"Illegal fix GCMC command");
-  if (nmcmoves < 0) error->all(FLERR,"Illegal fix GCMC command");
-  if (seed <= 0) error->all(FLERR,"Illegal fix GCMC command");
+  if (nexchanges < 0) error->all(FLERR,"Illegal fix gcmc command");
+  if (nmcmoves < 0) error->all(FLERR,"Illegal fix gcmc command");
+  if (seed <= 0) error->all(FLERR,"Illegal fix gcmc command");
   if (reservoir_temperature < 0.0)
-    error->all(FLERR,"Illegal fix GCMC command");
-  if (displace < 0.0) error->all(FLERR,"Illegal fix GCMC command");
+    error->all(FLERR,"Illegal fix gcmc command");
+  if (displace < 0.0) error->all(FLERR,"Illegal fix gcmc command");
 
   // set defaults
 
   molflag = 0;
   max_rotation_angle = 10*MY_PI/180;
   regionflag = 0; 
   iregion = -1; 
   region_volume = 0;
   max_region_attempts = 1000; 
   rotation_group = 0;
   rotation_groupbit = 0;
   rotation_inversegroupbit = 0;
+
+  // idregion is only allocated by the "region" keyword in options(),
+  // so give it a defined value before options() runs
+
+  idregion = NULL;
 
   // read options from end of input line
 
   options(narg-11,&arg[11]);
 
   // random number generator, same for all procs
 
   random_equal = new RanPark(lmp,seed);
 
   // random number generator, not the same for all procs
+  // NOTE(review): seeded identically to random_equal here; any per-proc
+  // reseeding must happen elsewhere -- confirm
 
   random_unequal = new RanPark(lmp,seed);
   
   // error checks on region and its extent being inside simulation box
 
   region_xlo = region_xhi = region_ylo = region_yhi = region_zlo = region_zhi = 0.0;
   if (regionflag) {
     if (domain->regions[iregion]->bboxflag == 0)
-      error->all(FLERR,"Fix GCMC region does not support a bounding box");
+      error->all(FLERR,"Fix gcmc region does not support a bounding box");
     if (domain->regions[iregion]->dynamic_check())
-      error->all(FLERR,"Fix GCMC region cannot be dynamic");
+      error->all(FLERR,"Fix gcmc region cannot be dynamic");
     
     region_xlo = domain->regions[iregion]->extent_xlo;
     region_xhi = domain->regions[iregion]->extent_xhi;
     region_ylo = domain->regions[iregion]->extent_ylo;
     region_yhi = domain->regions[iregion]->extent_yhi;
     region_zlo = domain->regions[iregion]->extent_zlo;
     region_zhi = domain->regions[iregion]->extent_zhi;
 
     if (region_xlo < domain->boxlo[0] || region_xhi > domain->boxhi[0] ||
         region_ylo < domain->boxlo[1] || region_yhi > domain->boxhi[1] ||
         region_zlo < domain->boxlo[2] || region_zhi > domain->boxhi[2])
-      error->all(FLERR,"Fix GCMC region extends outside simulation box");
+      error->all(FLERR,"Fix gcmc region extends outside simulation box");
 
     // estimate region volume using MC trials
+    // sample uniform points in the bounding box with random_equal and
+    // count the fraction for which the region's match() is nonzero
       
     double coord[3];
     int inside = 0;
     int attempts = 10000000;
     for (int i = 0; i < attempts; i++) {
       coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       if (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) != 0) inside++;
     }
 
     double max_region_volume = (region_xhi - region_xlo)*
      (region_yhi - region_ylo)*(region_zhi - region_zlo);
 
     region_volume = max_region_volume*static_cast<double> (inside)/
      static_cast<double> (attempts);
   }
 
   // compute the number of MC cycles that occur nevery timesteps
 
   ncycles = nexchanges + nmcmoves;
 
   // set up reneighboring
 
   force_reneighbor = 1;
   next_reneighbor = update->ntimestep + 1;
 
   // zero out counters
 
   ntranslation_attempts = 0.0;
   ntranslation_successes = 0.0;
   nrotation_attempts = 0.0;
   nrotation_successes = 0.0;
   ndeletion_attempts = 0.0;
   ndeletion_successes = 0.0;
   ninsertion_attempts = 0.0;
   ninsertion_successes = 0.0;
 
   gcmc_nmax = 0;
   local_gas_list = NULL;
+
+  // the destructor hands these to memory->destroy(), so they must be NULL
+  // if they are never allocated
+
+  atom_coord = NULL;
+  model_atom_buf = NULL;
 
   model_atom = NULL;
 }
 
 /* ----------------------------------------------------------------------
    parse optional parameters at end of input line
 ------------------------------------------------------------------------- */
 
 void FixGCMC::options(int narg, char **arg)
 {
+  // narg/arg are the keyword/value pairs that follow the required args
+  // recognized keywords, each taking one value:
+  //   molecule yes/no  -> molflag
+  //   region ID        -> iregion, idregion (copy of the ID), regionflag
+  //   maxangle degrees -> max_rotation_angle, stored in radians
+  // any other keyword or a missing value is an error
+
-  if (narg < 0) error->all(FLERR,"Illegal fix GCMC command");
+  if (narg < 0) error->all(FLERR,"Illegal fix gcmc command");
 
   int iarg = 0;
   while (iarg < narg) {
   if (strcmp(arg[iarg],"molecule") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix GCMC command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       if (strcmp(arg[iarg+1],"no") == 0) molflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) molflag = 1;
-      else error->all(FLERR,"Illegal fix GCMC command");
+      else error->all(FLERR,"Illegal fix gcmc command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"region") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix GCMC command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       iregion = domain->find_region(arg[iarg+1]);
       if (iregion == -1)
-        error->all(FLERR,"Region ID for fix GCMC does not exist");
+        error->all(FLERR,"Region ID for fix gcmc does not exist");
+      // keep a private copy of the region ID string
       int n = strlen(arg[iarg+1]) + 1;
       idregion = new char[n];
       strcpy(idregion,arg[iarg+1]);
       regionflag = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"maxangle") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix GCMC command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       max_rotation_angle = atof(arg[iarg+1]);
+      // convert degrees to radians
       max_rotation_angle *= MY_PI/180;
       iarg += 2;
-    } else error->all(FLERR,"Illegal fix GCMC command");
+    } else error->all(FLERR,"Illegal fix gcmc command");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixGCMC::~FixGCMC()
 {
+  // release everything this fix allocated
+
+  // idregion is allocated with new char[] in options() exactly when
+  // regionflag is set, so only free it in that case
+
+  if (regionflag) delete [] idregion;
   delete random_equal;
   delete random_unequal;
   memory->destroy(local_gas_list);
   memory->destroy(atom_coord);
   memory->destroy(model_atom_buf);
   delete model_atom;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixGCMC::setmask()
 {
+  // return a mask with only the PRE_EXCHANGE bit set
+
   int mask = 0;
   mask |= PRE_EXCHANGE;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixGCMC::init()
 {
+  // consistency checks between the fix settings and the current system,
+  // creation of the rotation group (molecule mode), and precomputation of
+  // beta, sigma, zz and the default image flag used by the MC moves
+
   int *type = atom->type;
 
   if (molflag == 0) {
     if (ngcmc_type <= 0 || ngcmc_type > atom->ntypes)
-      error->all(FLERR,"Invalid atom type in fix GCMC command");
+      error->all(FLERR,"Invalid atom type in fix gcmc command");
   }
 
   // if molflag not set, warn if any deletable atom has a mol ID
 
   if (molflag == 0 && atom->molecule_flag) {
     int *molecule = atom->molecule;
     int *mask = atom->mask;
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
       if (type[i] == ngcmc_type)
         if (molecule[i]) flag = 1;
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
+    // flagall is identical on every proc after the reduction and
+    // error->all() must be entered by all procs, not just proc 0
-    if (flagall && comm->me == 0)
+    if (flagall)
       error->all(FLERR,
-       "Fix GCMC cannot exchange individual atoms belonging to a molecule");
+       "Fix gcmc cannot exchange individual atoms belonging to a molecule");
   }
 
   // if molflag set, check for unset mol IDs
 
   if (molflag == 1) {
     int *molecule = atom->molecule;
     int *mask = atom->mask;
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
+      // mask is a bit field, so group membership is a bitwise test
-      if (mask[i] == groupbit)
+      if (mask[i] & groupbit)
         if (molecule[i] == 0) flag = 1;
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
+    // as above: every proc must take part in error->all()
-    if (flagall && comm->me == 0)
+    if (flagall)
       error->all(FLERR,
-       "All mol IDs should be set for fix GCMC group atoms");
+       "All mol IDs should be set for fix gcmc group atoms");
   }
 
   if ((molflag && (atom->molecule_flag == 0)) || 
       (molflag && ((!atom->tag_enable) || (!atom->map_style))))
     error->all(FLERR,
-     "Fix GCMC molecule command requires that atoms have molecule attributes");
+     "Fix gcmc molecule command requires that atoms have molecule attributes");
 
   if (force->pair->single_enable == 0)
-    error->all(FLERR,"Fix GCMC incompatible with given pair_style");
+    error->all(FLERR,"Fix gcmc incompatible with given pair_style");
 
   if (domain->dimension == 2)
-    error->all(FLERR,"Cannot use fix GCMC in a 2d simulation");
+    error->all(FLERR,"Cannot use fix gcmc in a 2d simulation");
 
   if (domain->triclinic == 1)
-    error->all(FLERR,"Cannot use fix GCMC with a triclinic box");
+    error->all(FLERR,"Cannot use fix gcmc with a triclinic box");
 
   // create a new group for rotation molecules
+  // built by passing "rotation_gas_atoms molecule <ngcmc_type>" to
+  // group->assign(); the group's bit is then used by attempt_molecule_rotation()
 
   if (molflag) {
     char **group_arg = new char*[3];
     group_arg[0] = (char *) "rotation_gas_atoms";
     group_arg[1] = (char *) "molecule";
     char digits[12];
     sprintf(digits,"%d",ngcmc_type);
     group_arg[2] = digits;
     group->assign(3,group_arg);
     rotation_group = group->find(group_arg[0]);
-    if (rotation_group == -1) error->all(FLERR,"Could not find fix group ID");
+    if (rotation_group == -1) 
+      error->all(FLERR,"Could not find fix gcmc rotation group ID");
     rotation_groupbit = group->bitmask[rotation_group];
     rotation_inversegroupbit = rotation_groupbit ^ ~0;
     delete [] group_arg;
   }
     
   // get all of the needed molecule data if molflag, 
   // otherwise just get the gas mass
   
   if (molflag) get_model_molecule();
   else gas_mass = atom->mass[ngcmc_type];
   
   if (gas_mass <= 0.0)
-    error->all(FLERR,"Illegal fix GCMC gas mass <= 0");
+    error->all(FLERR,"Illegal fix gcmc gas mass <= 0");
   
   // check that no deletable atoms are in atom->firstgroup
   // deleting such an atom would not leave firstgroup atoms first
   
   if (atom->firstgroup >= 0) {
     int *mask = atom->mask;
     int firstgroupbit = group->bitmask[atom->firstgroup];
 
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
+      // both memberships are bitwise tests on the mask; the previous
+      // "==" and logical "&&" did not test group membership
-      if ((mask[i] == groupbit) && (mask[i] && firstgroupbit)) flag = 1;
+      if ((mask[i] & groupbit) && (mask[i] & firstgroupbit)) flag = 1;
     
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
 
     if (flagall)
       error->all(FLERR,"Cannot do GCMC on atoms in atom_modify first group");
   }
   
   // compute beta, lambda, sigma, and the zz factor
+  // beta = 1/(kB*T), sigma scales the gaussian velocities of inserted
+  // atoms, zz = exp(beta*mu)/lambda^3 enters the insert/delete acceptance
 
   beta = 1.0/(force->boltz*reservoir_temperature);
   double lambda = sqrt(force->hplanck*force->hplanck/
                        (2.0*MY_PI*gas_mass*force->mvv2e*
                         force->boltz*reservoir_temperature));
   sigma = sqrt(force->boltz*reservoir_temperature/gas_mass/force->mvv2e);
   zz = exp(beta*chemical_potential)/(pow(lambda,3.0));
   
+  // image flag value with IMGMAX in all three fields, assigned to atoms
+  // that are rotated or inserted before they are remapped
+
   imagetmp = ((tagint) IMGMAX << IMG2BITS) | 
              ((tagint) IMGMAX << IMGBITS) | IMGMAX;
 }
 
 /* ----------------------------------------------------------------------
    attempt Monte Carlo translations, rotations, insertions, and deletions
    done before exchange, borders, reneighbor
    so that ghost atoms and neighbor lists will be correct
 ------------------------------------------------------------------------- */
 
 void FixGCMC::pre_exchange()
 {
   // just return if should not be called on this timestep
 
   if (next_reneighbor != update->ntimestep) return;
 
+  // cache the current box and sub-domain bounds for use by the moves
+
   xlo = domain->boxlo[0];
   xhi = domain->boxhi[0];
   ylo = domain->boxlo[1];
   yhi = domain->boxhi[1];
   zlo = domain->boxlo[2];
   zhi = domain->boxhi[2];
   sublo = domain->sublo;
   subhi = domain->subhi;
 
+  // volume used in the acceptance rules: the MC-estimated region volume
+  // if a region was given, otherwise the full box
+
   if (regionflag) volume = region_volume;
   else volume = domain->xprd * domain->yprd * domain->zprd;
 
   update_gas_atoms_list();
 
+  // perform ncycles = nexchanges + nmcmoves attempts
+  // each attempt is a move (translation, or rotation in molecule mode)
+  // when the random integer in 1..ncycles is <= nmcmoves, otherwise an
+  // exchange; random_equal keeps the choice identical on all procs
+  // within each category the two alternatives are picked 50/50
+
   if (molflag) {
     for (int i = 0; i < ncycles; i++) {
       int random_int_fraction =
         static_cast<int>(random_equal->uniform()*ncycles) + 1;
       if (random_int_fraction <= nmcmoves) {
         if (random_equal->uniform() < 0.5) attempt_molecule_translation();
         else attempt_molecule_rotation();
       } else {
         if (random_equal->uniform() < 0.5) attempt_molecule_deletion();
         else attempt_molecule_insertion();
       }
     }
   } else {
     for (int i = 0; i < ncycles; i++) {
       int random_int_fraction =
         static_cast<int>(random_equal->uniform()*ncycles) + 1;
       if (random_int_fraction <= nmcmoves) {
         attempt_atomic_translation();
       } else {
         if (random_equal->uniform() < 0.5) attempt_atomic_deletion();
         else attempt_atomic_insertion();
       }
     }
   }
 
+  // schedule the next invocation nevery steps from now
+
   next_reneighbor = update->ntimestep + nevery;
 }
 
 /* ----------------------------------------------------------------------
+   attempt to displace one randomly chosen gas atom
+   trial position = old position + displace * (random point in unit sphere)
+   accepted with probability exp(-beta*(energy_after - energy_before))
+   only the proc that owns the chosen atom (i >= 0) evaluates the move
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_translation()
 {
   ntranslation_attempts += 1.0;
   
   if (ngas == 0) return;
 
   int i;
   if (regionflag) i = pick_random_gas_atom_in_region();
   else i = pick_random_gas_atom();
   
   int success = 0;
   if (i >= 0) {
     double **x = atom->x;
     double energy_before = energy(i,ngcmc_type,-1,x[i]);
+    // rejection-sample (rx,ry,rz) from the cube until it lies in the unit sphere
     double rsq = 1.1;
     double rx,ry,rz;
     rx = ry = rz = 0.0;
     while (rsq > 1.0) {
       rx = 2*random_unequal->uniform() - 1.0;
       ry = 2*random_unequal->uniform() - 1.0;
       rz = 2*random_unequal->uniform() - 1.0;
       rsq = rx*rx + ry*ry + rz*rz;
     }
     double coord[3];
     coord[0] = x[i][0] + displace*rx;
     coord[1] = x[i][1] + displace*ry;
     coord[2] = x[i][2] + displace*rz;
     double energy_after = energy(i,ngcmc_type,-1,coord);
     if (random_unequal->uniform() < exp(-beta*(energy_after - energy_before))) {
       x[i][0] = coord[0];
       x[i][1] = coord[1];
       x[i][2] = coord[2];
       success = 1;
     }
   }
 
+  // let every proc know whether the owning proc accepted the move
+
   int success_all = 0;
   MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world);
 
   if (success_all) {
     comm->borders();
     update_gas_atoms_list();
     ntranslation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to delete one randomly chosen gas atom
+   accepted with probability ngas*exp(beta*deletion_energy)/(zz*volume)
+   only the proc that owns the chosen atom (i >= 0) evaluates the move
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_deletion()
 {
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
   
   int i;
   if (regionflag) i = pick_random_gas_atom_in_region();
   else i = pick_random_gas_atom();
 
   int success = 0;
   if (i >= 0) {
     double deletion_energy = energy(i,ngcmc_type,-1,atom->x[i]);
     if (random_unequal->uniform() < ngas*exp(beta*deletion_energy)/(zz*volume)) {
+      // delete atom i by copying the last local atom over it
       atom->avec->copy(atom->nlocal-1,i,1);
       atom->nlocal--;
       success = 1;
     }
   }
 
+  // let every proc know whether the owning proc accepted the move
+
   int success_all = 0;
   MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world);
 
   if (success_all) {
     if (atom->tag_enable) {
       atom->natoms--;
       if (atom->map_style) atom->map_init();
     }
     comm->borders();
     update_gas_atoms_list();
     ndeletion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to insert one gas atom of type ngcmc_type at a random position
+   position is uniform in the region (if set) or in the simulation box
+   accepted with probability zz*volume*exp(-beta*insertion_energy)/(ngas+1)
+   only the proc whose sub-domain contains the position evaluates the move
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_insertion()
 {
   ninsertion_attempts += 1.0;
 
+  // coord is drawn with random_equal, so it is the same on all procs
+  // with a region: resample in its bounding box until match() is nonzero,
+  // giving up (no insertion) after max_region_attempts resamples
+
   double coord[3];
   if (regionflag) {
     int region_attempt = 0;
     coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
     coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
     coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
       coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
   } else {
     coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
     coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
     coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
   }
 
+  // proc_flag = 1 on the proc whose sub-domain [sublo,subhi) holds coord
+
   int proc_flag = 0;
   if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
       coord[1] >= sublo[1] && coord[1] < subhi[1] &&
       coord[2] >= sublo[2] && coord[2] < subhi[2]) proc_flag = 1;
 
   int success = 0;
   if (proc_flag) {
     double insertion_energy = energy(-1,ngcmc_type,-1,coord);
     if (random_unequal->uniform() <
         zz*volume*exp(-beta*insertion_energy)/(ngas+1)) {
+      // new atom is the last local atom: set its group mask and draw
+      // each velocity component as gaussian*sigma
       atom->avec->create_atom(ngcmc_type,coord);
       int m = atom->nlocal - 1;
       atom->mask[m] = 1 | groupbit;
       atom->v[m][0] = random_unequal->gaussian()*sigma;
       atom->v[m][1] = random_unequal->gaussian()*sigma;
       atom->v[m][2] = random_unequal->gaussian()*sigma;
 
+      // let fixes that keep per-atom data initialize it for the new atom
+
       int nfix = modify->nfix;
       Fix **fix = modify->fix;
       for (int j = 0; j < nfix; j++)
         if (fix[j]->create_attribute) fix[j]->set_arrays(m);
 
       success = 1;
     }
   }
 
+  // let every proc know whether the owning proc accepted the move
+
   int success_all = 0;
   MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world);
 
   if (success_all) {
     if (atom->tag_enable) {
       atom->natoms++;
       atom->tag_extend();
       if (atom->map_style) atom->map_init();
     }
     comm->borders();
     update_gas_atoms_list();
     ninsertion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to rigidly displace one randomly chosen gas molecule
+   all of its atoms are shifted by displace * (random point in unit sphere)
+   accepted with probability exp(-beta*(energy_after - energy_before)),
+   with both energies summed over all procs
+   all random numbers come from random_equal, so every proc makes the
+   same choices
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_translation()
 {
   ntranslation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   int translation_molecule;
   if (regionflag) translation_molecule = pick_random_gas_molecule_in_region();
   else translation_molecule = pick_random_gas_molecule();
   if (translation_molecule == -1) return;
 
   double energy_before_sum = molecule_energy(translation_molecule);
   
   double **x = atom->x;
   double rx,ry,rz;
   double com_displace[3],coord[3];
+  // rejection-sample (rx,ry,rz) from the cube until it lies in the unit sphere
   double rsq = 1.1;
   while (rsq > 1.0) {
     rx = 2*random_equal->uniform() - 1.0;
     ry = 2*random_equal->uniform() - 1.0;
     rz = 2*random_equal->uniform() - 1.0;
     rsq = rx*rx + ry*ry + rz*rz;
   }
   com_displace[0] = displace*rx;
   com_displace[1] = displace*ry;
   com_displace[2] = displace*rz;
 
+  // energy of the locally owned atoms of the molecule at the trial positions
+
   double energy_after = 0.0;
   for (int i = 0; i < atom->nlocal; i++) {
     if (atom->molecule[i] == translation_molecule) {
       coord[0] = x[i][0] + com_displace[0];
       coord[1] = x[i][1] + com_displace[1];
       coord[2] = x[i][2] + com_displace[2];
       energy_after += energy(i,atom->type[i],translation_molecule,coord);
     }
   }
 
   double energy_after_sum = 0.0;
   MPI_Allreduce(&energy_after,&energy_after_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (random_equal->uniform() < exp(-beta*(energy_after_sum - energy_before_sum))) {
     for (int i = 0; i < atom->nlocal; i++) {
       if (atom->molecule[i] == translation_molecule) {
         x[i][0] += com_displace[0];
         x[i][1] += com_displace[1];
         x[i][2] += com_displace[2];
       }
     }
     comm->borders();
     update_gas_atoms_list();
     ntranslation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to rotate one randomly chosen gas molecule about the point
+   returned by group->xcm() for the rotation group
+   rotation matrix comes from get_rotation_matrix(max_rotation_angle,...)
+   accepted with probability exp(-beta*(energy_after - energy_before)),
+   with both energies summed over all procs
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_rotation()
 {
   nrotation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   int rotation_molecule;
   if (regionflag) rotation_molecule = pick_random_gas_molecule_in_region();
   else rotation_molecule = pick_random_gas_molecule();
   if (rotation_molecule == -1) return;
   
   double energy_before_sum = molecule_energy(rotation_molecule);
 
+  // make the rotation group contain exactly the local atoms of this molecule
+
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
   for (int i = 0; i < nlocal; i++) {
     if (atom->molecule[i] == rotation_molecule) {
       mask[i] |= rotation_groupbit;
     } else {
       mask[i] &= rotation_inversegroupbit;
     }
   }
 
+  // NOTE(review): gas_mass is passed as the second argument of xcm(),
+  // presumably the total mass of the molecule -- confirm against Group::xcm
+
   double com[3];
   com[0] = com[1] = com[2] = 0.0;
   group->xcm(rotation_group,gas_mass,com);
 
   double rot[9];
   get_rotation_matrix(max_rotation_angle,&rot[0]);
 
+  // rotate the unwrapped coords of each molecule atom about com, store the
+  // result in atom_coord[n], and evaluate the energy at the remapped position
+
   double **x = atom->x;
   tagint *image = atom->image;
   double energy_after = 0.0;
   int n = 0;
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & rotation_groupbit) {
       double xtmp[3];
       domain->unmap(x[i],image[i],xtmp);
       xtmp[0] -= com[0];
       xtmp[1] -= com[1];
       xtmp[2] -= com[2];
       atom_coord[n][0] = rot[0]*xtmp[0] + rot[1]*xtmp[1] + rot[2]*xtmp[2] + com[0];
       atom_coord[n][1] = rot[3]*xtmp[0] + rot[4]*xtmp[1] + rot[5]*xtmp[2] + com[1];
       atom_coord[n][2] = rot[6]*xtmp[0] + rot[7]*xtmp[1] + rot[8]*xtmp[2] + com[2];
       xtmp[0] = atom_coord[n][0];
       xtmp[1] = atom_coord[n][1];
       xtmp[2] = atom_coord[n][2];
       domain->remap(xtmp);
       energy_after += energy(i,atom->type[i],rotation_molecule,xtmp);
       n++;
     }
   }
 
   double energy_after_sum = 0.0;
   MPI_Allreduce(&energy_after,&energy_after_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (random_equal->uniform() < exp(-beta*(energy_after_sum - energy_before_sum))) {
+    // accepted: install the rotated coords in the same atom order as above,
+    // resetting the image flag to imagetmp before remapping into the box
     int n = 0;
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & rotation_groupbit) {
         image[i] = imagetmp;
         x[i][0] = atom_coord[n][0];
         x[i][1] = atom_coord[n][1];
         x[i][2] = atom_coord[n][2];
         domain->remap(x[i],image[i]);
         n++;
       }
     }
     comm->borders();
     update_gas_atoms_list();
     nrotation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to delete one randomly chosen gas molecule
+   accepted with probability ngas*exp(beta*deletion_energy_sum)/(zz*volume)
+   on acceptance every proc removes its local atoms of that molecule
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_deletion()
 {
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
   
   int deletion_molecule;
   if (regionflag) deletion_molecule = pick_random_gas_molecule_in_region();
   else deletion_molecule = pick_random_gas_molecule();
   if (deletion_molecule == -1) return;
 
   double deletion_energy_sum = molecule_energy(deletion_molecule);
 
   if (random_equal->uniform() < ngas*exp(beta*deletion_energy_sum)/(zz*volume)) {
+    // delete by copying the last local atom into slot i; i is not advanced
+    // after a deletion so the atom just copied in is also examined
     int i = 0;
     while (i < atom->nlocal) {
       if (atom->molecule[i] == deletion_molecule) {
         atom->avec->copy(atom->nlocal-1,i,1);
         atom->nlocal--;
       } else i++;
     }
     atom->natoms -= natoms_per_molecule;
     atom->map_init();
     comm->borders();
     update_gas_atoms_list();
     ndeletion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
+   attempt to insert one copy of the model molecule
+   the model coords are rotated by a matrix from get_rotation_matrix(MY_2PI,...)
+   and shifted to a random point in the region (if set) or simulation box
+   accepted with probability
+   zz*volume*exp(-beta*insertion_energy_sum)/(ngas+1)
+   each proc creates only the atoms that fall inside its sub-domain
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_insertion()
 {
   ninsertion_attempts += 1.0;
 
+  // NOTE(review): xprd, yprd, zprd are not used below
+
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
 
+  // com_coord is drawn with random_equal, so it is the same on all procs
+  // with a region: resample in its bounding box until match() is nonzero,
+  // giving up (no insertion) after max_region_attempts resamples
+
   double com_coord[3];
   if (regionflag) {
     int region_attempt = 0;
     com_coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
     com_coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
     com_coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(com_coord[0],com_coord[1],com_coord[2]) == 0) {
       com_coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       com_coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       com_coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
   } else {
     com_coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
     com_coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
     com_coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
   }
 
   double rot[9];
   get_rotation_matrix(MY_2PI,&rot[0]);
 
+  // trial coords for every atom of the molecule go into atom_coord[i]
+  // procflag[i] marks atoms whose remapped position is in this proc's
+  // sub-domain; only those contribute to this proc's insertion_energy
+  // NOTE(review): procflag is a variable-length array, which is a compiler
+  // extension in C++
+
   double **model_x = model_atom->x;
   double insertion_energy = 0.0;
   bool procflag[natoms_per_molecule];
   for (int i = 0; i < natoms_per_molecule; i++) {
     atom_coord[i][0] = rot[0]*model_x[i][0] + rot[1]*model_x[i][1] + rot[2]*model_x[i][2] + com_coord[0];
     atom_coord[i][1] = rot[3]*model_x[i][0] + rot[4]*model_x[i][1] + rot[5]*model_x[i][2] + com_coord[1];
     atom_coord[i][2] = rot[6]*model_x[i][0] + rot[7]*model_x[i][1] + rot[8]*model_x[i][2] + com_coord[2];
 
     double xtmp[3];
     xtmp[0] = atom_coord[i][0];
     xtmp[1] = atom_coord[i][1];
     xtmp[2] = atom_coord[i][2];
     domain->remap(xtmp);
 
     procflag[i] = false;
     if (xtmp[0] >= sublo[0] && xtmp[0] < subhi[0] &&
         xtmp[1] >= sublo[1] && xtmp[1] < subhi[1] &&
         xtmp[2] >= sublo[2] && xtmp[2] < subhi[2]) {
       procflag[i] = true;
       insertion_energy += energy(-1,model_atom->type[i],-1,xtmp);
     }
   }
 
   double insertion_energy_sum = 0.0;
   MPI_Allreduce(&insertion_energy,&insertion_energy_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (random_equal->uniform() < zz*volume*exp(-beta*insertion_energy_sum)/(ngas+1)) {  
     maxmol++;
     if (maxmol >= MAXSMALLINT) 
-      error->all(FLERR,"Fix GCMC ran out of available molecule IDs");
+      error->all(FLERR,"Fix gcmc ran out of available molecule IDs");
 
+    // new atom IDs and bond-topology partner IDs are the model's values
+    // offset by the current largest atom ID over all procs
+
     int maxtag = 0;
     for (int i = 0; i < atom->nlocal; i++) maxtag = MAX(maxtag,atom->tag[i]);
     int maxtag_all;
     MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_INT,MPI_MAX,world);
     int atom_offset = maxtag_all;
 
     int k = 0;
     double **x = atom->x;
     double **v = atom->v;
     tagint *image = atom->image;
     int *molecule = atom->molecule;
     int *tag = atom->tag;
     for (int i = 0; i < natoms_per_molecule; i++) {
+      // every proc unpacks each model atom from model_atom_buf as a new
+      // last local atom; procs that do not own it drop it again via the
+      // nlocal-- in the else branch below
       k += atom->avec->unpack_restart(&model_atom_buf[k]);
       if (procflag[i]) {
         int m = atom->nlocal - 1;
         image[m] = imagetmp;
         x[m][0] = atom_coord[i][0];
         x[m][1] = atom_coord[i][1];
         x[m][2] = atom_coord[i][2];
         domain->remap(x[m],image[m]);
         atom->molecule[m] = maxmol;
         tag[m] += atom_offset;
         v[m][0] = random_unequal->gaussian()*sigma;
         v[m][1] = random_unequal->gaussian()*sigma;
         v[m][2] = random_unequal->gaussian()*sigma;
         
         if (atom->avec->bonds_allow)
           for (int j = 0; j < atom->num_bond[m]; j++)
             atom->bond_atom[m][j] += atom_offset;
         if (atom->avec->angles_allow)
           for (int j = 0; j < atom->num_angle[m]; j++) {
             atom->angle_atom1[m][j] += atom_offset;
             atom->angle_atom2[m][j] += atom_offset;
             atom->angle_atom3[m][j] += atom_offset;
           }
         if (atom->avec->dihedrals_allow)
           for (int j = 0; j < atom->num_dihedral[m]; j++) {
             atom->dihedral_atom1[m][j] += atom_offset;
             atom->dihedral_atom2[m][j] += atom_offset;
             atom->dihedral_atom3[m][j] += atom_offset;
             atom->dihedral_atom4[m][j] += atom_offset;
           }
         if (atom->avec->impropers_allow)
           for (int j = 0; j < atom->num_improper[m]; j++) {
             atom->improper_atom1[m][j] += atom_offset;
             atom->improper_atom2[m][j] += atom_offset;
             atom->improper_atom3[m][j] += atom_offset;
             atom->improper_atom4[m][j] += atom_offset;
           }
         
+        // let fixes that keep per-atom data initialize it for the new atom
+
         int nfix = modify->nfix;
         Fix **fix = modify->fix;
         for (int j = 0; j < nfix; j++)
           if (fix[j]->create_attribute) fix[j]->set_arrays(m);
 
       } else atom->nlocal--;
     }
     atom->natoms += natoms_per_molecule;
     atom->map_init();
     comm->borders();
     update_gas_atoms_list();
     ninsertion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
    sum the pair energy between a trial particle and all other atoms
    i = index of the trial particle, skipped in the loop over atoms
    itype = atom type of the trial particle
    imolecule = molecule ID whose atoms are skipped when molflag is set
    coord = position at which the trial particle is evaluated
    loops over local and ghost atoms, only pairs inside cutsq contribute
 ------------------------------------------------------------------------- */
 
 double FixGCMC::energy(int i, int itype, int imolecule, double *coord)
 {
   // refresh the cached pair style and its squared-cutoff table
 
   pair = force->pair;
   cutsq = force->pair->cutsq;
 
   double **pos = atom->x;
   int *atype = atom->type;
   int *molid = atom->molecule;
   const int ntotal = atom->nlocal + atom->nghost;
 
   // arguments handed to pair->single(), fpair receives a value not used here
 
   double factor_coul = 1.0;
   double factor_lj = 1.0;
   double fpair = 0.0;
 
   double esum = 0.0;
   for (int j = 0; j < ntotal; j++) {
     if (j == i) continue;
     if (molflag && molid[j] == imolecule) continue;
 
     const double dx = coord[0] - pos[j][0];
     const double dy = coord[1] - pos[j][1];
     const double dz = coord[2] - pos[j][2];
     const double rsq = dx*dx + dy*dy + dz*dz;
     const int jtype = atype[j];
 
     if (rsq < cutsq[itype][jtype])
       esum += pair->single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fpair);
   }
 
   return esum;
 }
 
 /* ----------------------------------------------------------------------
    draw a random global gas-atom index in [0,ngas)
    return the local atom index if this proc's slice of the global list
    contains it, else return -1
 ------------------------------------------------------------------------- */
 
 int FixGCMC::pick_random_gas_atom()
 {
   const int iglobal = static_cast<int> (ngas*random_equal->uniform());
 
   // position of the draw relative to the start of my slice
 
   const int ilocal = iglobal - ngas_before;
   if (ilocal < 0 || ilocal >= ngas_local) return -1;
 
   return local_gas_list[ilocal];
 }
 
 /* ----------------------------------------------------------------------
    draw random gas atoms until one lies inside the GCMC region
    return the local atom index on the proc that owns the accepted atom
    return -1 on every other proc
    return -1 on all procs once max_region_attempts draws have been made
    collective call: every attempt performs an MPI_Allreduce over world
 ------------------------------------------------------------------------- */
 
 int FixGCMC::pick_random_gas_atom_in_region()
 {
   int i = -1;
   int i_own_candidate_all = 0;
   int region_attempt = 0;
   double **x = atom->x;
   while (!i_own_candidate_all) {
 
     // start every attempt from a clean slate
     // otherwise an index rejected by the region test in an earlier attempt
     //   would still be returned here when another proc succeeds later
 
     i = -1;
     int i_own_candidate = 0;
 
     int iwhichglobal = static_cast<int> (ngas*random_equal->uniform());
     if ((iwhichglobal >= ngas_before) &&
         (iwhichglobal < ngas_before + ngas_local)) {
       int icandidate = local_gas_list[iwhichglobal - ngas_before];
       if (domain->regions[iregion]->match(x[icandidate][0],x[icandidate][1],
                                           x[icandidate][2]) != 0) {
         i = icandidate;
         i_own_candidate = 1;
       }
     }
 
     // tell all procs whether any proc accepted a candidate
 
     MPI_Allreduce(&i_own_candidate,&i_own_candidate_all,1,MPI_INT,MPI_MAX,world);
 
     // NOTE(review): the limit is tested after the reduction, so a success
     //   on the last permitted attempt is still reported as -1
     //   kept as-is, confirm whether that is intended
 
     region_attempt++;
     if (region_attempt >= max_region_attempts) return -1;
   }
 
   return i;
 }
 
 /* ----------------------------------------------------------------------
    draw a random gas atom and return the molecule ID it belongs to
    the owning proc contributes the ID, all other procs contribute 0
    an MPI_MAX reduction hands the same result to every proc
 ------------------------------------------------------------------------- */
 
 int FixGCMC::pick_random_gas_molecule()
 {
   const int iglobal = static_cast<int> (ngas*random_equal->uniform());
   const int ilocal = iglobal - ngas_before;
 
   int molid_mine = 0;
   if (ilocal >= 0 && ilocal < ngas_local)
     molid_mine = atom->molecule[local_gas_list[ilocal]];
 
   int molid_all = 0;
   MPI_Allreduce(&molid_mine,&molid_all,1,MPI_INT,MPI_MAX,world);
 
   return molid_all;
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 int FixGCMC::pick_random_gas_molecule_in_region()
 {
   int region_attempt = 0;
   int gas_molecule_id = 0;
   int gas_molecule_id_all = 0;
   double **x = atom->x;
   while (!gas_molecule_id_all) {
     int iwhichglobal = static_cast<int> (ngas*random_equal->uniform());
     if ((iwhichglobal >= ngas_before) &&
         (iwhichglobal < ngas_before + ngas_local)) {
       int iwhichlocal = iwhichglobal - ngas_before;
       int i = local_gas_list[iwhichlocal];
       if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]) != 0) {
         gas_molecule_id = atom->molecule[i];
       }
     }
     gas_molecule_id_all = 0;
     MPI_Allreduce(&gas_molecule_id,&gas_molecule_id_all,1,MPI_INT,MPI_MAX,world);
     region_attempt++;
     if (region_attempt >= max_region_attempts) return -1;
   }
 
   return gas_molecule_id_all;
 }
 
 /* ----------------------------------------------------------------------
    compute the energy of the given gas molecule in its current position 
    sum across all procs that own atoms of the given molecule
 ------------------------------------------------------------------------- */
 
 double FixGCMC::molecule_energy(int gas_molecule_id)
 {
   double mol_energy = 0.0;
   for (int i = 0; i < atom->nlocal; i++)
     if (atom->molecule[i] == gas_molecule_id) {
       mol_energy += energy(i,atom->type[i],gas_molecule_id,atom->x[i]);
     }
 
   double mol_energy_sum = 0.0;
   MPI_Allreduce(&mol_energy,&mol_energy_sum,1,MPI_DOUBLE,MPI_SUM,world);
   
   return mol_energy_sum;
 }
 
 /* ----------------------------------------------------------------------
    fill rot[0..8] with a 3x3 rotation matrix built from 3 Euler angles
    each angle is max_angle times a separate random_equal->uniform() draw
    draws are made in the order x, y, z
 ------------------------------------------------------------------------- */
 
 void FixGCMC::get_rotation_matrix(double max_angle, double *rot)
 {
   const double theta_x = max_angle*random_equal->uniform();
   const double theta_y = max_angle*random_equal->uniform();
   const double theta_z = max_angle*random_equal->uniform();
 
   const double cx = cos(theta_x);
   const double sx = sin(theta_x);
   const double cy = cos(theta_y);
   const double sy = sin(theta_y);
   const double cz = cos(theta_z);
   const double sz = sin(theta_z);
 
   // products shared by several matrix entries
 
   const double cxsy = cx*sy;
   const double sxsy = sx*sy;
 
   // first row
 
   rot[0] = cy*cz;
   rot[1] = -cy*sz;
   rot[2] = -sy;
 
   // second row
 
   rot[3] = -sxsy*cz + cx*sz;
   rot[4] = sxsy*sz + cx*cz;
   rot[5] = -sx*cy;
 
   // third row
 
   rot[6] = cxsy*cz + sx*sz;
   rot[7] = -cxsy*sz + sx*cz;
   rot[8] = cx*cy;
 }
 
 /* ----------------------------------------------------------------------
    when using the molecule capability, populate model atom arrays from
    the model molecule provided by the user that will then be used to build 
    inserted molecules
 ------------------------------------------------------------------------- */
 
 void FixGCMC::get_model_molecule()
 {
   // find out how many atoms are in the model molecule
   // just loop through all of the atoms I own, then sum up across procs
   
   int model_molecule_number = ngcmc_type;
   int natoms_per_molecule_local = 0;
   for (int i = 0; i < atom->nlocal; i++) {
     if (atom->molecule[i] == model_molecule_number) {
       natoms_per_molecule_local++;
     }
   }
 
   natoms_per_molecule = 0;
   MPI_Allreduce(&natoms_per_molecule_local,&natoms_per_molecule,1,MPI_INT,MPI_MAX,world);
 
   if (natoms_per_molecule == 0)
-    error->all(FLERR,"Fix GCMC could not find any atoms in the user-supplied template molecule");
+    error->all(FLERR,"Fix gcmc could not find any atoms in the user-supplied template molecule");
   
   memory->create(atom_coord,natoms_per_molecule,3,"fixGCMC:atom_coord");
 
   // maxmol = largest molecule tag across all existing atoms
 
   maxmol = 0;
   if (atom->molecular) {
     for (int i = 0; i < atom->nlocal; i++) maxmol = MAX(atom->molecule[i],maxmol);
     int maxmol_all;
     MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_INT,MPI_MAX,world);
     maxmol = maxmol_all;
   }
 
   // communication buffer for model atom's info
   // max_size = largest buffer needed by any proc
   // must do before new Atom class created,
   //   since size_restart() uses atom->nlocal
 
   int max_size;
   int buf_send_size = atom->avec->size_restart(); 
   
   MPI_Allreduce(&buf_send_size,&max_size,1,MPI_INT,MPI_MAX,world);
   double *buf;
   memory->create(buf,max_size,"fixGCMC:buf");
   
   // create storage space for the model molecule's atoms
   // create a new atom object called atom to store the data
   
   // old_atom = original atom class
   // atom = new model atom class
   // if old_atom style was hybrid, pass sub-style names to create_avec
 
   Atom *old_atom = atom;
   atom = new Atom(lmp);
   atom->settings(old_atom);
   
   int nstyles = 0;
   char **keywords = NULL;
   if (strcmp(old_atom->atom_style,"hybrid") == 0) {
     AtomVecHybrid *avec_hybrid = (AtomVecHybrid *) old_atom->avec;
     nstyles = avec_hybrid->nstyles;
     keywords = avec_hybrid->keywords;
   }
   
   atom->create_avec(old_atom->atom_style,nstyles,keywords);
   
   // assign atom and topology counts in model atom class from old_atom
 
   atom->ntypes = old_atom->ntypes;
   atom->nbondtypes = old_atom->nbondtypes;
   atom->nangletypes = old_atom->nangletypes;
   atom->ndihedraltypes = old_atom->ndihedraltypes;
   atom->nimpropertypes = old_atom->nimpropertypes;
   atom->bond_per_atom = old_atom->bond_per_atom;
   atom->angle_per_atom = old_atom->angle_per_atom;
   atom->dihedral_per_atom = old_atom->dihedral_per_atom;
   atom->improper_per_atom = old_atom->improper_per_atom;
   atom->extra_bond_per_atom = old_atom->extra_bond_per_atom;
   atom->allocate_type_arrays();
   atom->avec->grow(natoms_per_molecule);
   
   // copy type arrays to model atom class
   
   if (atom->mass) {
     for (int itype = 1; itype <= atom->ntypes; itype++) {
       atom->mass_setflag[itype] = old_atom->mass_setflag[itype];
       if (atom->mass_setflag[itype]) atom->mass[itype] = old_atom->mass[itype];
     }
   }
   // loop over all procs
   // if this iteration of loop is me:
   //   pack my atom data into buf
   //   bcast it to all other procs
 
   AtomVec *old_avec = old_atom->avec;
   AtomVec *model_avec = atom->avec;
 
   int model_buf_size = 0;
   for (int iproc = 0; iproc < comm->nprocs; iproc++) {
     int nbuf_iproc = 0;
     if (comm->me == iproc) {
       for (int i = 0; i < old_atom->nlocal; i++) {
         if (old_atom->molecule[i] == model_molecule_number) {
           nbuf_iproc += old_avec->pack_restart(i,&buf[nbuf_iproc]);
         }
       }
     }
     MPI_Bcast(&nbuf_iproc,1,MPI_INT,iproc,world);
     MPI_Bcast(buf,nbuf_iproc,MPI_DOUBLE,iproc,world);
 
     model_buf_size += nbuf_iproc;
      
     int m = 0;
     while (m < nbuf_iproc)
       m += model_avec->unpack_restart(&buf[m]);
   }
 
   // free communication buffer 
 
   memory->destroy(buf);
   
   // make sure that the number of model atoms is equal to the number of atoms per gas molecule
   
   int nlocal = atom->nlocal;
   if (nlocal != natoms_per_molecule)
-    error->all(FLERR,"Fix GCMC incorrect number of atoms per molecule");
+    error->all(FLERR,"Fix gcmc incorrect number of atoms per molecule");
   
   // compute the model molecule's mass and center-of-mass
   // then recenter model molecule on the origin
 
   double com[3]; 
   gas_mass = group->mass(0);
   group->xcm(0,gas_mass,com);
 
   double **x = atom->x;  
   for (int i = 0; i < nlocal; i++) {
     domain->unmap(x[i],atom->image[i]);
     x[i][0] -= com[0];
     x[i][1] -= com[1];
     x[i][2] -= com[2];
   }
 
   int mintag = atom->tag[0];
   for (int i = 0; i < atom->nlocal; i++) mintag = MIN(mintag,atom->tag[i]);
   int atom_offset = mintag - 1;
   
   for (int i = 0; i < nlocal; i++) {
     atom->mask[i] = 1 | groupbit;
     atom->tag[i] -= atom_offset;
     if (atom->avec->bonds_allow)
       for (int j = 0; j < atom->num_bond[i]; j++)
         atom->bond_atom[i][j] -= atom_offset;
     if (atom->avec->angles_allow)
       for (int j = 0; j < atom->num_angle[i]; j++) {
         atom->angle_atom1[i][j] -= atom_offset;
         atom->angle_atom2[i][j] -= atom_offset;
         atom->angle_atom3[i][j] -= atom_offset;
       }
     if (atom->avec->dihedrals_allow)
       for (int j = 0; j < atom->num_dihedral[i]; j++) {
         atom->dihedral_atom1[i][j] -= atom_offset;
         atom->dihedral_atom2[i][j] -= atom_offset;
         atom->dihedral_atom3[i][j] -= atom_offset;
         atom->dihedral_atom4[i][j] -= atom_offset;
       }
     if (atom->avec->impropers_allow)
       for (int j = 0; j < atom->num_improper[i]; j++) {
         atom->improper_atom1[i][j] -= atom_offset;
         atom->improper_atom2[i][j] -= atom_offset;
         atom->improper_atom3[i][j] -= atom_offset;
         atom->improper_atom4[i][j] -= atom_offset;
       }
   }
 
   // pack model atoms into a buffer for use during molecule insertions
   
   memory->create(model_atom_buf,model_buf_size,"fixGCMC:model_atom_buf");
   int n = 0;
   for (int i = 0; i < nlocal; i++) 
     n += model_avec->pack_restart(i,&model_atom_buf[n]);
 
   // move atom to model_atom and restore old_atom class pointer back to atom
 
   model_atom = atom;
   atom = old_atom;
 }
 
 /* ----------------------------------------------------------------------
    update the list of gas atoms
 ------------------------------------------------------------------------- */
 
 void FixGCMC::update_gas_atoms_list()
 {
   if (atom->nlocal > gcmc_nmax) {
     memory->sfree(local_gas_list);
     gcmc_nmax = atom->nmax;
     local_gas_list = (int *) memory->smalloc(gcmc_nmax*sizeof(int),
      "GCMC:local_gas_list");
   }
 
   ngas_local = 0;
   for (int i = 0; i < atom->nlocal; i++) {
     if (atom->mask[i] & groupbit) {
       local_gas_list[ngas_local] = i;
       ngas_local++;
     }
   }
 
   MPI_Allreduce(&ngas_local,&ngas,1,MPI_INT,MPI_SUM,world);
   MPI_Scan(&ngas_local,&ngas_before,1,MPI_INT,MPI_SUM,world);
   ngas_before -= ngas_local;
 }
 
 /* ----------------------------------------------------------------------
   return the attempt and success counters of the MC moves
   n = 0,1 translation / 2,3 insertion / 4,5 deletion / 6,7 rotation
   even n = attempts, odd n = successes, any other n returns 0.0
 ------------------------------------------------------------------------- */
 
 double FixGCMC::compute_vector(int n)
 {
   switch (n) {
   case 0: return ntranslation_attempts;
   case 1: return ntranslation_successes;
   case 2: return ninsertion_attempts;
   case 3: return ninsertion_successes;
   case 4: return ndeletion_attempts;
   case 5: return ndeletion_successes;
   case 6: return nrotation_attempts;
   case 7: return nrotation_successes;
   default: break;
   }
   return 0.0;
 }
 
 /* ----------------------------------------------------------------------
    memory usage in bytes of the gcmc_nmax ints counted for local_gas_list
 ------------------------------------------------------------------------- */
 
 double FixGCMC::memory_usage()
 {
   return static_cast<double> (gcmc_nmax * sizeof(int));
 }
 
 /* ----------------------------------------------------------------------
    write the state of the Fix to a restart file
    stored values = state of both random number generators, next_reneighbor
    all procs collect the values, only proc 0 writes them
 ------------------------------------------------------------------------- */
 
 void FixGCMC::write_restart(FILE *fp)
 {
   double list[4];
   list[0] = random_equal->state();
   list[1] = random_unequal->state();
   list[2] = next_reneighbor;
   const int nvalues = 3;
 
   if (comm->me != 0) return;
 
   // byte count first, then the values themselves
 
   int size = nvalues * sizeof(double);
   fwrite(&size,sizeof(int),1,fp);
   fwrite(list,sizeof(double),nvalues,fp);
 }
 
 /* ----------------------------------------------------------------------
    restore the Fix from the values written by write_restart()
    buf holds 3 doubles = the two generator states, then next_reneighbor
 ------------------------------------------------------------------------- */
 
 void FixGCMC::restart(char *buf)
 {
   double *values = reinterpret_cast<double *> (buf);
 
   // seed ends up holding the state of the second generator
 
   seed = static_cast<int> (values[0]);
   random_equal->reset(seed);
 
   seed = static_cast<int> (values[1]);
   random_unequal->reset(seed);
 
   next_reneighbor = static_cast<int> (values[2]);
 }
diff --git a/src/MC/fix_gcmc.h b/src/MC/fix_gcmc.h
index 2e9b147a2..5e61c517e 100644
--- a/src/MC/fix_gcmc.h
+++ b/src/MC/fix_gcmc.h
@@ -1,232 +1,220 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(gcmc,FixGCMC)
 
 #else
 
 #ifndef LMP_FIX_GCMC_H
 #define LMP_FIX_GCMC_H
 
 #include "stdio.h"
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix implementing grand canonical Monte Carlo (GCMC) exchanges and moves
 // of gas atoms or molecules; the method bodies live in fix_gcmc.cpp
 
 class FixGCMC : public Fix {
  public:
   FixGCMC(class LAMMPS *, int, char **);
   ~FixGCMC();
   int setmask();
   void init();
   void pre_exchange();
 
   // individual Monte Carlo moves
 
   void attempt_atomic_translation();
   void attempt_atomic_deletion();
   void attempt_atomic_insertion();
   void attempt_molecule_translation();
   void attempt_molecule_rotation();
   void attempt_molecule_deletion();
   void attempt_molecule_insertion();
 
   // pair energy of one particle (index, type, molecule ID, position)
   // with all other local and ghost atoms
 
   double energy(int, int, int, double *);
 
   // random selection of a gas atom (local index or -1)
   // or of a gas molecule (molecule ID)
 
   int pick_random_gas_atom();
   int pick_random_gas_atom_in_region();
   int pick_random_gas_molecule();
   int pick_random_gas_molecule_in_region();
 
   double molecule_energy(int);                   // energy of a molecule, summed over procs
   void get_rotation_matrix(double, double *);    // max angle in, 3x3 matrix (9 doubles) out
   void get_model_molecule();                     // build model_atom and model_atom_buf
   void update_gas_atoms_list();                  // rebuild local_gas_list and ngas counts
   double compute_vector(int);                    // attempt/success counters, index 0-7
   double memory_usage();
   void write_restart(FILE *);
   void restart(char *);
 
  private:
   int rotation_group,rotation_groupbit;
   int rotation_inversegroupbit;
   int ngcmc_type,nevery,seed;  // ngcmc_type doubles as the model molecule ID
   int ncycles,nexchanges,nmcmoves;
   int ngas;                 // # of gas atoms on all procs
   int ngas_local;           // # of gas atoms on this proc
   int ngas_before;          // # of gas atoms on procs < this proc
   int molflag;              // 0 = atomic, 1 = molecular system
   int regionflag;           // 0 = anywhere in box, 1 = specific region
   int iregion;              // GCMC region
   char *idregion;           // GCMC region id
 
   int maxmol;               // largest molecule tag across all existing atoms
   int natoms_per_molecule;  // number of atoms in each gas molecule
 
   // counters returned by compute_vector()
 
   double ntranslation_attempts;
   double ntranslation_successes;
   double nrotation_attempts;
   double nrotation_successes;
   double ndeletion_attempts;
   double ndeletion_successes;
   double ninsertion_attempts;
   double ninsertion_successes;
 
   int gcmc_nmax;            // allocated length of local_gas_list
   int max_region_attempts;  // draw limit of the *_in_region pick methods
   double gas_mass;          // mass of the model molecule, set in get_model_molecule()
   double reservoir_temperature;
   double chemical_potential;
   double displace;
   double max_rotation_angle;
   double beta,zz,sigma,volume;  // sigma scales the Gaussian velocities of inserted atoms
   double xlo,xhi,ylo,yhi,zlo,zhi;
   double region_xlo,region_xhi,region_ylo,region_yhi,region_zlo,region_zhi;
   double region_volume;
   double *sublo,*subhi;
   int *local_gas_list;      // local indices of atoms in the fix group
   double **cutsq;           // copy of force->pair->cutsq, refreshed in energy()
   double **atom_coord;      // natoms_per_molecule x 3 coordinate array
   double *model_atom_buf;   // model molecule atoms packed via pack_restart()
   tagint imagetmp;          // image value assigned to atoms of an inserted molecule
 
   class Pair *pair;         // copy of force->pair, refreshed in energy()
 
   class RanPark *random_equal;    // used to pick gas atoms and rotation angles
   class RanPark *random_unequal;  // used for velocities of inserted atoms
   
   class Atom *model_atom;   // Atom instance holding the model molecule
 
   void options(int, char **);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
-E: Fix GCMC region does not support a bounding box
+E: Fix gcmc region does not support a bounding box
 
-UNDOCUMENTED
+Not all regions represent bounded volumes.  You cannot use
+such a region with the fix gcmc command.
 
-E: Fix GCMC region cannot be dynamic
+E: Fix gcmc region cannot be dynamic
 
-UNDOCUMENTED
+Only static regions can be used with fix gcmc.
 
-E: Fix GCMC region extends outside simulation box
+E: Fix gcmc region extends outside simulation box
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Region ID for fix GCMC does not exist
+E: Region ID for fix gcmc does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Invalid atom type in fix GCMC command
+E: Invalid atom type in fix gcmc command
 
 The atom type specified in the GCMC command does not exist.
 
-E: Fix GCMC cannot exchange individual atoms belonging to a molecule
+E: Fix gcmc cannot exchange individual atoms belonging to a molecule
 
 This is an error since you should not delete only one atom of a molecule.
 The user has specified atomic (non-molecular) gas exchanges, but an atom
 belonging to a molecule could be deleted.
 
-E: All mol IDs should be set for fix GCMC group atoms
+E: All mol IDs should be set for fix gcmc group atoms
 
 The molecule flag is on, yet not all molecule ids in the fix group have
 been set to non-zero positive values by the user. This is an error since
-all atoms in the fix GCMC group are eligible for deletion, rotation, and
+all atoms in the fix gcmc group are eligible for deletion, rotation, and
 translation and therefore must have valid molecule ids.
 
-E: Fix GCMC molecule command requires that atoms have molecule attributes
+E: Fix gcmc molecule command requires that atoms have molecule attributes
 
 Should not choose the GCMC molecule feature if no molecules are being
 simulated. The general molecule flag is off, but GCMC's molecule flag
 is on.
 
-E: Fix GCMC incompatible with given pair_style
+E: Fix gcmc incompatible with given pair_style
 
 Some pair_styles do not provide single-atom energies, which are needed
-by fix GCMC.
+by fix gcmc.
 
-E: Cannot use fix GCMC in a 2d simulation
+E: Cannot use fix gcmc in a 2d simulation
 
-Fix GCMC is set up to run in 3d only. No 2d simulations with fix GCMC
+Fix gcmc is set up to run in 3d only. No 2d simulations with fix gcmc
 are allowed.
 
-E: Cannot use fix GCMC with a triclinic box
+E: Cannot use fix gcmc with a triclinic box
 
-Fix GCMC is set up to run with othogonal boxes only. Simulations with
-triclinic boxes and fix GCMC are not allowed.
+Fix gcmc is set up to run with orthogonal boxes only. Simulations with
+triclinic boxes and fix gcmc are not allowed.
 
-E: Could not find fix group ID
+E: Could not find fix gcmc rotation group ID
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Illegal fix GCMC gas mass <= 0
+E: Illegal fix gcmc gas mass <= 0
 
 The computed mass of the designated gas molecule or atom type was less 
 than or equal to zero.
 
 E: Cannot do GCMC on atoms in atom_modify first group
 
 This is a restriction due to the way atoms are organized in a list to
 enable the atom_modify first command.
 
-E: Fix GCMC ran out of available molecule IDs
+E: Fix gcmc ran out of available molecule IDs
 
 This is a code limitation where more than MAXSMALLINT (usually around
 two billion) molecules have been created. The code needs to be 
 modified to either allow molecule ID recycling or use bigger ints for
 molecule IDs. A work-around is to run shorter simulations.
 
-E: Fix GCMC could not find any atoms in the user-supplied template molecule
+E: Fix gcmc could not find any atoms in the user-supplied template molecule
 
-When using the molecule option with fix GCMC, the user must supply a 
+When using the molecule option with fix gcmc, the user must supply a 
 template molecule in the usual LAMMPS data file with its molecule id
-specified in the fix GCMC command as the "type" of the exchanged gas.
+specified in the fix gcmc command as the "type" of the exchanged gas.
 
-E: Fix GCMC incorrect number of atoms per molecule
+E: Fix gcmc incorrect number of atoms per molecule
 
 The number of atoms in each gas molecule was not computed correctly.
 
-U: Fix GCMC fix group should be all
-
-Fix GCMC will ignore the fix group specified by the user. User should
-set the fix group to "all". Fix GCMC will overwrite the user-specified
-fix group with a group consisting of all GCMC gas atoms.
-
-U: Fix GCMC region does not support a bounding box 
+E: Fix gcmc region cannot be dynamic 
  
-Not all regions represent bounded volumes.  You cannot use 
-such a region with the fix GCMC command. 
+Only static regions can be used with fix gcmc. 
  
-E: Fix GCMC region cannot be dynamic 
- 
-Only static regions can be used with fix GCMC. 
- 
-E: Fix GCMC region extends outside simulation box 
+E: Fix gcmc region extends outside simulation box 
  
 Self-explanatory. 
  
-E: Region ID for fix GCMC does not exist 
+E: Region ID for fix gcmc does not exist 
  
 Self-explanatory.
 
-
-
 */
diff --git a/src/MOLECULE/atom_vec_full.cpp b/src/MOLECULE/atom_vec_full.cpp
index dbda299f7..f48c08121 100644
--- a/src/MOLECULE/atom_vec_full.cpp
+++ b/src/MOLECULE/atom_vec_full.cpp
@@ -1,1034 +1,1034 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "stdlib.h"
 #include "atom_vec_full.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 
 /* ---------------------------------------------------------------------- */
 
 AtomVecFull::AtomVecFull(LAMMPS *lmp) : AtomVec(lmp)
 {
   // style capabilities: molecular, all topology kinds, per-type mass
 
   molecular = 1;
   bonds_allow = 1;
   angles_allow = 1;
   dihedrals_allow = 1;
   impropers_allow = 1;
   mass_type = 1;
 
   // forward/reverse comm carries only x and f
 
   comm_x_only = 1;
   comm_f_only = 1;
 
   // per-atom value counts for communication and data file lines
 
   size_forward = 3;
   size_reverse = 3;
   size_border = 8;
   size_velocity = 3;
   size_data_atom = 7;
   size_data_vel = 4;
   xcol_data = 5;
 
   // this style stores a molecule ID and a charge per atom
 
   atom->molecule_flag = 1;
   atom->q_flag = 1;
 }
 
 /* ----------------------------------------------------------------------
    grow atom arrays
    n = 0 grows arrays by DELTA
    n > 0 allocates arrays to size n
 ------------------------------------------------------------------------- */
 
 void AtomVecFull::grow(int n)
 {
   if (n == 0) nmax += DELTA;
   else nmax = n;
   atom->nmax = nmax;
   if (nmax < 0 || nmax > MAXSMALLINT)
     error->one(FLERR,"Per-processor system is too big");
 
   tag = memory->grow(atom->tag,nmax,"atom:tag");
   type = memory->grow(atom->type,nmax,"atom:type");
   mask = memory->grow(atom->mask,nmax,"atom:mask");
   image = memory->grow(atom->image,nmax,"atom:image");
   x = memory->grow(atom->x,nmax,3,"atom:x");
   v = memory->grow(atom->v,nmax,3,"atom:v");
   f = memory->grow(atom->f,nmax*comm->nthreads,3,"atom:f");
 
   q = memory->grow(atom->q,nmax,"atom:q");
   molecule = memory->grow(atom->molecule,nmax,"atom:molecule");
 
   nspecial = memory->grow(atom->nspecial,nmax,3,"atom:nspecial");
   special = memory->grow(atom->special,nmax,atom->maxspecial,"atom:special");
 
   num_bond = memory->grow(atom->num_bond,nmax,"atom:num_bond");
   bond_type = memory->grow(atom->bond_type,nmax,atom->bond_per_atom,
                            "atom:bond_type");
   bond_atom = memory->grow(atom->bond_atom,nmax,atom->bond_per_atom,
                            "atom:bond_atom");
 
   num_angle = memory->grow(atom->num_angle,nmax,"atom:num_angle");
   angle_type = memory->grow(atom->angle_type,nmax,atom->angle_per_atom,
                             "atom:angle_type");
   angle_atom1 = memory->grow(atom->angle_atom1,nmax,atom->angle_per_atom,
                              "atom:angle_atom1");
   angle_atom2 = memory->grow(atom->angle_atom2,nmax,atom->angle_per_atom,
                              "atom:angle_atom2");
   angle_atom3 = memory->grow(atom->angle_atom3,nmax,atom->angle_per_atom,
                              "atom:angle_atom3");
 
   num_dihedral = memory->grow(atom->num_dihedral,nmax,"atom:num_dihedral");
   dihedral_type = memory->grow(atom->dihedral_type,nmax,
                                atom->dihedral_per_atom,"atom:dihedral_type");
   dihedral_atom1 =
     memory->grow(atom->dihedral_atom1,nmax,atom->dihedral_per_atom,
                  "atom:dihedral_atom1");
   dihedral_atom2 =
     memory->grow(atom->dihedral_atom2,nmax,atom->dihedral_per_atom,
                  "atom:dihedral_atom2");
   dihedral_atom3 =
     memory->grow(atom->dihedral_atom3,nmax,atom->dihedral_per_atom,
                  "atom:dihedral_atom3");
   dihedral_atom4 =
     memory->grow(atom->dihedral_atom4,nmax,atom->dihedral_per_atom,
                  "atom:dihedral_atom4");
 
   num_improper = memory->grow(atom->num_improper,nmax,"atom:num_improper");
   improper_type =
     memory->grow(atom->improper_type,nmax,atom->improper_per_atom,
                  "atom:improper_type");
   improper_atom1 =
     memory->grow(atom->improper_atom1,nmax,atom->improper_per_atom,
                  "atom:improper_atom1");
   improper_atom2 =
     memory->grow(atom->improper_atom2,nmax,atom->improper_per_atom,
                  "atom:improper_atom2");
   improper_atom3 =
     memory->grow(atom->improper_atom3,nmax,atom->improper_per_atom,
                  "atom:improper_atom3");
   improper_atom4 =
     memory->grow(atom->improper_atom4,nmax,atom->improper_per_atom,
                  "atom:improper_atom4");
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
 }
 
 /* ----------------------------------------------------------------------
    re-read every local array pointer from the Atom class
 ------------------------------------------------------------------------- */
 
 void AtomVecFull::grow_reset()
 {
   // basic per-atom properties
 
   tag = atom->tag;
   type = atom->type;
   mask = atom->mask;
   image = atom->image;
   x = atom->x;
   v = atom->v;
   f = atom->f;
 
   // charge, molecule ID, special neighbors
 
   q = atom->q;
   molecule = atom->molecule;
   nspecial = atom->nspecial;
   special = atom->special;
 
   // bonds
 
   num_bond = atom->num_bond;
   bond_type = atom->bond_type;
   bond_atom = atom->bond_atom;
 
   // angles
 
   num_angle = atom->num_angle;
   angle_type = atom->angle_type;
   angle_atom1 = atom->angle_atom1;
   angle_atom2 = atom->angle_atom2;
   angle_atom3 = atom->angle_atom3;
 
   // dihedrals
 
   num_dihedral = atom->num_dihedral;
   dihedral_type = atom->dihedral_type;
   dihedral_atom1 = atom->dihedral_atom1;
   dihedral_atom2 = atom->dihedral_atom2;
   dihedral_atom3 = atom->dihedral_atom3;
   dihedral_atom4 = atom->dihedral_atom4;
 
   // impropers
 
   num_improper = atom->num_improper;
   improper_type = atom->improper_type;
   improper_atom1 = atom->improper_atom1;
   improper_atom2 = atom->improper_atom2;
   improper_atom3 = atom->improper_atom3;
   improper_atom4 = atom->improper_atom4;
 }
 
 /* ----------------------------------------------------------------------
    copy atom I info to atom J
 ------------------------------------------------------------------------- */
 
 void AtomVecFull::copy(int i, int j, int delflag)
 {
   int k;
 
   tag[j] = tag[i];
   type[j] = type[i];
   mask[j] = mask[i];
   image[j] = image[i];
   x[j][0] = x[i][0];
   x[j][1] = x[i][1];
   x[j][2] = x[i][2];
   v[j][0] = v[i][0];
   v[j][1] = v[i][1];
   v[j][2] = v[i][2];
 
   q[j] = q[i];
   molecule[j] = molecule[i];
 
   num_bond[j] = num_bond[i];
   for (k = 0; k < num_bond[j]; k++) {
     bond_type[j][k] = bond_type[i][k];
     bond_atom[j][k] = bond_atom[i][k];
   }
 
   num_angle[j] = num_angle[i];
   for (k = 0; k < num_angle[j]; k++) {
     angle_type[j][k] = angle_type[i][k];
     angle_atom1[j][k] = angle_atom1[i][k];
     angle_atom2[j][k] = angle_atom2[i][k];
     angle_atom3[j][k] = angle_atom3[i][k];
   }
 
   num_dihedral[j] = num_dihedral[i];
   for (k = 0; k < num_dihedral[j]; k++) {
     dihedral_type[j][k] = dihedral_type[i][k];
     dihedral_atom1[j][k] = dihedral_atom1[i][k];
     dihedral_atom2[j][k] = dihedral_atom2[i][k];
     dihedral_atom3[j][k] = dihedral_atom3[i][k];
     dihedral_atom4[j][k] = dihedral_atom4[i][k];
   }
 
   num_improper[j] = num_improper[i];
   for (k = 0; k < num_improper[j]; k++) {
     improper_type[j][k] = improper_type[i][k];
     improper_atom1[j][k] = improper_atom1[i][k];
     improper_atom2[j][k] = improper_atom2[i][k];
     improper_atom3[j][k] = improper_atom3[i][k];
     improper_atom4[j][k] = improper_atom4[i][k];
   }
 
   nspecial[j][0] = nspecial[i][0];
   nspecial[j][1] = nspecial[i][1];
   nspecial[j][2] = nspecial[i][2];
   for (k = 0; k < nspecial[j][2]; k++) special[j][k] = special[i][k];
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_comm(int n, int *list, double *buf,
                            int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
     }
   } else {
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
       dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
       dz = pbc[2]*domain->zprd;
     }
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0] + dx;
       buf[m++] = x[j][1] + dy;
       buf[m++] = x[j][2] + dz;
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_comm_vel(int n, int *list, double *buf,
                                int pbc_flag, int *pbc)
 {
   // pack coords and velocities of the n atoms indexed by list into buf
   // 6 values per atom: x,y,z then vx,vy,vz
   // return # of values written
 
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = v[j][0];
       buf[m++] = v[j][1];
       buf[m++] = v[j][2];
     }
   } else {
     // coordinate shift = pbc counts times box lengths (+ tilt if triclinic)
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
       dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
       dz = pbc[2]*domain->zprd;
     }
     if (!deform_vremap) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
       }
     } else {
       // velocity shift built from h_rate, applied only to atoms
       // whose mask has deform_groupbit set
       dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
       dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
       dvz = pbc[2]*h_rate[2];
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         // test the atom being packed (j), not the send-list slot (i)
         if (mask[j] & deform_groupbit) {
           buf[m++] = v[j][0] + dvx;
           buf[m++] = v[j][1] + dvy;
           buf[m++] = v[j][2] + dvz;
         } else {
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
         }
       }
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecFull::unpack_comm(int n, int first, double *buf)
 {
   // overwrite coords of atoms first .. first+n-1 with 3 values each from buf
 
   const int stop = first + n;
   int m = 0;
 
   for (int iatom = first; iatom < stop; iatom++)
     for (int d = 0; d < 3; d++) x[iatom][d] = buf[m++];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecFull::unpack_comm_vel(int n, int first, double *buf)
 {
   // 6 values per atom in buf: x,y,z followed by vx,vy,vz
 
   const int stop = first + n;
   int m = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
     for (int d = 0; d < 3; d++) x[iatom][d] = buf[m++];
     for (int d = 0; d < 3; d++) v[iatom][d] = buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_reverse(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = f[i][0];
     buf[m++] = f[i][1];
     buf[m++] = f[i][2];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecFull::unpack_reverse(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     f[j][0] += buf[m++];
     f[j][1] += buf[m++];
     f[j][2] += buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_border(int n, int *list, double *buf,
                              int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = q[j];
       buf[m++] = molecule[j];
     }
   } else {
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0] + dx;
       buf[m++] = x[j][1] + dy;
       buf[m++] = x[j][2] + dz;
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = q[j];
       buf[m++] = molecule[j];
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_border_vel(int n, int *list, double *buf,
                                  int pbc_flag, int *pbc)
 {
   // pack border data plus velocities of the n atoms indexed by list
   // 11 values per atom: x,y,z,tag,type,mask,q,molecule,vx,vy,vz
   // return # of values written
 
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       buf[m++] = q[j];
       buf[m++] = molecule[j];
       buf[m++] = v[j][0];
       buf[m++] = v[j][1];
       buf[m++] = v[j][2];
     }
   } else {
     // coordinate shift: pbc counts times box lengths for orthogonal boxes,
     // the raw pbc counts themselves for triclinic boxes
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     if (!deform_vremap) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         buf[m++] = q[j];
         buf[m++] = molecule[j];
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
       }
     } else {
       // velocity shift built from h_rate, applied only to atoms
       // whose mask has deform_groupbit set
       dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
       dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
       dvz = pbc[2]*h_rate[2];
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         buf[m++] = q[j];
         buf[m++] = molecule[j];
         // test the atom being packed (j), not the send-list slot (i)
         if (mask[j] & deform_groupbit) {
           buf[m++] = v[j][0] + dvx;
           buf[m++] = v[j][1] + dvy;
           buf[m++] = v[j][2] + dvz;
         } else {
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
         }
       }
     }
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::pack_border_hybrid(int n, int *list, double *buf)
 {
   // 2 values per atom: q then molecule
   // return # of values written
 
   int m = 0;
 
   for (int k = 0; k < n; k++) {
     const int j = list[k];
     buf[m++] = q[j];
     buf[m++] = molecule[j];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecFull::unpack_border(int n, int first, double *buf)
 {
   // fill atoms first .. first+n-1 from buf, 8 values each:
   // x,y,z,tag,type,mask,q,molecule
 
   const int stop = first + n;
   int m = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
 
     // arrays are full once the index reaches nmax, so grow before writing
 
     if (iatom == nmax) grow(0);
 
     for (int d = 0; d < 3; d++) x[iatom][d] = buf[m++];
     tag[iatom] = static_cast<int> (buf[m++]);
     type[iatom] = static_cast<int> (buf[m++]);
     mask[iatom] = static_cast<int> (buf[m++]);
     q[iatom] = buf[m++];
     molecule[iatom] = static_cast<int> (buf[m++]);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecFull::unpack_border_vel(int n, int first, double *buf)
 {
   // fill atoms first .. first+n-1 from buf, 11 values each:
   // x,y,z,tag,type,mask,q,molecule,vx,vy,vz
 
   const int stop = first + n;
   int m = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
 
     // arrays are full once the index reaches nmax, so grow before writing
 
     if (iatom == nmax) grow(0);
 
     for (int d = 0; d < 3; d++) x[iatom][d] = buf[m++];
     tag[iatom] = static_cast<int> (buf[m++]);
     type[iatom] = static_cast<int> (buf[m++]);
     mask[iatom] = static_cast<int> (buf[m++]);
     q[iatom] = buf[m++];
     molecule[iatom] = static_cast<int> (buf[m++]);
     for (int d = 0; d < 3; d++) v[iatom][d] = buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::unpack_border_hybrid(int n, int first, double *buf)
 {
   // 2 values per atom: q then molecule
   // return # of values read
 
   const int stop = first + n;
   int m = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
     q[iatom] = buf[m++];
     molecule[iatom] = static_cast<int> (buf[m++]);
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 int AtomVecFull::pack_exchange(int i, double *buf)
 {
   int k;
 
   // slot 0 is reserved for the total value count, filled in at the end
 
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
 
   // image is written into the slot through a tagint pointer,
   // not converted to a double value
 
   *((tagint *) &buf[m++]) = image[i];
 
   buf[m++] = q[i];
   buf[m++] = molecule[i];
 
   // each topology section is a count followed by that many entries
 
   buf[m++] = num_bond[i];
   for (k = 0; k < num_bond[i]; k++) {
     buf[m++] = bond_type[i][k];
     buf[m++] = bond_atom[i][k];
   }
 
   buf[m++] = num_angle[i];
   for (k = 0; k < num_angle[i]; k++) {
     buf[m++] = angle_type[i][k];
     buf[m++] = angle_atom1[i][k];
     buf[m++] = angle_atom2[i][k];
     buf[m++] = angle_atom3[i][k];
   }
 
   buf[m++] = num_dihedral[i];
   for (k = 0; k < num_dihedral[i]; k++) {
     buf[m++] = dihedral_type[i][k];
     buf[m++] = dihedral_atom1[i][k];
     buf[m++] = dihedral_atom2[i][k];
     buf[m++] = dihedral_atom3[i][k];
     buf[m++] = dihedral_atom4[i][k];
   }
 
   buf[m++] = num_improper[i];
   for (k = 0; k < num_improper[i]; k++) {
     buf[m++] = improper_type[i][k];
     buf[m++] = improper_atom1[i][k];
     buf[m++] = improper_atom2[i][k];
     buf[m++] = improper_atom3[i][k];
     buf[m++] = improper_atom4[i][k];
   }
 
   // three nspecial counts, then nspecial[i][2] entries of the special list
 
   buf[m++] = nspecial[i][0];
   buf[m++] = nspecial[i][1];
   buf[m++] = nspecial[i][2];
   for (k = 0; k < nspecial[i][2]; k++) buf[m++] = special[i][k];
 
   // each fix in atom->extra_grow appends its own data and
   // returns how many values it added
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
 
   // record total length (including slot 0) at the front of the buffer
 
   buf[0] = m;
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecFull::unpack_exchange(double *buf)
 {
   int k;
 
   // new atom is stored at index nlocal; grow per-atom arrays if full
 
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   // start at slot 1; buf[0] is not read here
   // values are read in the same order pack_exchange() writes them
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
 
   // image is read back through a tagint pointer, not converted from double
 
   image[nlocal] = *((tagint *) &buf[m++]);
 
   q[nlocal] = buf[m++];
   molecule[nlocal] = static_cast<int> (buf[m++]);
 
   // each topology section is a count followed by that many entries
 
   num_bond[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_bond[nlocal]; k++) {
     bond_type[nlocal][k] = static_cast<int> (buf[m++]);
     bond_atom[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_angle[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_angle[nlocal]; k++) {
     angle_type[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom3[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_dihedral[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_dihedral[nlocal]; k++) {
     dihedral_type[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom3[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom4[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_improper[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_improper[nlocal]; k++) {
     improper_type[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom3[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom4[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   // three nspecial counts, then nspecial[][2] entries of the special list
 
   nspecial[nlocal][0] = static_cast<int> (buf[m++]);
   nspecial[nlocal][1] = static_cast<int> (buf[m++]);
   nspecial[nlocal][2] = static_cast<int> (buf[m++]);
   for (k = 0; k < nspecial[nlocal][2]; k++)
     special[nlocal][k] = static_cast<int> (buf[m++]);
 
   // each fix in atom->extra_grow consumes its own data and
   // returns how many values it read
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->
         unpack_exchange(nlocal,&buf[m]);
 
   atom->nlocal++;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    size of restart data for all atoms owned by this proc
    include extra data stored by fixes
 ------------------------------------------------------------------------- */
 
 int AtomVecFull::size_restart()
 {
   const int nlocal = atom->nlocal;
   int total = 0;
 
   // 17 fixed values per atom plus 2 per bond, 4 per angle,
   // 5 per dihedral, 5 per improper
 
   for (int iatom = 0; iatom < nlocal; iatom++) {
     total += 17 + 2*num_bond[iatom] + 4*num_angle[iatom] +
       5*num_dihedral[iatom] + 5*num_improper[iatom];
   }
 
   // add the per-atom sizes reported by each fix in atom->extra_restart
 
   for (int iextra = 0; iextra < atom->nextra_restart; iextra++) {
     for (int iatom = 0; iatom < nlocal; iatom++)
       total += modify->fix[atom->extra_restart[iextra]]->size_restart(iatom);
   }
 
   return total;
 }
 
 /* ----------------------------------------------------------------------
    pack atom I's data for restart file including extra quantities
    xyz must be 1st 3 values, so that read_restart can test on them
    molecular types may be negative, but write as positive
 ------------------------------------------------------------------------- */
 
 int AtomVecFull::pack_restart(int i, double *buf)
 {
   int k;
 
   // slot 0 is reserved for the total value count, filled in at the end
 
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
 
   // image is written into the slot through a tagint pointer,
   // not converted to a double value
 
   *((tagint *) &buf[m++]) = image[i];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
 
   buf[m++] = q[i];
   buf[m++] = molecule[i];
 
   // each topology section is a count followed by that many entries
   // MAX(t,-t) stores the absolute value of each type
 
   buf[m++] = num_bond[i];
   for (k = 0; k < num_bond[i]; k++) {
     buf[m++] = MAX(bond_type[i][k],-bond_type[i][k]);
     buf[m++] = bond_atom[i][k];
   }
 
   buf[m++] = num_angle[i];
   for (k = 0; k < num_angle[i]; k++) {
     buf[m++] = MAX(angle_type[i][k],-angle_type[i][k]);
     buf[m++] = angle_atom1[i][k];
     buf[m++] = angle_atom2[i][k];
     buf[m++] = angle_atom3[i][k];
   }
 
   buf[m++] = num_dihedral[i];
   for (k = 0; k < num_dihedral[i]; k++) {
     buf[m++] = MAX(dihedral_type[i][k],-dihedral_type[i][k]);
     buf[m++] = dihedral_atom1[i][k];
     buf[m++] = dihedral_atom2[i][k];
     buf[m++] = dihedral_atom3[i][k];
     buf[m++] = dihedral_atom4[i][k];
   }
 
   buf[m++] = num_improper[i];
   for (k = 0; k < num_improper[i]; k++) {
     buf[m++] = MAX(improper_type[i][k],-improper_type[i][k]);
     buf[m++] = improper_atom1[i][k];
     buf[m++] = improper_atom2[i][k];
     buf[m++] = improper_atom3[i][k];
     buf[m++] = improper_atom4[i][k];
   }
 
   // each fix in atom->extra_restart appends its own data and
   // returns how many values it added
 
   if (atom->nextra_restart)
     for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
       m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
 
   // record total length (including slot 0) at the front of the buffer
 
   buf[0] = m;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack data for one atom from restart file including extra quantities
 ------------------------------------------------------------------------- */
 
 int AtomVecFull::unpack_restart(double *buf)
 {
   int k;
 
   // new atom is stored at index nlocal
   // when arrays are full, grow them and also atom->extra if it is in use
 
   int nlocal = atom->nlocal;
   if (nlocal == nmax) {
     grow(0);
     if (atom->nextra_store)
       memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
   }
 
   // start at slot 1; buf[0] holds the total value count (used below)
   // values are read in the same order pack_restart() writes them
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
 
   // image is read back through a tagint pointer, not converted from double
 
   image[nlocal] = *((tagint *) &buf[m++]);
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
 
   q[nlocal] = buf[m++];
   molecule[nlocal] = static_cast<int> (buf[m++]);
 
   // each topology section is a count followed by that many entries
 
   num_bond[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_bond[nlocal]; k++) {
     bond_type[nlocal][k] = static_cast<int> (buf[m++]);
     bond_atom[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_angle[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_angle[nlocal]; k++) {
     angle_type[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     angle_atom3[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_dihedral[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_dihedral[nlocal]; k++) {
     dihedral_type[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom3[nlocal][k] = static_cast<int> (buf[m++]);
     dihedral_atom4[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   num_improper[nlocal] = static_cast<int> (buf[m++]);
   for (k = 0; k < num_improper[nlocal]; k++) {
     improper_type[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom1[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom2[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom3[nlocal][k] = static_cast<int> (buf[m++]);
     improper_atom4[nlocal][k] = static_cast<int> (buf[m++]);
   }
 
   // whatever remains between m and the total count in buf[0]
   // is copied verbatim into atom->extra for this atom
 
   double **extra = atom->extra;
   if (atom->nextra_store) {
     int size = static_cast<int> (buf[0]) - m;
     for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
   }
 
   atom->nlocal++;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    create one atom of itype at coord
    set other values to defaults
 ------------------------------------------------------------------------- */
 
 void AtomVecFull::create_atom(int itype, double *coord)
 {
   // new atom goes in slot nlocal; enlarge per-atom arrays when full
 
   const int inew = atom->nlocal;
   if (inew == nmax) grow(0);
 
   // caller-supplied type and position, zero velocity
 
   type[inew] = itype;
   for (int d = 0; d < 3; d++) {
     x[inew][d] = coord[d];
     v[inew][d] = 0.0;
   }
 
   // defaults: tag 0, mask 1, IMGMAX in all three image fields
 
   tag[inew] = 0;
   mask[inew] = 1;
   image[inew] = ((tagint) IMGMAX << IMG2BITS) |
     ((tagint) IMGMAX << IMGBITS) | IMGMAX;
 
   // no charge, no molecule ID, no topology, empty special list
 
   q[inew] = 0.0;
   molecule[inew] = 0;
   num_bond[inew] = 0;
   num_angle[inew] = 0;
   num_dihedral[inew] = 0;
   num_improper[inew] = 0;
   nspecial[inew][0] = 0;
   nspecial[inew][1] = 0;
   nspecial[inew][2] = 0;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack one line from Atoms section of data file
    initialize other atom quantities
 ------------------------------------------------------------------------- */
 
 void AtomVecFull::data_atom(double *coord, tagint imagetmp, char **values)
 {
   // new atom goes in slot nlocal; enlarge per-atom arrays when full
 
   const int inew = atom->nlocal;
   if (inew == nmax) grow(0);
 
   // values[] holds atom ID, molecule ID, atom type, charge in that order
 
   tag[inew] = atoi(values[0]);
   if (tag[inew] <= 0)
     error->one(FLERR,"Invalid atom ID in Atoms section of data file");
 
   molecule[inew] = atoi(values[1]);
 
   type[inew] = atoi(values[2]);
   if (type[inew] <= 0 || type[inew] > atom->ntypes)
     error->one(FLERR,"Invalid atom type in Atoms section of data file");
 
   q[inew] = atof(values[3]);
 
   // position and image flags come from the caller
 
   for (int d = 0; d < 3; d++) x[inew][d] = coord[d];
   image[inew] = imagetmp;
 
   // defaults: mask 1, zero velocity, no topology
 
   mask[inew] = 1;
   for (int d = 0; d < 3; d++) v[inew][d] = 0.0;
   num_bond[inew] = 0;
   num_angle[inew] = 0;
   num_dihedral[inew] = 0;
   num_improper[inew] = 0;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack hybrid quantities from one line in Atoms section of data file
    initialize other atom quantities for this sub-style
 ------------------------------------------------------------------------- */
 
 int AtomVecFull::data_atom_hybrid(int nlocal, char **values)
 {
   // molecule ID and charge for atom nlocal are parsed from values[]
 
-  molecule[nlocal] = atoi(values[1]);
-  q[nlocal] = atof(values[3]);
+  molecule[nlocal] = atoi(values[0]);
+  q[nlocal] = atof(values[1]);
 
   // atom starts with no bonded topology
 
   num_bond[nlocal] = 0;
   num_angle[nlocal] = 0;
   num_dihedral[nlocal] = 0;
   num_improper[nlocal] = 0;
 
   // NOTE(review): presumably the # of values[] entries consumed by this
   // sub-style (two are read above) - confirm against the hybrid caller
 
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    return # of bytes of allocated memory
 ------------------------------------------------------------------------- */
 
 bigint AtomVecFull::memory_usage()
 {
   // second dimension of each 2d per-atom topology array
 
   const int nspec = atom->maxspecial;
   const int nbond = atom->bond_per_atom;
   const int nangle = atom->angle_per_atom;
   const int ndihedral = atom->dihedral_per_atom;
   const int nimproper = atom->improper_per_atom;
 
   // an array is tallied only if atom->memcheck() accepts its name
 
   bigint total = 0;
 
   if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
   if (atom->memcheck("type")) total += memory->usage(type,nmax);
   if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
   if (atom->memcheck("image")) total += memory->usage(image,nmax);
   if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
   if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
   if (atom->memcheck("f")) total += memory->usage(f,nmax*comm->nthreads,3);
 
   if (atom->memcheck("q")) total += memory->usage(q,nmax);
   if (atom->memcheck("molecule")) total += memory->usage(molecule,nmax);
   if (atom->memcheck("nspecial")) total += memory->usage(nspecial,nmax,3);
   if (atom->memcheck("special"))
     total += memory->usage(special,nmax,nspec);
 
   if (atom->memcheck("num_bond")) total += memory->usage(num_bond,nmax);
   if (atom->memcheck("bond_type"))
     total += memory->usage(bond_type,nmax,nbond);
   if (atom->memcheck("bond_atom"))
     total += memory->usage(bond_atom,nmax,nbond);
 
   if (atom->memcheck("num_angle")) total += memory->usage(num_angle,nmax);
   if (atom->memcheck("angle_type"))
     total += memory->usage(angle_type,nmax,nangle);
   if (atom->memcheck("angle_atom1"))
     total += memory->usage(angle_atom1,nmax,nangle);
   if (atom->memcheck("angle_atom2"))
     total += memory->usage(angle_atom2,nmax,nangle);
   if (atom->memcheck("angle_atom3"))
     total += memory->usage(angle_atom3,nmax,nangle);
 
   if (atom->memcheck("num_dihedral")) total += memory->usage(num_dihedral,nmax);
   if (atom->memcheck("dihedral_type"))
     total += memory->usage(dihedral_type,nmax,ndihedral);
   if (atom->memcheck("dihedral_atom1"))
     total += memory->usage(dihedral_atom1,nmax,ndihedral);
   if (atom->memcheck("dihedral_atom2"))
     total += memory->usage(dihedral_atom2,nmax,ndihedral);
   if (atom->memcheck("dihedral_atom3"))
     total += memory->usage(dihedral_atom3,nmax,ndihedral);
   if (atom->memcheck("dihedral_atom4"))
     total += memory->usage(dihedral_atom4,nmax,ndihedral);
 
   if (atom->memcheck("num_improper")) total += memory->usage(num_improper,nmax);
   if (atom->memcheck("improper_type"))
     total += memory->usage(improper_type,nmax,nimproper);
   if (atom->memcheck("improper_atom1"))
     total += memory->usage(improper_atom1,nmax,nimproper);
   if (atom->memcheck("improper_atom2"))
     total += memory->usage(improper_atom2,nmax,nimproper);
   if (atom->memcheck("improper_atom3"))
     total += memory->usage(improper_atom3,nmax,nimproper);
   if (atom->memcheck("improper_atom4"))
     total += memory->usage(improper_atom4,nmax,nimproper);
 
   return total;
 }
diff --git a/src/MOLECULE/improper_hybrid.h b/src/MOLECULE/improper_hybrid.h
index f89b19e8f..aff04e09f 100644
--- a/src/MOLECULE/improper_hybrid.h
+++ b/src/MOLECULE/improper_hybrid.h
@@ -1,82 +1,84 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef IMPROPER_CLASS
 
 ImproperStyle(hybrid,ImproperHybrid)
 
 #else
 
 #ifndef LMP_IMPROPER_HYBRID_H
 #define LMP_IMPROPER_HYBRID_H
 
 #include "stdio.h"
 #include "improper.h"
 
 namespace LAMMPS_NS {
 
 // Improper style that keeps a list of other Improper styles (styles,
 // keywords) plus a per-improper-type map selecting which one applies.
 // Only the declaration is visible here; see the .cpp file for behavior.
 
 class ImproperHybrid : public Improper {
  public:
   int nstyles;                  // # of different improper styles
   Improper **styles;            // class list for each Improper style
   char **keywords;              // keyword for each improper style
 
   ImproperHybrid(class LAMMPS *);
   ~ImproperHybrid();
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void write_restart(FILE *);
   void read_restart(FILE *);
   double memory_usage();
 
  private:
   int *map;                     // which style each improper type points to
 
   // per-sub-style improper lists, each indexed by sub-style
 
   int *nimproperlist;           // # of impropers in sub-style improperlists
   int *maximproper;             // max # of impropers sub-style lists can store
   int ***improperlist;          // improperlist for each sub-style
 
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
 E: Improper style hybrid cannot use same improper style twice
 
 Self-explanatory.
 
 E: Improper style hybrid cannot have hybrid as an argument
 
 Self-explanatory.
 
 E: Improper style hybrid cannot have none as an argument
 
 Self-explanatory.
 
 E: Improper coeff for hybrid has invalid style
 
 Improper style hybrid uses another improper style as one of its
 coefficients.  The improper style used in the improper_coeff command
 or read from a restart file is not recognized.
 
 */
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 1b696d45b..e7e6c8336 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1,2355 +1,2355 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_rigid.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "atom_vec_line.h"
 #include "atom_vec_tri.h"
 #include "domain.h"
 #include "update.h"
 #include "respa.h"
 #include "modify.h"
 #include "group.h"
 #include "comm.h"
 #include "random_mars.h"
 #include "force.h"
 #include "output.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 // how rigid bodies are defined; the constructor stores one of these in rstyle
 // according to arg[3] ("single", "molecule" or "group")
 enum{SINGLE,MOLECULE,GROUP};
 // values stored in pcouple (default NONE; set by the "couple" and "iso" keywords)
 enum{NONE,XYZ,XY,YZ,XZ};
 // values stored in pstyle; the constructor only ever assigns ISO or ANISO
 enum{ISO,ANISO,TRICLINIC};
 
 // NOTE(review): MAXLINE, CHUNK and ATTRIBUTE_PERBODY are not referenced in the
 // part of the file reviewed here; presumably used when reading "infile" - confirm
 #define MAXLINE 256
 #define CHUNK 1024
 #define ATTRIBUTE_PERBODY 11
 
 // NOTE(review): numerical tolerances, not referenced in the part of the file
 // reviewed here - confirm their use before changing
 #define TOLERANCE 1.0e-6
 #define EPSILON 1.0e-7
 
 #define SINERTIA 0.4            // moment of inertia prefactor for sphere
 #define EINERTIA 0.4            // moment of inertia prefactor for ellipsoid
 #define LINERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
 
 /* ---------------------------------------------------------------------- */
 
 // Construct a rigid-body fix from the "fix rigid" command arguments.
 //
 //   arg[3] selects how bodies are defined:
 //     "single"   - all atoms of the fix group form one body
 //     "molecule" - each molecule ID present in the fix group is one body
 //     "group"    - arg[4] = N, followed by N group IDs, one body per group
 //   remaining args are optional keywords (force, torque, langevin, temp,
 //   iso, aniso, x, y, z, couple, dilate, tparam, pchain, infile)
 //
 // Steps: register per-atom arrays with Atom, assign body[i] for each local
 // atom, allocate all nbody-length arrays, parse the optional keywords,
 // count atoms per body across all procs (error if a body has <= 1 atom),
 // print a summary on proc 0, and set firstflag so that init() performs the
 // one-time body setup.
 // Any malformed input is reported via error->all().
 FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   int i,ibody;
 
   scalar_flag = 1;
   extscalar = 0;
   time_integrate = 1;
   rigid_flag = 1;
   virial_flag = 1;
   create_attribute = 1;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // perform initial allocation of atom-based arrays
   // register with Atom class
 
   extended = orientflag = dorientflag = 0;
   body = NULL;
   displace = NULL;
   eflags = NULL;
   orient = NULL;
   dorient = NULL;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
 
   // parse args for rigid body specification
   // set nbody and body[i] for each atom
 
   if (narg < 4) error->all(FLERR,"Illegal fix rigid command");
   int iarg;
 
   // mol2body stays NULL unless the "molecule" style allocates it below
 
   mol2body = NULL;
 
   // single rigid body
   // nbody = 1
   // all atoms in fix group are part of body
 
   if (strcmp(arg[3],"single") == 0) {
     rstyle = SINGLE;
     iarg = 4;
     nbody = 1;
 
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
     for (i = 0; i < nlocal; i++) {
       body[i] = -1;
       if (mask[i] & groupbit) body[i] = 0;
     }
 
   // each molecule in fix group is a rigid body
   // maxmol = largest molecule #
   // ncount = # of atoms in each molecule (have to sum across procs)
   // nbody = # of non-zero ncount values
   // mol2body[m] = body index of molecule m, or -1 if molecule m has no atoms
 
   } else if (strcmp(arg[3],"molecule") == 0) {
     rstyle = MOLECULE;
     iarg = 4;
     if (atom->molecule_flag == 0)
       error->all(FLERR,"Fix rigid molecule requires atom attribute molecule");
 
     int *mask = atom->mask;
     int *molecule = atom->molecule;
     int nlocal = atom->nlocal;
 
     maxmol = -1;
     for (i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) maxmol = MAX(maxmol,molecule[i]);
 
     int itmp;
     MPI_Allreduce(&maxmol,&itmp,1,MPI_INT,MPI_MAX,world);
     maxmol = itmp;
 
     int *ncount;
     memory->create(ncount,maxmol+1,"rigid:ncount");
     for (i = 0; i <= maxmol; i++) ncount[i] = 0;
 
     for (i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) ncount[molecule[i]]++;
 
     // NOTE(review): the allocation label below repeats "rigid:ncount";
     // presumably "rigid:mol2body" was intended - confirm before changing
 
     memory->create(mol2body,maxmol+1,"rigid:ncount");
     MPI_Allreduce(ncount,mol2body,maxmol+1,MPI_INT,MPI_SUM,world);
 
     // convert global per-molecule atom counts into consecutive body indices
 
     nbody = 0;
     for (i = 0; i <= maxmol; i++)
       if (mol2body[i]) mol2body[i] = nbody++;
       else mol2body[i] = -1;
 
     for (i = 0; i < nlocal; i++) {
       body[i] = -1;
       if (mask[i] & groupbit) body[i] = mol2body[molecule[i]];
     }
 
     memory->destroy(ncount);
 
   // each listed group is a rigid body
   // check if all listed groups exist
   // an atom must belong to fix group and listed group to be in rigid body
   // error if atom belongs to more than 1 rigid body
 
   } else if (strcmp(arg[3],"group") == 0) {
     if (narg < 5) error->all(FLERR,"Illegal fix rigid command");
     rstyle = GROUP;
     nbody = atoi(arg[4]);
     if (nbody <= 0) error->all(FLERR,"Illegal fix rigid command");
     if (narg < 5+nbody) error->all(FLERR,"Illegal fix rigid command");
     iarg = 5+nbody;
 
     int *igroups = new int[nbody];
     for (ibody = 0; ibody < nbody; ibody++) {
       igroups[ibody] = group->find(arg[5+ibody]);
       if (igroups[ibody] == -1)
         error->all(FLERR,"Could not find fix rigid group ID");
     }
 
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
     // flag is set if any local atom matches more than one listed group
 
     int flag = 0;
     for (i = 0; i < nlocal; i++) {
       body[i] = -1;
       if (mask[i] & groupbit)
         for (ibody = 0; ibody < nbody; ibody++)
           if (mask[i] & group->bitmask[igroups[ibody]]) {
             if (body[i] >= 0) flag = 1;
             body[i] = ibody;
           }
     }
 
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
     if (flagall)
       error->all(FLERR,"One or more atoms belong to multiple rigid bodies");
 
     delete [] igroups;
 
   } else error->all(FLERR,"Illegal fix rigid command");
 
   // error check on nbody
 
   if (nbody == 0) error->all(FLERR,"No rigid bodies defined");
 
   // create all nbody-length arrays
 
   memory->create(nrigid,nbody,"rigid:nrigid");
   memory->create(masstotal,nbody,"rigid:masstotal");
   memory->create(xcm,nbody,3,"rigid:xcm");
   memory->create(vcm,nbody,3,"rigid:vcm");
   memory->create(fcm,nbody,3,"rigid:fcm");
   memory->create(inertia,nbody,3,"rigid:inertia");
   memory->create(ex_space,nbody,3,"rigid:ex_space");
   memory->create(ey_space,nbody,3,"rigid:ey_space");
   memory->create(ez_space,nbody,3,"rigid:ez_space");
   memory->create(angmom,nbody,3,"rigid:angmom");
   memory->create(omega,nbody,3,"rigid:omega");
   memory->create(torque,nbody,3,"rigid:torque");
   memory->create(quat,nbody,4,"rigid:quat");
   memory->create(imagebody,nbody,"rigid:imagebody");
   memory->create(fflag,nbody,3,"rigid:fflag");
   memory->create(tflag,nbody,3,"rigid:tflag");
   memory->create(langextra,nbody,6,"rigid:langextra");
 
   memory->create(sum,nbody,6,"rigid:sum");
   memory->create(all,nbody,6,"rigid:all");
   memory->create(remapflag,nbody,4,"rigid:remapflag");
 
   // initialize force/torque flags to default = 1.0
   // for 2d: fz, tx, ty = 0.0
 
   array_flag = 1;
   size_array_rows = nbody;
   size_array_cols = 15;
   global_freq = 1;
   extarray = 0;
 
   for (i = 0; i < nbody; i++) {
     fflag[i][0] = fflag[i][1] = fflag[i][2] = 1.0;
     tflag[i][0] = tflag[i][1] = tflag[i][2] = 1.0;
     if (domain->dimension == 2) fflag[i][2] = tflag[i][0] = tflag[i][1] = 0.0;
   }
 
   // parse optional args
   // seed is only assigned by the "langevin" keyword and only read
   //   when langflag is set
 
   int seed;
   langflag = 0;
   tstat_flag = 0;
   pstat_flag = 0;
   allremap = 1;
   id_dilate = NULL;
   t_chain = 10;
   t_iter = 1;
   t_order = 3;
   p_chain = 10;
   infile = NULL;
 
   pcouple = NONE;
   pstyle = ANISO;
   dimension = domain->dimension;
   
   for (int i = 0; i < 3; i++) {
     p_start[i] = p_stop[i] = p_period[i] = 0.0;
     p_flag[i] = 0;
   }
   
   while (iarg < narg) {
 
     // force M xflag yflag zflag: M is a body index range (1-based),
     //   resolved by force->bounds() into mlo..mhi
 
     if (strcmp(arg[iarg],"force") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix rigid command");
 
       int mlo,mhi;
       force->bounds(arg[iarg+1],nbody,mlo,mhi);
 
       double xflag,yflag,zflag;
       if (strcmp(arg[iarg+2],"off") == 0) xflag = 0.0;
       else if (strcmp(arg[iarg+2],"on") == 0) xflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(arg[iarg+3],"off") == 0) yflag = 0.0;
       else if (strcmp(arg[iarg+3],"on") == 0) yflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(arg[iarg+4],"off") == 0) zflag = 0.0;
       else if (strcmp(arg[iarg+4],"on") == 0) zflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
 
       if (domain->dimension == 2 && zflag == 1.0)
         error->all(FLERR,"Fix rigid z force cannot be on for 2d simulation");
 
       // count detects an empty range (mlo > mhi)
 
       int count = 0;
       for (int m = mlo; m <= mhi; m++) {
         fflag[m-1][0] = xflag;
         fflag[m-1][1] = yflag;
         fflag[m-1][2] = zflag;
         count++;
       }
       if (count == 0) error->all(FLERR,"Illegal fix rigid command");
 
       iarg += 5;
 
     // torque M xflag yflag zflag: same syntax as force, applied to tflag
 
     } else if (strcmp(arg[iarg],"torque") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix rigid command");
 
       int mlo,mhi;
       force->bounds(arg[iarg+1],nbody,mlo,mhi);
 
       double xflag,yflag,zflag;
       if (strcmp(arg[iarg+2],"off") == 0) xflag = 0.0;
       else if (strcmp(arg[iarg+2],"on") == 0) xflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(arg[iarg+3],"off") == 0) yflag = 0.0;
       else if (strcmp(arg[iarg+3],"on") == 0) yflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(arg[iarg+4],"off") == 0) zflag = 0.0;
       else if (strcmp(arg[iarg+4],"on") == 0) zflag = 1.0;
       else error->all(FLERR,"Illegal fix rigid command");
 
       if (domain->dimension == 2 && (xflag == 1.0 || yflag == 1.0))
         error->all(FLERR,"Fix rigid xy torque cannot be on for 2d simulation");
 
       int count = 0;
       for (int m = mlo; m <= mhi; m++) {
         tflag[m-1][0] = xflag;
         tflag[m-1][1] = yflag;
         tflag[m-1][2] = zflag;
         count++;
       }
       if (count == 0) error->all(FLERR,"Illegal fix rigid command");
 
       iarg += 5;
 
     // langevin Tstart Tstop Tperiod seed: only for styles rigid, rigid/nve
 
     } else if (strcmp(arg[iarg],"langevin") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid") != 0 && strcmp(style,"rigid/nve") != 0)
         error->all(FLERR,"Illegal fix rigid command");
       langflag = 1;
       t_start = atof(arg[iarg+1]);
       t_stop = atof(arg[iarg+2]);
       t_period = atof(arg[iarg+3]);
       seed = atoi(arg[iarg+4]);
       if (t_period <= 0.0)
         error->all(FLERR,"Fix rigid langevin period must be > 0.0");
       if (seed <= 0) error->all(FLERR,"Illegal fix rigid command");
       iarg += 5;
 
     // temp Tstart Tstop Tperiod: only for styles rigid/nvt, rigid/npt
 
     } else if (strcmp(arg[iarg],"temp") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid/nvt") != 0 && strcmp(style,"rigid/npt") != 0)
         error->all(FLERR,"Illegal fix rigid command");
       tstat_flag = 1;
       t_start = atof(arg[iarg+1]);
       t_stop = atof(arg[iarg+2]);
       t_period = atof(arg[iarg+3]);
       iarg += 4;
 
     // iso Pstart Pstop Pperiod: only for styles rigid/npt, rigid/nph
     // sets all 3 dimensions and couples them (z is disabled in 2d)
 
     } else if (strcmp(arg[iarg],"iso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid/npt") != 0 && strcmp(style,"rigid/nph") != 0)
 	      error->all(FLERR,"Illegal fix rigid command");
       pcouple = XYZ;
       p_start[0] = p_start[1] = p_start[2] = atof(arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = atof(arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] = atof(arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
 	      p_start[2] = p_stop[2] = p_period[2] = 0.0;
       	p_flag[2] = 0;
       }
       iarg += 4;
 
     // aniso Pstart Pstop Pperiod: as iso, but pcouple is left unchanged
 
     } else if (strcmp(arg[iarg],"aniso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid/npt") != 0 && strcmp(style,"rigid/nph") != 0)
 	      error->all(FLERR,"Illegal fix rigid command");
       p_start[0] = p_start[1] = p_start[2] = atof(arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = atof(arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] = atof(arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
       	p_start[2] = p_stop[2] = p_period[2] = 0.0;
 	      p_flag[2] = 0;
       }
       iarg += 4;
 
     // x/y/z Pstart Pstop Pperiod: set a single dimension
     // NOTE(review): unlike iso/aniso, these three keywords do not check
     //   that the fix style is rigid/npt or rigid/nph - confirm intent
 
     } else if (strcmp(arg[iarg],"x") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       p_start[0] = atof(arg[iarg+1]);
       p_stop[0] = atof(arg[iarg+2]);
       p_period[0] = atof(arg[iarg+3]);
       p_flag[0] = 1;
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       p_start[1] = atof(arg[iarg+1]);
       p_stop[1] = atof(arg[iarg+2]);
       p_period[1] = atof(arg[iarg+3]);
       p_flag[1] = 1;
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       p_start[2] = atof(arg[iarg+1]);
       p_stop[2] = atof(arg[iarg+2]);
       p_period[2] = atof(arg[iarg+3]);
       p_flag[2] = 1;
       iarg += 4;
 
     // couple xyz|xy|yz|xz|none
 
     } else if (strcmp(arg[iarg],"couple") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ;
       else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY;
       else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ;
       else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ;
       else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE;
       else error->all(FLERR,"Illegal fix rigid command");
       iarg += 2;
 
     // dilate all|group-ID: "all" sets allremap, otherwise the group ID is
     //   copied into id_dilate (any earlier copy is freed first)
     // NOTE(review): idilate is used only for the existence check here
 
     } else if (strcmp(arg[iarg],"dilate") == 0) {
       if (iarg+2 > narg) 
         error->all(FLERR,"Illegal fix rigid nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"all") == 0) allremap = 1;
       else {
         allremap = 0;
         delete [] id_dilate;
         int n = strlen(arg[iarg+1]) + 1;
         id_dilate = new char[n];
         strcpy(id_dilate,arg[iarg+1]);
         int idilate = group->find(id_dilate);
         if (idilate == -1)
           error->all(FLERR,
-                     "Fix rigid nvt/npt/nph dilate group ID does not exist");
+                     "Fix rigid npt/nph dilate group ID does not exist");
       }
       iarg += 2;
 
     // tparam Tchain Titer Torder: only for styles rigid/nvt, rigid/npt
 
     } else if (strcmp(arg[iarg],"tparam") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid/nvt") != 0 && strcmp(style,"rigid/npt") != 0)
         error->all(FLERR,"Illegal fix rigid command");
       t_chain = atoi(arg[iarg+1]);
       t_iter = atoi(arg[iarg+2]);
       t_order = atoi(arg[iarg+3]);
       iarg += 4;
 
     // pchain Pchain: only for styles rigid/npt, rigid/nph
 
     } else if (strcmp(arg[iarg],"pchain") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid command");
       if (strcmp(style,"rigid/npt") != 0 && strcmp(style,"rigid/nph") != 0)
         error->all(FLERR,"Illegal fix rigid command");
       p_chain = atoi(arg[iarg+1]);
       iarg += 2;
 
     // infile filename: file name is copied (any earlier copy is freed first)
 
     } else if (strcmp(arg[iarg],"infile") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid command");
       delete [] infile;
       int n = strlen(arg[iarg+1]) + 1;
       infile = new char[n];
       strcpy(infile,arg[iarg+1]);
       iarg += 2;
 
     } else error->all(FLERR,"Illegal fix rigid command");
   }
   
   // set pstat_flag
   // = 1 if any dimension had a pressure setting
 
   pstat_flag = 0;
   for (int i = 0; i < 3; i++) 
     if (p_flag[i]) pstat_flag = 1;
 
   if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO;
   else pstyle = ANISO;
   
   // initialize Marsaglia RNG with processor-unique seed
 
   if (langflag) random = new RanMars(lmp,seed + me);
   else random = NULL;
 
   // initialize vector output quantities in case accessed before run
 
   for (i = 0; i < nbody; i++) {
     xcm[i][0] = xcm[i][1] = xcm[i][2] = 0.0;
     vcm[i][0] = vcm[i][1] = vcm[i][2] = 0.0;
     fcm[i][0] = fcm[i][1] = fcm[i][2] = 0.0;
     torque[i][0] = torque[i][1] = torque[i][2] = 0.0;
   }
 
   // nrigid[n] = # of atoms in Nth rigid body
   // error if one or zero atoms
 
   int *ncount = new int[nbody];
   for (ibody = 0; ibody < nbody; ibody++) ncount[ibody] = 0;
 
   int nlocal = atom->nlocal;
 
   for (i = 0; i < nlocal; i++)
     if (body[i] >= 0) ncount[body[i]]++;
 
   MPI_Allreduce(ncount,nrigid,nbody,MPI_INT,MPI_SUM,world);
   delete [] ncount;
 
   for (ibody = 0; ibody < nbody; ibody++)
     if (nrigid[ibody] <= 1) error->all(FLERR,"One or zero atoms in rigid body");
 
   // bitmasks for properties of extended particles
 
   POINT = 1;
   SPHERE = 2;
   ELLIPSOID = 4;
   LINE = 8;
   TRIANGLE = 16;
   DIPOLE = 32;
   OMEGA = 64;
   ANGMOM = 128;
   TORQUE = 256;
 
   MINUSPI = -MY_PI;
   TWOPI = 2.0*MY_PI;
 
   // print statistics
 
   int nsum = 0;
   for (ibody = 0; ibody < nbody; ibody++) nsum += nrigid[ibody];
 
   if (me == 0) {
     if (screen) fprintf(screen,"%d rigid bodies with %d atoms\n",nbody,nsum);
     if (logfile) fprintf(logfile,"%d rigid bodies with %d atoms\n",nbody,nsum);
   }
 
   // firstflag = 1 triggers one-time initialization of rigid body attributes
 
   firstflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release everything allocated by the constructor: the Atom callback
 // registration, the RNG, the copied keyword strings, the per-atom arrays
 // and all nbody-length arrays.
 FixRigid::~FixRigid()
 {
   // unregister callbacks to this fix from Atom class
 
   atom->delete_callback(id,0);
 
   // random, infile and id_dilate are either NULL or were allocated with
   //   new/new[] by the constructor, so deleting them is always safe here
   // id_dilate (copied by the "dilate" keyword) was previously never freed
   // NOTE(review): confirm no derived-class destructor also deletes id_dilate
 
   delete random;
   delete [] infile;
   delete [] id_dilate;
   memory->destroy(mol2body);
 
   // delete locally stored arrays
 
   memory->destroy(body);
   memory->destroy(displace);
   memory->destroy(eflags);
   memory->destroy(orient);
   memory->destroy(dorient);
 
   // delete nbody-length arrays
 
   memory->destroy(nrigid);
   memory->destroy(masstotal);
   memory->destroy(xcm);
   memory->destroy(vcm);
   memory->destroy(fcm);
   memory->destroy(inertia);
   memory->destroy(ex_space);
   memory->destroy(ey_space);
   memory->destroy(ez_space);
   memory->destroy(angmom);
   memory->destroy(omega);
   memory->destroy(torque);
   memory->destroy(quat);
   memory->destroy(imagebody);
   memory->destroy(fflag);
   memory->destroy(tflag);
   memory->destroy(langextra);
 
   // scratch arrays used for MPI reductions
 
   memory->destroy(sum);
   memory->destroy(all);
   memory->destroy(remapflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Return the bitmask of integration hooks this fix takes part in.
 // POST_FORCE is requested only when the Langevin thermostat is enabled.
 int FixRigid::setmask()
 {
   int hooks = INITIAL_INTEGRATE | FINAL_INTEGRATE | PRE_NEIGHBOR |
     INITIAL_INTEGRATE_RESPA | FINAL_INTEGRATE_RESPA;
   if (langflag) hooks |= POST_FORCE;
   return hooks;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigid::init()
 {
   int i,ibody;
 
   triclinic = domain->triclinic;
 
   // atom style pointers to particles that store extra info
 
   avec_ellipsoid = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   avec_line = (AtomVecLine *) atom->style_match("line");
   avec_tri = (AtomVecTri *) atom->style_match("tri");
 
   // warn if more than one rigid fix
 
   int count = 0;
   for (i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"rigid") == 0) count++;
   if (count > 1 && me == 0) error->warning(FLERR,"More than one fix rigid");
 
   // error if npt,nph fix comes before rigid fix
 
   for (i = 0; i < modify->nfix; i++) {
     if (strcmp(modify->fix[i]->style,"npt") == 0) break;
     if (strcmp(modify->fix[i]->style,"nph") == 0) break;
   }
   if (i < modify->nfix) {
     for (int j = i; j < modify->nfix; j++)
       if (strcmp(modify->fix[j]->style,"rigid") == 0)
         error->all(FLERR,"Rigid fix must come before NPT/NPH fix");
   }
 
   // timestep info
 
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dtq = 0.5 * update->dt;
 
   if (strstr(update->integrate_style,"respa"))
     step_respa = ((Respa *) update->integrate)->step;
 
   // one-time initialization of rigid body attributes
   // extended flags, masstotal, COM, inertia tensor
 
   if (firstflag) setup_bodies();
   firstflag = 0;
 
   // temperature scale factor
 
   double ndof = 0.0;
   for (ibody = 0; ibody < nbody; ibody++) {
     ndof += fflag[ibody][0] + fflag[ibody][1] + fflag[ibody][2];
     ndof += tflag[ibody][0] + tflag[ibody][1] + tflag[ibody][2];
   }
   if (ndof > 0.0) tfactor = force->mvv2e / (ndof * force->boltz);
   else tfactor = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Compute the starting state of every rigid body from its atoms:
 //   vcm/fcm    = center-of-mass velocity and total force
 //   angmom     = angular momentum about xcm
 //   torque     = torque about xcm
 // Each quantity is accumulated over local atoms in sum[][] and combined
 // across procs into all[][] with MPI_Allreduce.  Afterwards langextra is
 // zeroed, omega is derived from angmom, atom velocities are reset via
 // set_v(), and the virial tallied by set_v() is doubled.
 //   vflag = virial flag; forwarded to v_setup() when non-zero
 void FixRigid::setup(int vflag)
 {
   int i,n,ibody;
   double massone,radone;
 
   // vcm = velocity of center-of-mass of each rigid body
   // fcm = force on center-of-mass of each rigid body
 
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
 
   // sum[][0-2] = momentum, sum[][3-5] = force
   // per-atom mass comes from rmass if defined, else from per-type mass
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
     if (rmass) massone = rmass[i];
     else massone = mass[type[i]];
 
     sum[ibody][0] += v[i][0] * massone;
     sum[ibody][1] += v[i][1] * massone;
     sum[ibody][2] += v[i][2] * massone;
     sum[ibody][3] += f[i][0];
     sum[ibody][4] += f[i][1];
     sum[ibody][5] += f[i][2];
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   for (ibody = 0; ibody < nbody; ibody++) {
     vcm[ibody][0] = all[ibody][0]/masstotal[ibody];
     vcm[ibody][1] = all[ibody][1]/masstotal[ibody];
     vcm[ibody][2] = all[ibody][2]/masstotal[ibody];
     fcm[ibody][0] = all[ibody][3];
     fcm[ibody][1] = all[ibody][4];
     fcm[ibody][2] = all[ibody][5];
   }
 
   // angmom = angular momentum of each rigid body
   // torque = torque on each rigid body
 
   tagint *image = atom->image;
   double **x = atom->x;
 
   double dx,dy,dz;
   double unwrap[3];
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
 
   // sum[][0-2] = r x (m v), sum[][3-5] = r x f
   // r = unwrapped atom position minus xcm of its body
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     domain->unmap(x[i],image[i],unwrap);
     dx = unwrap[0] - xcm[ibody][0];
     dy = unwrap[1] - xcm[ibody][1];
     dz = unwrap[2] - xcm[ibody][2];
 
     if (rmass) massone = rmass[i];
     else massone = mass[type[i]];
 
     sum[ibody][0] += dy * massone*v[i][2] - dz * massone*v[i][1];
     sum[ibody][1] += dz * massone*v[i][0] - dx * massone*v[i][2];
     sum[ibody][2] += dx * massone*v[i][1] - dy * massone*v[i][0];
     sum[ibody][3] += dy * f[i][2] - dz * f[i][1];
     sum[ibody][4] += dz * f[i][0] - dx * f[i][2];
     sum[ibody][5] += dx * f[i][1] - dy * f[i][0];
   }
 
   // extended particles add their rotation/torque to angmom/torque of body
 
   if (extended) {
     // lbonus is only assigned when a line atom style exists and is only
     //   read for atoms whose eflags has the LINE bit
     AtomVecLine::Bonus *lbonus;
     if (avec_line) lbonus = avec_line->bonus;
     double **omega_one = atom->omega;
     double **angmom_one = atom->angmom;
     double **torque_one = atom->torque;
     double *radius = atom->radius;
     int *line = atom->line;
 
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
 
       if (eflags[i] & OMEGA) {
         if (eflags[i] & SPHERE) {
           radone = radius[i];
           sum[ibody][0] += SINERTIA*rmass[i] * radone*radone * omega_one[i][0];
           sum[ibody][1] += SINERTIA*rmass[i] * radone*radone * omega_one[i][1];
           sum[ibody][2] += SINERTIA*rmass[i] * radone*radone * omega_one[i][2];
         } else if (eflags[i] & LINE) {
           // line segments contribute only a z component
           radone = lbonus[line[i]].length;
           sum[ibody][2] += LINERTIA*rmass[i] * radone*radone * omega_one[i][2];
         }
       }
       if (eflags[i] & ANGMOM) {
         sum[ibody][0] += angmom_one[i][0];
         sum[ibody][1] += angmom_one[i][1];
         sum[ibody][2] += angmom_one[i][2];
       }
       if (eflags[i] & TORQUE) {
         sum[ibody][3] += torque_one[i][0];
         sum[ibody][4] += torque_one[i][1];
         sum[ibody][5] += torque_one[i][2];
       }
     }
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   for (ibody = 0; ibody < nbody; ibody++) {
     angmom[ibody][0] = all[ibody][0];
     angmom[ibody][1] = all[ibody][1];
     angmom[ibody][2] = all[ibody][2];
     torque[ibody][0] = all[ibody][3];
     torque[ibody][1] = all[ibody][4];
     torque[ibody][2] = all[ibody][5];
   }
 
   // zero langextra in case Langevin thermostat not used
   // no point to calling post_force() here since langextra
   //   is only added to fcm/torque in final_integrate()
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) langextra[ibody][i] = 0.0;
 
   // virial setup before call to set_v
 
   if (vflag) v_setup(vflag);
   else evflag = 0;
 
   // set velocities from angmom & omega
 
   for (ibody = 0; ibody < nbody; ibody++)
     MathExtra::angmom_to_omega(angmom[ibody],ex_space[ibody],ey_space[ibody],
                                ez_space[ibody],inertia[ibody],omega[ibody]);
   set_v();
 
   // guesstimate virial as 2x the set_v contribution
 
   if (vflag_global)
     for (n = 0; n < 6; n++) virial[n] *= 2.0;
   if (vflag_atom) {
     for (i = 0; i < nlocal; i++)
       for (n = 0; n < 6; n++)
         vatom[i][n] *= 2.0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigid::initial_integrate(int vflag)
 {
   double dtfm;
 
   for (int ibody = 0; ibody < nbody; ibody++) {
 
     // update vcm by 1/2 step
 
     dtfm = dtf / masstotal[ibody];
     vcm[ibody][0] += dtfm * fcm[ibody][0] * fflag[ibody][0];
     vcm[ibody][1] += dtfm * fcm[ibody][1] * fflag[ibody][1];
     vcm[ibody][2] += dtfm * fcm[ibody][2] * fflag[ibody][2];
 
     // update xcm by full step
 
     xcm[ibody][0] += dtv * vcm[ibody][0];
     xcm[ibody][1] += dtv * vcm[ibody][1];
     xcm[ibody][2] += dtv * vcm[ibody][2];
 
     // update angular momentum by 1/2 step
 
     angmom[ibody][0] += dtf * torque[ibody][0] * tflag[ibody][0];
     angmom[ibody][1] += dtf * torque[ibody][1] * tflag[ibody][1];
     angmom[ibody][2] += dtf * torque[ibody][2] * tflag[ibody][2];
 
     // compute omega at 1/2 step from angmom at 1/2 step and current q
     // update quaternion a full step via Richardson iteration
     // returns new normalized quaternion, also updated omega at 1/2 step
     // update ex,ey,ez to reflect new quaternion
 
     MathExtra::angmom_to_omega(angmom[ibody],ex_space[ibody],ey_space[ibody],
                                ez_space[ibody],inertia[ibody],omega[ibody]);
     MathExtra::richardson(quat[ibody],angmom[ibody],omega[ibody],
                           inertia[ibody],dtq);
     MathExtra::q_to_exyz(quat[ibody],
                          ex_space[ibody],ey_space[ibody],ez_space[ibody]);
   }
 
   // virial setup before call to set_xv
 
   if (vflag) v_setup(vflag);
   else evflag = 0;
 
   // set coords/orient and velocity/rotation of atoms in rigid bodies
   // from quarternion and omega
 
   set_xv();
 }
 
 /* ----------------------------------------------------------------------
    apply Langevin thermostat to all 6 DOF of rigid bodies
    computed by proc 0, broadcast to other procs
    unlike fix langevin, this stores extra force in extra arrays,
      which are added in when final_integrate() calculates a new fcm/torque
 ------------------------------------------------------------------------- */
 
 // Fill langextra[][0-5] with the Langevin drag + random force (0-2) and
 // torque (3-5) for every body.  Only proc 0 draws random numbers; the
 // result is then broadcast so all procs hold identical values.
 //   vflag is not used here
 void FixRigid::post_force(int vflag)
 {
   if (me == 0) {
 
     // target temperature: linear ramp from t_start to t_stop over the run
 
     double delta = update->ntimestep - update->beginstep;
     if (delta != 0.0) delta /= update->endstep - update->beginstep;
     t_target = t_start + delta * (t_stop-t_start);
     const double tsqrt = sqrt(t_target);
 
     const double boltz = force->boltz;
     const double dt = update->dt;
     const double mvv2e = force->mvv2e;
     const double ftm2v = force->ftm2v;
 
     // body-independent part of the random-term prefactor
 
     const double noise = sqrt(24.0*boltz/t_period/dt/mvv2e);
 
     for (int b = 0; b < nbody; b++) {
 
       // translational DOFs; one uniform() draw per component, in x,y,z order
 
       double gamma1 = -masstotal[b] / t_period / ftm2v;
       double gamma2 = sqrt(masstotal[b]) * tsqrt * noise / ftm2v;
       for (int d = 0; d < 3; d++)
         langextra[b][d] = gamma1*vcm[b][d] + gamma2*(random->uniform()-0.5);
 
       // rotational DOFs, scaled by the principal moments of inertia
 
       gamma1 = -1.0 / t_period / ftm2v;
       gamma2 = tsqrt * noise / ftm2v;
       for (int d = 0; d < 3; d++)
         langextra[b][3+d] = inertia[b][d]*gamma1*omega[b][d] +
           sqrt(inertia[b][d])*gamma2*(random->uniform()-0.5);
     }
   }
 
   MPI_Bcast(&langextra[0][0],6*nbody,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Second half of the timestep: sum the per-atom forces and torques of each
 // rigid body across all procs, add the Langevin contribution stored in
 // langextra, advance vcm and angmom by half a step, recompute omega, and
 // push the new velocities onto the atoms via set_v().
 void FixRigid::final_integrate()
 {
   int i,ibody;
   double dtfm;
 
   // sum over atoms to get force and torque on rigid body
 
   tagint *image = atom->image;
   double **x = atom->x;
   double **f = atom->f;
   int nlocal = atom->nlocal;
 
   double dx,dy,dz;
   double unwrap[3];
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
 
   // sum[][0-2] = force, sum[][3-5] = r x f
   // r = unwrapped atom position minus xcm of its body
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     sum[ibody][0] += f[i][0];
     sum[ibody][1] += f[i][1];
     sum[ibody][2] += f[i][2];
 
     domain->unmap(x[i],image[i],unwrap);
     dx = unwrap[0] - xcm[ibody][0];
     dy = unwrap[1] - xcm[ibody][1];
     dz = unwrap[2] - xcm[ibody][2];
 
     sum[ibody][3] += dy*f[i][2] - dz*f[i][1];
     sum[ibody][4] += dz*f[i][0] - dx*f[i][2];
     sum[ibody][5] += dx*f[i][1] - dy*f[i][0];
   }
 
   // extended particles add their torque to torque of body
   // kept as a separate pass so the summation order is unchanged
 
   if (extended) {
     double **torque_one = atom->torque;
 
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
 
       if (eflags[i] & TORQUE) {
         sum[ibody][3] += torque_one[i][0];
         sum[ibody][4] += torque_one[i][1];
         sum[ibody][5] += torque_one[i][2];
       }
     }
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   // update vcm and angmom
   // include Langevin thermostat forces
   // fflag,tflag = 0 for some dimensions in 2d
 
   for (ibody = 0; ibody < nbody; ibody++) {
     fcm[ibody][0] = all[ibody][0] + langextra[ibody][0];
     fcm[ibody][1] = all[ibody][1] + langextra[ibody][1];
     fcm[ibody][2] = all[ibody][2] + langextra[ibody][2];
     torque[ibody][0] = all[ibody][3] + langextra[ibody][3];
     torque[ibody][1] = all[ibody][4] + langextra[ibody][4];
     torque[ibody][2] = all[ibody][5] + langextra[ibody][5];
 
     // update vcm by 1/2 step
 
     dtfm = dtf / masstotal[ibody];
     vcm[ibody][0] += dtfm * fcm[ibody][0] * fflag[ibody][0];
     vcm[ibody][1] += dtfm * fcm[ibody][1] * fflag[ibody][1];
     vcm[ibody][2] += dtfm * fcm[ibody][2] * fflag[ibody][2];
 
     // update angular momentum by 1/2 step
 
     angmom[ibody][0] += dtf * torque[ibody][0] * tflag[ibody][0];
     angmom[ibody][1] += dtf * torque[ibody][1] * tflag[ibody][1];
     angmom[ibody][2] += dtf * torque[ibody][2] * tflag[ibody][2];
 
     MathExtra::angmom_to_omega(angmom[ibody],ex_space[ibody],ey_space[ibody],
                                ez_space[ibody],inertia[ibody],omega[ibody]);
   }
 
   // set velocity/rotation of atoms in rigid bodies
   // virial is already setup from initial_integrate
 
   set_v();
 }
 
 /* ----------------------------------------------------------------------
    apply evolution operators to quat, quat momentum
    see Miller paper cited in fix rigid/nvt and fix rigid/npt
 ------------------------------------------------------------------------- */
 
 // Rotate the quaternion q and its conjugate momentum p in place.
 //   k       = which permutation operator to apply (1, 2 or 3)
 //   p, q    = 4-vectors, overwritten with the advanced values
 //   inertia = 3 moments of inertia; inertia[k-1] is the one used
 //   dt      = time increment multiplying the rotation rate
 void FixRigid::no_squish_rotate(int k, double *p, double *q,
                                 double *inertia, double dt)
 {
   // permutation tables, one row per k:
   //   component i of the permuted vector is +/- component src[k-1][i]
   //   of the input, negated where flip[k-1][i] is set
 
   static const int src[3][4] = {{1,0,3,2},{2,3,0,1},{3,2,1,0}};
   static const bool flip[3][4] = {{true,false,false,true},
                                   {true,true,false,false},
                                   {true,false,true,false}};
 
   double kp[4],kq[4];
 
   // apply permutation operator on p and q, get kp and kq
 
   if (k >= 1 && k <= 3) {
     for (int i = 0; i < 4; i++) {
       const int s = src[k-1][i];
       kq[i] = flip[k-1][i] ? -q[s] : q[s];
       kp[i] = flip[k-1][i] ? -p[s] : p[s];
     }
   }
 
   // obtain phi, cosines and sines
   // a (near) zero moment of inertia zeroes the rotation rate
   //   instead of dividing by it
 
   double phi = p[0]*kq[0] + p[1]*kq[1] + p[2]*kq[2] + p[3]*kq[3];
   if (fabs(inertia[k-1]) < 1e-6) phi *= 0.0;
   else phi /= 4.0 * inertia[k-1];
   const double c_phi = cos(dt * phi);
   const double s_phi = sin(dt * phi);
 
   // advance p and q
   // p is fully updated before q, as kp and kq hold copies of the old values
 
   for (int i = 0; i < 4; i++) p[i] = c_phi*p[i] + s_phi*kp[i];
   for (int i = 0; i < 4; i++) q[i] = c_phi*q[i] + s_phi*kq[i];
 }
 
 /* ----------------------------------------------------------------------
    first-half integration when called per rRESPA level
    set dtv, dtf, dtq from the timestep of level ilevel
    level 0 performs initial_integrate(), all other levels
      perform final_integrate() instead
    iloop is not used
 ------------------------------------------------------------------------- */
 
 void FixRigid::initial_integrate_respa(int vflag, int ilevel, int iloop)
 {
   const double dt_level = step_respa[ilevel];
 
   dtv = dt_level;
   dtf = 0.5 * dt_level * force->ftm2v;
   dtq = 0.5 * dt_level;
 
   if (ilevel != 0) {
     final_integrate();
     return;
   }
 
   initial_integrate(vflag);
 }
 
 /* ----------------------------------------------------------------------
    second-half integration when called per rRESPA level
    set dtf from the timestep of level ilevel, then final_integrate()
    iloop is not used
 ------------------------------------------------------------------------- */
 
 void FixRigid::final_integrate_respa(int ilevel, int iloop)
 {
   const double half_step = 0.5 * step_respa[ilevel];
   dtf = half_step * force->ftm2v;
 
   final_integrate();
 }
 
 /* ----------------------------------------------------------------------
    remap xcm of each rigid body back into periodic simulation box
    done during pre_neighbor so will be after call to pbc()
      and after fix_deform::pre_exchange() may have flipped box
    use domain->remap() in case xcm is far away from box
      due to 1st definition of rigid body or due to box flip
    if don't do this, then atoms of a body which drifts far away
      from a triclinic box will be remapped back into box
      with huge displacements when the box tilt changes via set_x()
    adjust image flag of body and image flags of all atoms in body
 ------------------------------------------------------------------------- */
 
 void FixRigid::pre_neighbor()
 {
   tagint original,oldimage,newimage;
 
   for (int ibody = 0; ibody < nbody; ibody++) {
     original = imagebody[ibody];
     domain->remap(xcm[ibody],imagebody[ibody]);
 
     if (original == imagebody[ibody]) remapflag[ibody][3] = 0;
     else {
       oldimage = original & IMGMASK;
       newimage = imagebody[ibody] & IMGMASK;
       remapflag[ibody][0] = newimage - oldimage;
       oldimage = (original >> IMGBITS) & IMGMASK;
       newimage = (imagebody[ibody] >> IMGBITS) & IMGMASK;
       remapflag[ibody][1] = newimage - oldimage;
       oldimage = original >> IMG2BITS;
       newimage = imagebody[ibody] >> IMG2BITS;
       remapflag[ibody][2] = newimage - oldimage;
       remapflag[ibody][3] = 1;
     }
   }
 
   // adjust image flags of any atom in a rigid body whose xcm was remapped
 
   tagint *image = atom->image;
   int nlocal = atom->nlocal;
 
   int ibody;
   tagint idim,otherdims;
 
   for (int i = 0; i < nlocal; i++) {
     if (body[i] == -1) continue;
     if (remapflag[body[i]][3] == 0) continue;
     ibody = body[i];
 
     if (remapflag[ibody][0]) {
       idim = image[i] & IMGMASK;
       otherdims = image[i] ^ idim;
       idim -= remapflag[ibody][0];
       idim &= IMGMASK;
       image[i] = otherdims | idim;
     }
     if (remapflag[ibody][1]) {
       idim = (image[i] >> IMGBITS) & IMGMASK;
       otherdims = image[i] ^ (idim << IMGBITS);
       idim -= remapflag[ibody][1];
       idim &= IMGMASK;
       image[i] = otherdims | (idim << IMGBITS);
     }
     if (remapflag[ibody][2]) {
       idim = image[i] >> IMG2BITS;
       otherdims = image[i] ^ (idim << IMG2BITS);
       idim -= remapflag[ibody][2];
       idim &= IMGMASK;
       image[i] = otherdims | (idim << IMG2BITS);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    count # of DOF removed by rigid bodies for atoms in igroup
    return total count of DOF
 ------------------------------------------------------------------------- */
 
 int FixRigid::dof(int tgroup)
 {
   int tgroupbit = group->bitmask[tgroup];
 
   // nall = # of point particles in each rigid body
   // mall = # of finite-size particles in each rigid body
   // particles must also be in temperature group
 
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   int *ncount = new int[nbody];
   int *mcount = new int[nbody];
   for (int ibody = 0; ibody < nbody; ibody++)
     ncount[ibody] = mcount[ibody] = 0;
 
   for (int i = 0; i < nlocal; i++)
     if (body[i] >= 0 && mask[i] & tgroupbit) {
       if (extended && eflags[i]) mcount[body[i]]++;
       else ncount[body[i]]++;
     }
 
   int *nall = new int[nbody];
   int *mall = new int[nbody];
   MPI_Allreduce(ncount,nall,nbody,MPI_INT,MPI_SUM,world);
   MPI_Allreduce(mcount,mall,nbody,MPI_INT,MPI_SUM,world);
 
   // warn if nall+mall != nrigid for any body included in temperature group
 
   int flag = 0;
   for (int ibody = 0; ibody < nbody; ibody++) {
     if (nall[ibody]+mall[ibody] > 0 &&
         nall[ibody]+mall[ibody] != nrigid[ibody]) flag = 1;
   }
   if (flag && me == 0)
     error->warning(FLERR,"Computing temperature of portions of rigid bodies");
 
   // remove appropriate DOFs for each rigid body wholly in temperature group
   // N = # of point particles in body
   // M = # of finite-size particles in body
   // 3d body has 3N + 6M dof to start with
   // 2d body has 2N + 3M dof to start with
   // 3d point-particle body with all non-zero I should have 6 dof, remove 3N-6
   // 3d point-particle body (linear) with a 0 I should have 5 dof, remove 3N-5
   // 2d point-particle body should have 3 dof, remove 2N-3
   // 3d body with any finite-size M should have 6 dof, remove (3N+6M) - 6
   // 2d body with any finite-size M should have 3 dof, remove (2N+3M) - 3
 
   int n = 0;
   if (domain->dimension == 3) {
     for (int ibody = 0; ibody < nbody; ibody++)
       if (nall[ibody]+mall[ibody] == nrigid[ibody]) {
         n += 3*nall[ibody] + 6*mall[ibody] - 6;
         if (inertia[ibody][0] == 0.0 || inertia[ibody][1] == 0.0 ||
             inertia[ibody][2] == 0.0) n++;
       }
   } else if (domain->dimension == 2) {
     for (int ibody = 0; ibody < nbody; ibody++)
       if (nall[ibody]+mall[ibody] == nrigid[ibody])
         n += 2*nall[ibody] + 3*mall[ibody] - 3;
   }
 
   delete [] ncount;
   delete [] mcount;
   delete [] nall;
   delete [] mall;
 
   return n;
 }
 
 /* ----------------------------------------------------------------------
    adjust xcm of each rigid body due to box deformation
    called by various fixes that change box size/shape
    flag = 0/1 means map from box to lamda coords or vice versa
 ------------------------------------------------------------------------- */
 
 void FixRigid::deform(int flag)
 {
   if (flag == 0)
     for (int ibody = 0; ibody < nbody; ibody++)
       domain->x2lamda(xcm[ibody],xcm[ibody]);
   else
     for (int ibody = 0; ibody < nbody; ibody++)
       domain->lamda2x(xcm[ibody],xcm[ibody]);
 }
 
 /* ----------------------------------------------------------------------
    set space-frame coords and velocity of each atom in each rigid body
    set orientation and rotation of extended particles
    x = Q displace + Xcm, mapped back to periodic box
    v = Vcm + (W cross (x - Xcm))
    if evflag is set, also tally a per-atom virial contribution
      from the constraint force implied by the velocity change
    atoms with body[i] < 0 are not in any rigid body and are skipped
 ------------------------------------------------------------------------- */
 
 void FixRigid::set_xv()
 {
   // NOTE: itype is declared but not used in this function
   int ibody,itype;
   int xbox,ybox,zbox;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
   double ione[3],exone[3],eyone[3],ezone[3],vr[6],p[3][3];
 
   tagint *image = atom->image;
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
 
   // tilt factors are only set, and only read below, for a triclinic box
 
   if (triclinic) {
     xy = domain->xy;
     xz = domain->xz;
     yz = domain->yz;
   }
 
   // set x and v of each atom
 
   for (int i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     // unpack the 3 image counts from the packed image flag
     // IMGMAX is subtracted so the counts are signed
 
     xbox = (image[i] & IMGMASK) - IMGMAX;
     ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
     zbox = (image[i] >> IMG2BITS) - IMGMAX;
 
     // save old positions and velocities for virial
     // positions are unwrapped using the image counts
     // this must happen before x[i] and v[i] are overwritten below
 
     if (evflag) {
       if (triclinic == 0) {
         x0 = x[i][0] + xbox*xprd;
         x1 = x[i][1] + ybox*yprd;
         x2 = x[i][2] + zbox*zprd;
       } else {
         x0 = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
         x1 = x[i][1] + ybox*yprd + zbox*yz;
         x2 = x[i][2] + zbox*zprd;
       }
       v0 = v[i][0];
       v1 = v[i][1];
       v2 = v[i][2];
     }
 
     // x = displacement from center-of-mass, based on body orientation
     // v = vcm + omega around center-of-mass
     // x[i] holds only the displacement at this point,
     //   xcm is not added until after v[i] has been computed from it
 
     MathExtra::matvec(ex_space[ibody],ey_space[ibody],
                       ez_space[ibody],displace[i],x[i]);
 
     v[i][0] = omega[ibody][1]*x[i][2] - omega[ibody][2]*x[i][1] +
       vcm[ibody][0];
     v[i][1] = omega[ibody][2]*x[i][0] - omega[ibody][0]*x[i][2] +
       vcm[ibody][1];
     v[i][2] = omega[ibody][0]*x[i][1] - omega[ibody][1]*x[i][0] +
       vcm[ibody][2];
 
     // add center of mass to displacement
     // map back into periodic box via xbox,ybox,zbox
     // for triclinic, add in box tilt factors as well
 
     if (triclinic == 0) {
       x[i][0] += xcm[ibody][0] - xbox*xprd;
       x[i][1] += xcm[ibody][1] - ybox*yprd;
       x[i][2] += xcm[ibody][2] - zbox*zprd;
     } else {
       x[i][0] += xcm[ibody][0] - xbox*xprd - ybox*xy - zbox*xz;
       x[i][1] += xcm[ibody][1] - ybox*yprd - zbox*yz;
       x[i][2] += xcm[ibody][2] - zbox*zprd;
     }
 
     // virial = unwrapped coords dotted into body constraint force
     // body constraint force = implied force due to v change minus f external
     // assume f does not include forces internal to body
     // 1/2 factor b/c final_integrate contributes other half
     // assume per-atom contribution is due to constraint force on that atom
 
     if (evflag) {
       // per-atom mass if rmass is defined, else per-type mass
       if (rmass) massone = rmass[i];
       else massone = mass[type[i]];
       fc0 = massone*(v[i][0] - v0)/dtf - f[i][0];
       fc1 = massone*(v[i][1] - v1)/dtf - f[i][1];
       fc2 = massone*(v[i][2] - v2)/dtf - f[i][2];
 
       // 6 components in order xx,yy,zz,xy,xz,yz
       // uses the old unwrapped coords x0,x1,x2 saved above
 
       vr[0] = 0.5*x0*fc0;
       vr[1] = 0.5*x1*fc1;
       vr[2] = 0.5*x2*fc2;
       vr[3] = 0.5*x0*fc1;
       vr[4] = 0.5*x0*fc2;
       vr[5] = 0.5*x1*fc2;
 
       v_tally(1,&i,1.0,vr);
     }
   }
 
   // set orientation, omega, angmom of each extended particle
 
   if (extended) {
     double theta_body,theta;
     double *shape,*quatatom,*inertiaatom;
 
     // each bonus pointer is only assigned if its atom style class exists
     // it is only dereferenced for atoms carrying the matching eflags bit
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
     AtomVecLine::Bonus *lbonus;
     if (avec_line) lbonus = avec_line->bonus;
     AtomVecTri::Bonus *tbonus;
     if (avec_tri) tbonus = avec_tri->bonus;
     double **omega_one = atom->omega;
     double **angmom_one = atom->angmom;
     double **mu = atom->mu;
     int *ellipsoid = atom->ellipsoid;
     int *line = atom->line;
     int *tri = atom->tri;
 
     for (int i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
 
       if (eflags[i] & SPHERE) {
         // sphere: copy the body angular velocity to the atom
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
         // ellipsoid: atom quat = body quat times stored orient, normalized
         // angmom of the atom is then set from the body omega, using
         //   ione computed from the ellipsoid shape and the atom mass
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
         ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
         ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
         ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
       } else if (eflags[i] & LINE) {
         // line: body angle = 2*acos(quat[0]), negated if quat[3] < 0
         // atom theta = stored orient + body angle,
         //   wrapped into the interval (MINUSPI,MY_PI]
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
         theta = orient[i][0] + theta_body;
         while (theta <= MINUSPI) theta += TWOPI;
         while (theta > MY_PI) theta -= TWOPI;
         lbonus[line[i]].theta = theta;
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & TRIANGLE) {
         // triangle: same as ellipsoid, but with the inertia stored in bonus
         inertiaatom = tbonus[tri[i]].inertia;
         quatatom = tbonus[tri[i]].quat;
         MathExtra::quatquat(quat[ibody],orient[i],quatatom);
         MathExtra::qnormalize(quatatom);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
                                    inertiaatom,angmom_one[i]);
       }
       // dipole is a separate test, so it applies in addition to
       //   whichever shape branch was taken above
       // mu[i][0..2] = stored dorient rotated by the body quat,
       //   then passed to snormalize3 together with mu[i][3]
       //   (presumably to restore the dipole magnitude held in mu[i][3])
       if (eflags[i] & DIPOLE) {
         MathExtra::quat_to_mat(quat[ibody],p);
         MathExtra::matvec(p,dorient[i],mu[i]);
         MathExtra::snormalize3(mu[i][3],mu[i],mu[i]);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    set space-frame velocity of each atom in a rigid body
    set omega and angmom of extended particles
    v = Vcm + (W cross (x - Xcm))
 ------------------------------------------------------------------------- */
 
 void FixRigid::set_v()
 {
   int ibody,itype;
   int xbox,ybox,zbox;
   double dx,dy,dz;
   double x0,x1,x2,v0,v1,v2,fc0,fc1,fc2,massone;
   double xy,xz,yz;
   double ione[3],exone[3],eyone[3],ezone[3],delta[3],vr[6];
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   tagint *image = atom->image;
   int nlocal = atom->nlocal;
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   if (triclinic) {
     xy = domain->xy;
     xz = domain->xz;
     yz = domain->yz;
   }
 
   // set v of each atom
 
   for (int i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     MathExtra::matvec(ex_space[ibody],ey_space[ibody],
                       ez_space[ibody],displace[i],delta);
 
     // save old velocities for virial
 
     if (evflag) {
       v0 = v[i][0];
       v1 = v[i][1];
       v2 = v[i][2];
     }
 
     v[i][0] = omega[ibody][1]*delta[2] - omega[ibody][2]*delta[1] +
       vcm[ibody][0];
     v[i][1] = omega[ibody][2]*delta[0] - omega[ibody][0]*delta[2] +
       vcm[ibody][1];
     v[i][2] = omega[ibody][0]*delta[1] - omega[ibody][1]*delta[0] +
       vcm[ibody][2];
 
     // virial = unwrapped coords dotted into body constraint force
     // body constraint force = implied force due to v change minus f external
     // assume f does not include forces internal to body
     // 1/2 factor b/c initial_integrate contributes other half
     // assume per-atom contribution is due to constraint force on that atom
 
     if (evflag) {
       if (rmass) massone = rmass[i];
       else massone = mass[type[i]];
       fc0 = massone*(v[i][0] - v0)/dtf - f[i][0];
       fc1 = massone*(v[i][1] - v1)/dtf - f[i][1];
       fc2 = massone*(v[i][2] - v2)/dtf - f[i][2];
 
       xbox = (image[i] & IMGMASK) - IMGMAX;
       ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
       zbox = (image[i] >> IMG2BITS) - IMGMAX;
 
       if (triclinic == 0) {
         x0 = x[i][0] + xbox*xprd;
         x1 = x[i][1] + ybox*yprd;
         x2 = x[i][2] + zbox*zprd;
       } else {
         x0 = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
         x1 = x[i][1] + ybox*yprd + zbox*yz;
         x2 = x[i][2] + zbox*zprd;
       }
 
       vr[0] = 0.5*x0*fc0;
       vr[1] = 0.5*x1*fc1;
       vr[2] = 0.5*x2*fc2;
       vr[3] = 0.5*x0*fc1;
       vr[4] = 0.5*x0*fc2;
       vr[5] = 0.5*x1*fc2;
 
       v_tally(1,&i,1.0,vr);
     }
   }
 
   // set omega, angmom of each extended particle
 
   if (extended) {
     double *shape,*quatatom,*inertiaatom;
 
     AtomVecEllipsoid::Bonus *ebonus;
     if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
     AtomVecTri::Bonus *tbonus;
     if (avec_tri) tbonus = avec_tri->bonus;
     double **omega_one = atom->omega;
     double **angmom_one = atom->angmom;
     int *ellipsoid = atom->ellipsoid;
     int *tri = atom->tri;
 
     for (int i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
 
       if (eflags[i] & SPHERE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]);
         ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]);
         ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]);
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,ione,
                                    angmom_one[i]);
       } else if (eflags[i] & LINE) {
         omega_one[i][0] = omega[ibody][0];
         omega_one[i][1] = omega[ibody][1];
         omega_one[i][2] = omega[ibody][2];
       } else if (eflags[i] & TRIANGLE) {
         inertiaatom = tbonus[tri[i]].inertia;
         quatatom = tbonus[tri[i]].quat;
         MathExtra::q_to_exyz(quatatom,exone,eyone,ezone);
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
                                    inertiaatom,angmom_one[i]);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    one-time initialization of rigid body attributes
    extended flags, masstotal, center-of-mass
    Cartesian and diagonalized inertia tensor
    read per-body attributes from infile if specified
 ------------------------------------------------------------------------- */
 
 void FixRigid::setup_bodies()
 {
   int i,itype,ibody;
 
   // extended = 1 if any particle in a rigid body is finite size
   //              or has a dipole moment
 
   extended = orientflag = dorientflag = 0;
 
   AtomVecEllipsoid::Bonus *ebonus;
   if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus;
   AtomVecLine::Bonus *lbonus;
   if (avec_line) lbonus = avec_line->bonus;
   AtomVecTri::Bonus *tbonus;
   if (avec_tri) tbonus = avec_tri->bonus;
   double **mu = atom->mu;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *ellipsoid = atom->ellipsoid;
   int *line = atom->line;
   int *tri = atom->tri;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   if (atom->radius_flag || atom->ellipsoid_flag || atom->line_flag ||
       atom->tri_flag || atom->mu_flag) {
     int flag = 0;
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       if (radius && radius[i] > 0.0) flag = 1;
       if (ellipsoid && ellipsoid[i] >= 0) flag = 1;
       if (line && line[i] >= 0) flag = 1;
       if (tri && tri[i] >= 0) flag = 1;
       if (mu && mu[i][3] > 0.0) flag = 1;
     }
 
     MPI_Allreduce(&flag,&extended,1,MPI_INT,MPI_MAX,world);
   }
 
   // grow extended arrays and set extended flags for each particle
   // orientflag = 4 if any particle stores ellipsoid or tri orientation
   // orientflag = 1 if any particle stores line orientation
   // dorientflag = 1 if any particle stores dipole orientation
 
   if (extended) {
     if (atom->ellipsoid_flag) orientflag = 4;
     if (atom->line_flag) orientflag = 1;
     if (atom->tri_flag) orientflag = 4;
     if (atom->mu_flag) dorientflag = 1;
     grow_arrays(atom->nmax);
 
     for (i = 0; i < nlocal; i++) {
       eflags[i] = 0;
       if (body[i] < 0) continue;
 
       // set to POINT or SPHERE or ELLIPSOID or LINE
 
       if (radius && radius[i] > 0.0) {
         eflags[i] |= SPHERE;
         eflags[i] |= OMEGA;
         eflags[i] |= TORQUE;
       } else if (ellipsoid && ellipsoid[i] >= 0) {
         eflags[i] |= ELLIPSOID;
         eflags[i] |= ANGMOM;
         eflags[i] |= TORQUE;
       } else if (line && line[i] >= 0) {
         eflags[i] |= LINE;
         eflags[i] |= OMEGA;
         eflags[i] |= TORQUE;
       } else if (tri && tri[i] >= 0) {
         eflags[i] |= TRIANGLE;
         eflags[i] |= ANGMOM;
         eflags[i] |= TORQUE;
       } else eflags[i] |= POINT;
 
       // set DIPOLE if atom->mu and mu[3] > 0.0
 
       if (atom->mu_flag && mu[i][3] > 0.0)
         eflags[i] |= DIPOLE;
     }
   }
 
   // compute masstotal & center-of-mass of each rigid body
   // error if image flag is not 0 in a non-periodic dim
 
   double **x = atom->x;
   tagint *image = atom->image;
 
   int *periodicity = domain->periodicity;
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   double xy = domain->xy;
   double xz = domain->xz;
   double yz = domain->yz;
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
   int xbox,ybox,zbox;
   double massone,xunwrap,yunwrap,zunwrap;
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     xbox = (image[i] & IMGMASK) - IMGMAX;
     ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
     zbox = (image[i] >> IMG2BITS) - IMGMAX;
     if (rmass) massone = rmass[i];
     else massone = mass[type[i]];
 
     if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) ||
         (zbox && !periodicity[2]))
       error->one(FLERR,"Fix rigid atom has non-zero image flag "
                  "in a non-periodic dimension");
 
     if (triclinic == 0) {
       xunwrap = x[i][0] + xbox*xprd;
       yunwrap = x[i][1] + ybox*yprd;
       zunwrap = x[i][2] + zbox*zprd;
     } else {
       xunwrap = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
       yunwrap = x[i][1] + ybox*yprd + zbox*yz;
       zunwrap = x[i][2] + zbox*zprd;
     }
 
     sum[ibody][0] += xunwrap * massone;
     sum[ibody][1] += yunwrap * massone;
     sum[ibody][2] += zunwrap * massone;
     sum[ibody][3] += massone;
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   for (ibody = 0; ibody < nbody; ibody++) {
     masstotal[ibody] = all[ibody][3];
     xcm[ibody][0] = all[ibody][0]/masstotal[ibody];
     xcm[ibody][1] = all[ibody][1]/masstotal[ibody];
     xcm[ibody][2] = all[ibody][2]/masstotal[ibody];
   }
 
   // overwrite masstotal and center-of-mass with file values
   // inbody[i] = 0/1 if Ith rigid body is initialized by file
 
   int *inbody;
   if (infile) {
     memory->create(inbody,nbody,"rigid:inbody");
     for (ibody = 0; ibody < nbody; ibody++) inbody[ibody] = 0;
     readfile(0,masstotal,xcm,inbody);
   }
 
   // set image flags for each rigid body to default values
   // then remap the xcm of each body back into simulation box if needed
 
   for (ibody = 0; ibody < nbody; ibody++)
     imagebody[ibody] = ((tagint) IMGMAX << IMG2BITS) | 
       ((tagint) IMGMAX << IMGBITS) | IMGMAX;
 
   pre_neighbor();
 
   // compute 6 moments of inertia of each body in Cartesian reference frame
   // dx,dy,dz = coords relative to center-of-mass
   // symmetric 3x3 inertia tensor stored in Voigt notation as 6-vector
 
   double dx,dy,dz,rad;
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     xbox = (image[i] & IMGMASK) - IMGMAX;
     ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
     zbox = (image[i] >> IMG2BITS) - IMGMAX;
 
     if (triclinic == 0) {
       xunwrap = x[i][0] + xbox*xprd;
       yunwrap = x[i][1] + ybox*yprd;
       zunwrap = x[i][2] + zbox*zprd;
     } else {
       xunwrap = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
       yunwrap = x[i][1] + ybox*yprd + zbox*yz;
       zunwrap = x[i][2] + zbox*zprd;
     }
 
     dx = xunwrap - xcm[ibody][0];
     dy = yunwrap - xcm[ibody][1];
     dz = zunwrap - xcm[ibody][2];
 
     if (rmass) massone = rmass[i];
     else massone = mass[type[i]];
 
     sum[ibody][0] += massone * (dy*dy + dz*dz);
     sum[ibody][1] += massone * (dx*dx + dz*dz);
     sum[ibody][2] += massone * (dx*dx + dy*dy);
     sum[ibody][3] -= massone * dy*dz;
     sum[ibody][4] -= massone * dx*dz;
     sum[ibody][5] -= massone * dx*dy;
   }
 
   // extended particles may contribute extra terms to moments of inertia
 
   if (extended) {
     double ivec[6];
     double *shape,*quatatom,*inertiaatom;
     double length,theta;
 
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
       if (rmass) massone = rmass[i];
       else massone = mass[type[i]];
 
       if (eflags[i] & SPHERE) {
         sum[ibody][0] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::inertia_ellipsoid(shape,quatatom,massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       } else if (eflags[i] & LINE) {
         length = lbonus[line[i]].length;
         theta = lbonus[line[i]].theta;
         MathExtra::inertia_line(length,theta,massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       } else if (eflags[i] & TRIANGLE) {
         inertiaatom = tbonus[tri[i]].inertia;
         quatatom = tbonus[tri[i]].quat;
         MathExtra::inertia_triangle(inertiaatom,quatatom,massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       }
     }
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   // overwrite Cartesian inertia tensor with file values
 
   if (infile) readfile(1,NULL,all,inbody);
 
   // diagonalize inertia tensor for each body via Jacobi rotations
   // inertia = 3 eigenvalues = principal moments of inertia
   // evectors and exyz_space = 3 evectors = principal axes of rigid body
 
   int ierror;
   double cross[3];
   double tensor[3][3],evectors[3][3];
 
   for (ibody = 0; ibody < nbody; ibody++) {
     tensor[0][0] = all[ibody][0];
     tensor[1][1] = all[ibody][1];
     tensor[2][2] = all[ibody][2];
     tensor[1][2] = tensor[2][1] = all[ibody][3];
     tensor[0][2] = tensor[2][0] = all[ibody][4];
     tensor[0][1] = tensor[1][0] = all[ibody][5];
 
     ierror = MathExtra::jacobi(tensor,inertia[ibody],evectors);
     if (ierror) error->all(FLERR,
                            "Insufficient Jacobi rotations for rigid body");
 
     ex_space[ibody][0] = evectors[0][0];
     ex_space[ibody][1] = evectors[1][0];
     ex_space[ibody][2] = evectors[2][0];
     ey_space[ibody][0] = evectors[0][1];
     ey_space[ibody][1] = evectors[1][1];
     ey_space[ibody][2] = evectors[2][1];
     ez_space[ibody][0] = evectors[0][2];
     ez_space[ibody][1] = evectors[1][2];
     ez_space[ibody][2] = evectors[2][2];
 
     // if any principal moment < scaled EPSILON, set to 0.0
 
     double max;
     max = MAX(inertia[ibody][0],inertia[ibody][1]);
     max = MAX(max,inertia[ibody][2]);
 
     if (inertia[ibody][0] < EPSILON*max) inertia[ibody][0] = 0.0;
     if (inertia[ibody][1] < EPSILON*max) inertia[ibody][1] = 0.0;
     if (inertia[ibody][2] < EPSILON*max) inertia[ibody][2] = 0.0;
 
     // enforce 3 evectors as a right-handed coordinate system
     // flip 3rd vector if needed
 
     MathExtra::cross3(ex_space[ibody],ey_space[ibody],cross);
     if (MathExtra::dot3(cross,ez_space[ibody]) < 0.0)
       MathExtra::negate3(ez_space[ibody]);
 
     // create initial quaternion
 
     MathExtra::exyz_to_q(ex_space[ibody],ey_space[ibody],ez_space[ibody],
                          quat[ibody]);
   }
 
   // displace = initial atom coords in basis of principal axes
   // set displace = 0.0 for atoms not in any rigid body
   // for extended particles, set their orientation wrt to rigid body
 
   double qc[4],delta[3];
   double *quatatom;
   double theta_body;
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) {
       displace[i][0] = displace[i][1] = displace[i][2] = 0.0;
       continue;
     }
 
     ibody = body[i];
 
     xbox = (image[i] & IMGMASK) - IMGMAX;
     ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
     zbox = (image[i] >> IMG2BITS) - IMGMAX;
 
     if (triclinic == 0) {
       xunwrap = x[i][0] + xbox*xprd;
       yunwrap = x[i][1] + ybox*yprd;
       zunwrap = x[i][2] + zbox*zprd;
     } else {
       xunwrap = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
       yunwrap = x[i][1] + ybox*yprd + zbox*yz;
       zunwrap = x[i][2] + zbox*zprd;
     }
 
     delta[0] = xunwrap - xcm[ibody][0];
     delta[1] = yunwrap - xcm[ibody][1];
     delta[2] = zunwrap - xcm[ibody][2];
     MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],
                                 ez_space[ibody],delta,displace[i]);
 
     if (extended) {
       if (eflags[i] & ELLIPSOID) {
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::qconjugate(quat[ibody],qc);
         MathExtra::quatquat(qc,quatatom,orient[i]);
         MathExtra::qnormalize(orient[i]);
       } else if (eflags[i] & LINE) {
         if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
         else theta_body = -2.0*acos(quat[ibody][0]);
         orient[i][0] = lbonus[line[i]].theta - theta_body;
         while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI;
         while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI;
         if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
       } else if (eflags[i] & TRIANGLE) {
         quatatom = tbonus[tri[i]].quat;
         MathExtra::qconjugate(quat[ibody],qc);
         MathExtra::quatquat(qc,quatatom,orient[i]);
         MathExtra::qnormalize(orient[i]);
       } else if (orientflag == 4) {
         orient[i][0] = orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
       } else if (orientflag == 1)
         orient[i][0] = 0.0;
 
       if (eflags[i] & DIPOLE) {
         MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],
                                     ez_space[ibody],mu[i],dorient[i]);
         MathExtra::snormalize3(mu[i][3],dorient[i],dorient[i]);
       } else if (dorientflag)
         dorient[i][0] = dorient[i][1] = dorient[i][2] = 0.0;
     }
   }
 
   // test for valid principal moments & axes
   // recompute moments of inertia around new axes
   // 3 diagonal moments should equal principal moments
   // 3 off-diagonal moments should be 0.0
   // extended particles may contribute extra terms to moments of inertia
 
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
 
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
     if (rmass) massone = rmass[i];
     else massone = mass[type[i]];
 
     sum[ibody][0] += massone *
       (displace[i][1]*displace[i][1] + displace[i][2]*displace[i][2]);
     sum[ibody][1] += massone *
       (displace[i][0]*displace[i][0] + displace[i][2]*displace[i][2]);
     sum[ibody][2] += massone *
       (displace[i][0]*displace[i][0] + displace[i][1]*displace[i][1]);
     sum[ibody][3] -= massone * displace[i][1]*displace[i][2];
     sum[ibody][4] -= massone * displace[i][0]*displace[i][2];
     sum[ibody][5] -= massone * displace[i][0]*displace[i][1];
   }
 
   if (extended) {
     double ivec[6];
     double *shape,*inertiaatom;
     double length;
 
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
       if (rmass) massone = rmass[i];
       else massone = mass[type[i]];
 
       if (eflags[i] & SPHERE) {
         sum[ibody][0] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][1] += SINERTIA*massone * radius[i]*radius[i];
         sum[ibody][2] += SINERTIA*massone * radius[i]*radius[i];
       } else if (eflags[i] & ELLIPSOID) {
         shape = ebonus[ellipsoid[i]].shape;
         MathExtra::inertia_ellipsoid(shape,orient[i],massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       } else if (eflags[i] & LINE) {
         length = lbonus[line[i]].length;
         MathExtra::inertia_line(length,orient[i][0],massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       } else if (eflags[i] & TRIANGLE) {
         inertiaatom = tbonus[tri[i]].inertia;
         MathExtra::inertia_triangle(inertiaatom,orient[i],massone,ivec);
         sum[ibody][0] += ivec[0];
         sum[ibody][1] += ivec[1];
         sum[ibody][2] += ivec[2];
         sum[ibody][3] += ivec[3];
         sum[ibody][4] += ivec[4];
         sum[ibody][5] += ivec[5];
       }
     }
   }
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
 
   // error check that re-computed momemts of inertia match diagonalized ones
   // do not do test for bodies with params read from infile
 
   double norm;
   for (ibody = 0; ibody < nbody; ibody++) {
     if (infile && inbody[ibody]) continue;
     if (inertia[ibody][0] == 0.0) {
       if (fabs(all[ibody][0]) > TOLERANCE)
         error->all(FLERR,"Fix rigid: Bad principal moments");
     } else {
       if (fabs((all[ibody][0]-inertia[ibody][0])/inertia[ibody][0]) >
           TOLERANCE) error->all(FLERR,"Fix rigid: Bad principal moments");
     }
     if (inertia[ibody][1] == 0.0) {
       if (fabs(all[ibody][1]) > TOLERANCE)
         error->all(FLERR,"Fix rigid: Bad principal moments");
     } else {
       if (fabs((all[ibody][1]-inertia[ibody][1])/inertia[ibody][1]) >
           TOLERANCE) error->all(FLERR,"Fix rigid: Bad principal moments");
     }
     if (inertia[ibody][2] == 0.0) {
       if (fabs(all[ibody][2]) > TOLERANCE)
         error->all(FLERR,"Fix rigid: Bad principal moments");
     } else {
       if (fabs((all[ibody][2]-inertia[ibody][2])/inertia[ibody][2]) >
           TOLERANCE) error->all(FLERR,"Fix rigid: Bad principal moments");
     }
     norm = (inertia[ibody][0] + inertia[ibody][1] + inertia[ibody][2]) / 3.0;
     if (fabs(all[ibody][3]/norm) > TOLERANCE ||
         fabs(all[ibody][4]/norm) > TOLERANCE ||
         fabs(all[ibody][5]/norm) > TOLERANCE)
       error->all(FLERR,"Fix rigid: Bad principal moments");
   }
 
   if (infile) memory->destroy(inbody);
 }
 
 /* ----------------------------------------------------------------------
    read per rigid body info from user-provided file
    which = 0 to read total mass and center-of-mass, store in vec and array
    which = 1 to read 6 moments of inertia, store in array
    flag inbody = 1 for bodies whose info is read from file
    nlines = # of lines of rigid body info
    one line = rigid-ID mass xcm ycm zcm ixx iyy izz ixy ixz iyz
 ------------------------------------------------------------------------- */
 
 void FixRigid::readfile(int which, double *vec, double **array, int *inbody)
 {
   int i,j,m,nchunk,id;
   int nlines;
   FILE *fp;
   char *eof,*start,*next,*buf;
   char line[MAXLINE];
   
   if (me == 0) {
     fp = fopen(infile,"r");
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open fix rigid infile %s",infile);
       error->one(FLERR,str);
     }
 
     while (1) {
       eof = fgets(line,MAXLINE,fp);
       if (eof == NULL) error->one(FLERR,"Unexpected end of fix rigid file");
       start = &line[strspn(line," \t\n\v\f\r")];
       if (*start != '\0' && *start != '#') break;
     }
 
     sscanf(line,"%d",&nlines);
   }
 
   MPI_Bcast(&nlines,1,MPI_INT,0,world);
 
   char *buffer = new char[CHUNK*MAXLINE];
   char **values = new char*[ATTRIBUTE_PERBODY];
 
   int nread = 0;
   while (nread < nlines) {
     if (nlines-nread > CHUNK) nchunk = CHUNK;
     else nchunk = nlines-nread;
     if (me == 0) {
       char *eof;
       m = 0;
       for (i = 0; i < nchunk; i++) {
         eof = fgets(&buffer[m],MAXLINE,fp);
         if (eof == NULL) error->one(FLERR,"Unexpected end of fix rigid file");
         m += strlen(&buffer[m]);
       }
       if (buffer[m-1] != '\n') strcpy(&buffer[m++],"\n");
       m++;
     }
     MPI_Bcast(&m,1,MPI_INT,0,world);
     MPI_Bcast(buffer,m,MPI_CHAR,0,world);
 
     buf = buffer;
     next = strchr(buf,'\n');
     *next = '\0';
     int nwords = atom->count_words(buf);
     *next = '\n';
 
     if (nwords != ATTRIBUTE_PERBODY)
       error->all(FLERR,"Incorrect rigid body format in fix rigid file");
     
     // loop over lines of rigid body attributes
     // tokenize the line into values
     // id = rigid body ID
     // use ID as-is for SINGLE, as mol-ID for MOLECULE, as-is for GROUP
     // for which = 0, store mass/com in vec/array
     // for which = 1, store interia tensor array, invert 3,4,5 values to Voigt
 
     for (int i = 0; i < nchunk; i++) {
       next = strchr(buf,'\n');
       
       values[0] = strtok(buf," \t\n\r\f");
       for (j = 1; j < nwords; j++)
         values[j] = strtok(NULL," \t\n\r\f");
       
       id = atoi(values[0]);
       if (rstyle == MOLECULE) {
         if (id <= 0 || id > maxmol) 
           error->all(FLERR,"Invalid rigid body ID in fix rigid file");
         id = mol2body[id];
       } else id--;
 
       if (id < 0 || id >= nbody) 
         error->all(FLERR,"Invalid rigid body ID in fix rigid file");
       inbody[id] = 1;
 
       if (which == 0) {
         vec[id] = atof(values[1]);
         array[id][0] = atof(values[2]);
         array[id][1] = atof(values[3]);
         array[id][2] = atof(values[4]);
       } else {
         array[id][0] = atof(values[5]);
         array[id][1] = atof(values[6]);
         array[id][2] = atof(values[7]);
         array[id][3] = atof(values[10]);
         array[id][4] = atof(values[9]);
         array[id][5] = atof(values[8]);
       }
 
       buf = next + 1;
     }
     
     nread += nchunk;
   }
 
   if (me == 0) fclose(fp);
 
   delete [] buffer;
   delete [] values;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixRigid::memory_usage()
 {
   // sizes mirror the per-atom allocations made in grow_arrays()
   // each contribution is accumulated; the orient/dorient terms must add
   // to the running total rather than replace it
 
   int nmax = atom->nmax;
   double bytes = nmax * sizeof(int);       // body
   bytes += nmax*3 * sizeof(double);        // displace
   bytes += maxvatom*6 * sizeof(double);    // vatom
   if (extended) {
     bytes += nmax * sizeof(int);           // eflags
     if (orientflag) bytes += nmax*orientflag * sizeof(double);   // orient
     if (dorientflag) bytes += nmax*3 * sizeof(double);           // dorient
   }
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixRigid::grow_arrays(int nmax)
 {
   // arrays that exist for every rigid fix
 
   memory->grow(body,nmax,"rigid:body");
   memory->grow(displace,nmax,3,"rigid:displace");
 
   // remaining arrays are only grown when extended particles are present
 
   if (!extended) return;
 
   memory->grow(eflags,nmax,"rigid:eflags");
   if (orientflag) memory->grow(orient,nmax,orientflag,"rigid:orient");
   if (dorientflag) memory->grow(dorient,nmax,3,"rigid:dorient");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixRigid::copy_arrays(int i, int j, int delflag)
 {
   // copy per-atom rigid-body data from slot i to slot j
   // delflag is not used by this fix
 
   body[j] = body[i];
   for (int d = 0; d < 3; d++) displace[j][d] = displace[i][d];
 
   if (!extended) return;
 
   eflags[j] = eflags[i];
   for (int k = 0; k < orientflag; k++) orient[j][k] = orient[i][k];
   if (dorientflag)
     for (int d = 0; d < 3; d++) dorient[j][d] = dorient[i][d];
 }
 
 /* ----------------------------------------------------------------------
    initialize one atom's array values, called when atom is created
 ------------------------------------------------------------------------- */
 
 void FixRigid::set_arrays(int i)
 {
   // a newly created atom belongs to no body and has zero displacement
 
   body[i] = -1;
   double *disp = displace[i];
   disp[0] = disp[1] = disp[2] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixRigid::pack_exchange(int i, double *buf)
 {
   // layout: body, displace[3], then (extended only) eflags,
   // orient[orientflag], dorient[3] if dorientflag is set
   // returns the number of values written to buf
 
   int n = 0;
   buf[n++] = body[i];
   for (int d = 0; d < 3; d++) buf[n++] = displace[i][d];
 
   if (extended) {
     buf[n++] = eflags[i];
     for (int k = 0; k < orientflag; k++) buf[n++] = orient[i][k];
     if (dorientflag)
       for (int d = 0; d < 3; d++) buf[n++] = dorient[i][d];
   }
 
   return n;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based arrays from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixRigid::unpack_exchange(int nlocal, double *buf)
 {
   // inverse of pack_exchange(): same field order, stored at index nlocal
   // returns the number of values consumed from buf
 
   int n = 0;
   body[nlocal] = static_cast<int> (buf[n++]);
   for (int d = 0; d < 3; d++) displace[nlocal][d] = buf[n++];
 
   if (extended) {
     eflags[nlocal] = static_cast<int> (buf[n++]);
     for (int k = 0; k < orientflag; k++) orient[nlocal][k] = buf[n++];
     if (dorientflag)
       for (int d = 0; d < 3; d++) dorient[nlocal][d] = buf[n++];
   }
 
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigid::reset_dt()
 {
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dtq = 0.5 * update->dt;
 }
 
 /* ----------------------------------------------------------------------
    return temperature of collection of rigid bodies
    non-active DOF are removed by fflag/tflag and in tfactor
 ------------------------------------------------------------------------- */
 
 double FixRigid::compute_scalar()
 {
   double wbody[3],rot[3][3];
 
   double t = 0.0;
 
   for (int i = 0; i < nbody; i++) {
     t += masstotal[i] * (fflag[i][0]*vcm[i][0]*vcm[i][0] +
                              fflag[i][1]*vcm[i][1]*vcm[i][1] +
                              fflag[i][2]*vcm[i][2]*vcm[i][2]);
 
     // wbody = angular velocity in body frame
 
     MathExtra::quat_to_mat(quat[i],rot);
     MathExtra::transpose_matvec(rot,angmom[i],wbody);
     if (inertia[i][0] == 0.0) wbody[0] = 0.0;
     else wbody[0] /= inertia[i][0];
     if (inertia[i][1] == 0.0) wbody[1] = 0.0;
     else wbody[1] /= inertia[i][1];
     if (inertia[i][2] == 0.0) wbody[2] = 0.0;
     else wbody[2] /= inertia[i][2];
 
     t += tflag[i][0]*inertia[i][0]*wbody[0]*wbody[0] +
       tflag[i][1]*inertia[i][1]*wbody[1]*wbody[1] +
       tflag[i][2]*inertia[i][2]*wbody[2]*wbody[2];
   }
 
   t *= tfactor;
   return t;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *FixRigid::extract(const char *str, int &dim)
 {
   // look up an internal quantity by name
   // dim is set only on a match: 1 for the per-atom/per-body vectors,
   // 0 for the scalar t_target; unknown names return NULL
 
   void *ptr = NULL;
 
   if (!strcmp(str,"body")) {
     dim = 1;
     ptr = body;
   } else if (!strcmp(str,"masstotal")) {
     dim = 1;
     ptr = masstotal;
   } else if (!strcmp(str,"t_target")) {
     dim = 0;
     ptr = &t_target;
   }
 
   return ptr;
 }
 
 /* ----------------------------------------------------------------------
    return attributes of a rigid body
    15 values per body
    xcm = 0,1,2; vcm = 3,4,5; fcm = 6,7,8; torque = 9,10,11; image = 12,13,14
 ------------------------------------------------------------------------- */
 
 double FixRigid::compute_array(int i, int j)
 {
   // columns 12,13,14 (and anything above) decode the packed image flags
 
   if (j >= 12) {
     if (j == 12) return (imagebody[i] & IMGMASK) - IMGMAX;
     if (j == 13) return (imagebody[i] >> IMGBITS & IMGMASK) - IMGMAX;
     return (imagebody[i] >> IMG2BITS) - IMGMAX;
   }
 
   // columns 0-11 are 3-vectors: xcm, vcm, fcm, torque
 
   if (j >= 9) return torque[i][j-9];
   if (j >= 6) return fcm[i][j-6];
   if (j >= 3) return vcm[i][j-3];
   return xcm[i][j];
 }
diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h
index 2ebcce483..6082cbce0 100644
--- a/src/RIGID/fix_rigid.h
+++ b/src/RIGID/fix_rigid.h
@@ -1,236 +1,236 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(rigid,FixRigid)
 
 #else
 
 #ifndef LMP_FIX_RIGID_H
 #define LMP_FIX_RIGID_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix that manages sets of atoms as rigid bodies.
 // Holds per-body state (mass, center-of-mass, inertia, quaternion, ...)
 // and per-atom state (body index, displacement, optional orientation).
 
 class FixRigid : public Fix {
  public:
   FixRigid(class LAMMPS *, int, char **);
   virtual ~FixRigid();
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void initial_integrate(int);
   void post_force(int);
   virtual void final_integrate();
   void initial_integrate_respa(int, int, int);
   void final_integrate_respa(int, int);
   virtual double compute_scalar();     // temperature of the rigid bodies
   virtual int modify_param(int, char **) {return 0;}
   
   // management of the local atom-based arrays (body, displace, ...)
 
   double memory_usage();
   void grow_arrays(int);
   void copy_arrays(int, int, int);
   void set_arrays(int);
   int pack_exchange(int, double *);
   int unpack_exchange(int, double *);
 
   void pre_neighbor();
   int dof(int);
   void deform(int);
   void reset_dt();                     // recompute dtv,dtf,dtq from update->dt
   virtual void *extract(const char*,int &);
                                        // lookup by name: body, masstotal, t_target
   double compute_array(int, int);      // 15 attributes per body, see .cpp
     
  protected:
   int me,nprocs;            // me = proc ID (proc 0 reads the infile)
   double dtv,dtf,dtq;       // timestep-derived factors, set in reset_dt()
   double *step_respa;
   int triclinic;
   double MINUSPI,TWOPI;     // used to wrap line-segment angles in setup_bodies()
 
   int rstyle;               // SINGLE,MOLECULE,GROUP
   int firstflag;            // 1 for first-time setup of rigid bodies
   char *infile;             // file to read rigid body attributes from
 
   int dimension;            // # of dimensions
   int nbody;                // # of rigid bodies
   int *nrigid;              // # of atoms in each rigid body
   int *mol2body;            // convert mol-ID to rigid body index
   int maxmol;               // size of mol2body = max mol-ID
 
   int *body;                // which body each atom is part of (-1 if none)
   double **displace;        // displacement of each atom in body coords
 
   double *masstotal;        // total mass of each rigid body
   double **xcm;             // coords of center-of-mass of each rigid body
   double **vcm;             // velocity of center-of-mass of each
   double **fcm;             // force on center-of-mass of each
   double **inertia;         // 3 principal components of inertia of each
   double **ex_space,**ey_space,**ez_space;
                             // principal axes of each in space coords
   double **angmom;          // angular momentum of each in space coords
   double **omega;           // angular velocity of each in space coords
   double **torque;          // torque on each rigid body in space coords
   double **quat;            // quaternion of each rigid body
   tagint *imagebody;        // image flags of xcm of each rigid body
   double **fflag;           // flag for on/off of center-of-mass force
   double **tflag;           // flag for on/off of center-of-mass torque
   double **langextra;       // Langevin thermostat forces and torques
 
   double **sum,**all;       // work vectors for each rigid body
   int **remapflag;          // PBC remap flags for each rigid body
 
   int extended;             // 1 if any particles have extended attributes
   int orientflag;           // # of orientation values stored per particle
                             // (0 = none; the code tests for 1 and 4)
   int dorientflag;          // 1 if particles store dipole orientation
 
   int *eflags;              // flags for extended particles
   double **orient;          // orientation vector of particle wrt rigid body
   double **dorient;         // orientation of dipole mu wrt rigid body
 
   double tfactor;           // scale factor on temperature of rigid bodies
   int langflag;             // 0/1 = no/yes Langevin thermostat
 
   int tstat_flag;           // NVT settings
   double t_start,t_stop,t_target;
   double t_period,t_freq;
   int t_chain,t_iter,t_order;
 
   int pstat_flag;           // NPT settings
   double p_start[3],p_stop[3];
   double p_period[3],p_freq[3];
   int p_flag[3];  
   int pcouple,pstyle;
   int p_chain;
 
   int allremap;              // remap all atoms
   int dilate_group_bit;      // mask for dilation group
   char *id_dilate;           // group name to dilate
   
   class RanMars *random;
   class AtomVecEllipsoid *avec_ellipsoid;
   class AtomVecLine *avec_line;
   class AtomVecTri *avec_tri;
 
   int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
   int OMEGA,ANGMOM,TORQUE;
 
   void no_squish_rotate(int, double *, double *, double *, double);
   void set_xv();
   void set_v();
   void setup_bodies();
   void readfile(int, double *, double **, int *);
                             // read per-body mass/com or inertia from infile
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix rigid molecule requires atom attribute molecule
 
 Self-explanatory.
 
 E: Could not find fix rigid group ID
 
 A group ID used in the fix rigid command does not exist.
 
 E: One or more atoms belong to multiple rigid bodies
 
 Two or more rigid bodies defined by the fix rigid command cannot
 contain the same atom.
 
 E: No rigid bodies defined
 
 The fix specification did not end up defining any rigid bodies.
 
 E: Fix rigid z force cannot be on for 2d simulation
 
 Self-explanatory.
 
 E: Fix rigid xy torque cannot be on for 2d simulation
 
 Self-explanatory.
 
 E: Fix rigid langevin period must be > 0.0
 
 Self-explanatory.
 
-E: Fix rigid nvt/npt/nph dilate group ID does not exist
+E: Fix rigid npt/nph dilate group ID does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: One or zero atoms in rigid body
 
 Any rigid body defined by the fix rigid command must contain 2 or more
 atoms.
 
 W: More than one fix rigid
 
 It is not efficient to use fix rigid more than once.
 
 E: Rigid fix must come before NPT/NPH fix
 
 NPT/NPH fix must be defined in input script after all rigid fixes,
 else the rigid fix contribution to the pressure virial is
 incorrect.
 
 W: Computing temperature of portions of rigid bodies
 
 The group defined by the temperature compute does not encompass all
 the atoms in one or more rigid bodies, so the change in
 degrees-of-freedom for the atoms in those partial rigid bodies will
 not be accounted for.
 
 E: Fix rigid atom has non-zero image flag in a non-periodic dimension
 
-You cannot set image flags for non-periodic dimensions.
+Image flags for non-periodic dimensions should not be set.
 
 E: Insufficient Jacobi rotations for rigid body
 
 Eigensolve for rigid body was not sufficiently accurate.
 
 E: Fix rigid: Bad principal moments
 
 The principal moments of inertia computed for a rigid body
 are not within the required tolerances.
 
 E: Cannot open fix rigid infile %s
 
 The specified file cannot be opened.  Check that the path and name are
 correct.
 
 E: Unexpected end of fix rigid file
 
 A read operation from the file failed.
 
 E: Incorrect rigid body format in fix rigid file
 
 The number of fields per line is not what was expected.
 
 E: Invalid rigid body ID in fix rigid file
 
 The ID does not match the number or an existing ID of rigid bodies
 that are defined by the fix rigid command.
 
 */
diff --git a/src/RIGID/fix_rigid_nh.cpp b/src/RIGID/fix_rigid_nh.cpp
index 8be15b3ed..6bcabe2a7 100644
--- a/src/RIGID/fix_rigid_nh.cpp
+++ b/src/RIGID/fix_rigid_nh.cpp
@@ -1,1430 +1,1430 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tony Sheh (U Michigan), Trung Dac Nguyen (U Michigan)
    references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005)
                Miller et al., J Chem Phys. 116, 8649-8659 (2002)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "string.h"
 #include "fix_rigid_nh.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "compute.h"
 #include "domain.h"
 #include "update.h"
 #include "modify.h"
 #include "fix_deform.h"
 #include "group.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "output.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
 
 #define EPSILON 1.0e-7
 
 /* ---------------------------------------------------------------------- */
 
 // Constructor: validates the barostat/thermostat settings already held in
 // the FixRigid members (p_flag, p_period, pcouple, ...), then allocates and
 // zeroes the Nose-Hoover chain state.
 // The checks run in the order written, so the first failing check decides
 // which error message is reported.
 
 FixRigidNH::FixRigidNH(LAMMPS *lmp, int narg, char **arg) :
   FixRigid(lmp, narg, arg)
 {
   // error checks: could be moved up to FixRigid
   
   if ((p_flag[0] == 1 && p_period[0] <= 0.0) || 
       (p_flag[1] == 1 && p_period[1] <= 0.0) || 
       (p_flag[2] == 1 && p_period[2] <= 0.0)) 
     error->all(FLERR,"Fix rigid npt/nph period must be > 0.0");
   
   if (dimension == 2 && p_flag[2])
     error->all(FLERR,"Invalid fix rigid npt/nph command for a 2d simulation");
   if (dimension == 2 && (pcouple == YZ || pcouple == XZ))
     error->all(FLERR,"Invalid fix rigid npt/nph command for a 2d simulation");
 
   // every dimension named by the coupling mode must be barostatted
 
   if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings");
   if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0)
     error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings");
   if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings");
   if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings");
   if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix rigid npt/nph command pressure settings");
 
   // require periodicity in tensile dimension
 
   if (p_flag[0] && domain->xperiodic == 0)
     error->all(FLERR,
                "Cannot use fix rigid npt/nph on a non-periodic dimension");
   if (p_flag[1] && domain->yperiodic == 0)
     error->all(FLERR,
                "Cannot use fix rigid npt/nph on a non-periodic dimension");
   if (p_flag[2] && domain->zperiodic == 0)
     error->all(FLERR,
                "Cannot use fix rigid npt/nph on a non-periodic dimension");
   
   // coupled dimensions must share identical start/stop/period values
 
     if (pcouple == XYZ && dimension == 3 &&
       (p_start[0] != p_start[1] || p_start[0] != p_start[2] ||
        p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[1] || p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix rigid npt/nph pressure settings");
   if (pcouple == XYZ && dimension == 2 &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix rigid npt/nph pressure settings");
   if (pcouple == XY &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix rigid npt/nph pressure settings");
   if (pcouple == YZ &&
       (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] ||
        p_period[1] != p_period[2]))
     error->all(FLERR,"Invalid fix rigid npt/nph pressure settings");
   if (pcouple == XZ &&
       (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix rigid npt/nph pressure settings");
 
   // NOTE(review): the p_period part of this test overlaps the first check
   // above (which tests p_flag == 1); only the t_period part looks new here
 
   if ((tstat_flag && t_period <= 0.0) ||
       (p_flag[0] && p_period[0] <= 0.0) ||
       (p_flag[1] && p_period[1] <= 0.0) ||
       (p_flag[2] && p_period[2] <= 0.0))
     error->all(FLERR,"Fix rigid nvt/npt/nph damping parameters must be > 0.0");
 
   // memory allocation and initialization
   
   memory->create(conjqm,nbody,4,"rigid_nh:conjqm");
   if (tstat_flag || pstat_flag) {
     allocate_chain();
     allocate_order();
   }
   
   // zero the thermostat chain variables
   // f_eta_t/f_eta_r entries with index >= 1 are not set here; setup() fills them
 
   if (tstat_flag) {
     eta_t[0] = eta_r[0] = 0.0;
     eta_dot_t[0] = eta_dot_r[0] = 0.0;
     f_eta_t[0] = f_eta_r[0] = 0.0;
   
     for (int i = 1; i < t_chain; i++) {
       eta_t[i] = eta_r[i] = 0.0;
       eta_dot_t[i] = eta_dot_r[i] = 0.0;
     }
   }
   
   // zero the barostat variables and its thermostat chain
 
   if (pstat_flag) {
     epsilon_dot[0] = epsilon_dot[1] = epsilon_dot[2] = 0.0;
     eta_b[0] = eta_dot_b[0] = f_eta_b[0] = 0.0;
     for (int i = 1; i < p_chain; i++) 
       eta_b[i] = eta_dot_b[i] = 0.0;
   }
 
   // rigid body pointers
   
   nrigidfix = 0;
   rfix = NULL;
 
   // vol0 == 0.0 marks the reference volume as not yet set; init() fills it
 
   vol0 = 0.0;
   t0 = 1.0;
   
   // no temperature/pressure compute has been created by this constructor
 
   tcomputeflag = 0;
   pcomputeflag = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNH::~FixRigidNH()
 {
   // release what the constructor allocated
 
   memory->destroy(conjqm);
   if (tstat_flag || pstat_flag) {
     deallocate_chain();
     deallocate_order();
   }
 
   // delete [] on a NULL pointer is a no-op, so no guard is needed
 
   delete [] rfix;
 
   // remove the temperature compute and its ID if tcomputeflag is set
 
   if (tcomputeflag) {
     modify->delete_compute(id_temp);
     delete [] id_temp;
   }
 
   // delete pressure if fix created it
   // the ID string is freed whenever barostatting is on
 
   if (pstat_flag && pcomputeflag) modify->delete_compute(id_press);
   if (pstat_flag) delete [] id_press;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixRigidNH::setmask()
 {
   // start from the parent mask and add THERMO_ENERGY when a thermostat
   // or barostat is active
 
   int mask = FixRigid::setmask();
   if (!(tstat_flag || pstat_flag)) return mask;
   return mask | THERMO_ENERGY;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Initialization after the parent FixRigid::init(): re-resolves the dilate
 // group, caches timestep factors and unit constants, counts degrees of
 // freedom, sets integrator weights, and looks up the temperature/pressure
 // computes and the list of rigid fixes.
 
 void FixRigidNH::init()
 {
   FixRigid::init();
 
   // recheck that dilate group has not been deleted
 
   if (allremap == 0) {
     int idilate = group->find(id_dilate);
     if (idilate == -1)
       error->all(FLERR,"Fix rigid npt/nph dilate group ID does not exist");
     dilate_group_bit = group->bitmask[idilate];
   }
 
   // initialize thermostats
   // set timesteps, constants 
   // store Yoshida-Suzuki integrator parameters
   
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dtq = 0.5 * update->dt;
   
   boltz = force->boltz;
   nktv2p = force->nktv2p;
   mvv2e = force->mvv2e;
   
   if (force->kspace) kspace_flag = 1;
   else kspace_flag = 0;
   
   // translational and rotational degrees of freedom
   // one rotational DOF is removed per principal moment smaller than EPSILON
   // NOTE(review): the inner loop bound is domain->dimension while nf_t/nf_r
   // use the dimension member; presumably these are equal - confirm
 
   nf_t = nf_r = dimension * nbody;
   for (int ibody = 0; ibody < nbody; ibody++)
     for (int k = 0; k < domain->dimension; k++)
       if (fabs(inertia[ibody][k]) < EPSILON) nf_r--;
   
   // see Table 1 in Kamberaj et al
   // NOTE(review): w[] is only assigned for t_order 3 or 5; confirm that
   // other values are rejected elsewhere
   
   if (tstat_flag || pstat_flag) {
     if (t_order == 3) {
       w[0] = 1.0 / (2.0 - pow(2.0, 1.0/3.0));
       w[1] = 1.0 - 2.0*w[0];
       w[2] = w[0];
     } else if (t_order == 5) {
       w[0] = 1.0 / (4.0 - pow(4.0, 1.0/3.0));
       w[1] = w[0];
       w[2] = 1.0 - 4.0 * w[0];
       w[3] = w[0];
       w[4] = w[0];
     }
   }
   
   g_f = nf_t + nf_r;  
   onednft = 1.0 + (double)(dimension) / (double)g_f;
   onednfr = (double) (dimension) / (double)g_f;
 
   // set temperature compute ptr, only when tcomputeflag is set
 
   int icompute;
   if (tcomputeflag) {  
     icompute = modify->find_compute(id_temp);
     if (icompute < 0) 
-      error->all(FLERR,"Temp ID for fix rigid npt/nph does not exist");
+      error->all(FLERR,"Temperature ID for fix rigid nvt/npt/nph does not exist");
     temperature = modify->compute[icompute];
   }
 
   if (pstat_flag) {
     if (domain->triclinic) 
-      error->all(FLERR,"fix rigid npt/nph does not yet allow triclinic box");
+      error->all(FLERR,"Fix rigid npt/nph does not yet allow triclinic box");
   
     // ensure no conflict with fix deform
 
     for (int i = 0; i < modify->nfix; i++)
       if (strcmp(modify->fix[i]->style,"deform") == 0) {
       	int *dimflag = ((FixDeform *) modify->fix[i])->dimflag;
       	if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || 
       	    (p_flag[2] && dimflag[2]))
           error->all(FLERR,"Cannot use fix rigid npt/nph and fix deform on "
                      "same component of stress tensor");
       }
 
     // set frequency
     // the 0.0 assignment is immediately overwritten by the MAX() below
   
     p_freq_max = 0.0;
     p_freq_max = MAX(p_freq[0],p_freq[1]);
     p_freq_max = MAX(p_freq_max,p_freq[2]);
 
     // tally the number of dimensions that are barostatted
     // set initial volume and reference cell, if not already done
 
     pdim = p_flag[0] + p_flag[1] + p_flag[2];
     if (vol0 == 0.0) {
       if (dimension == 2) vol0 = domain->xprd * domain->yprd;
       else vol0 = domain->xprd * domain->yprd * domain->zprd;
     }
 
     // set pressure compute ptr
 
     icompute = modify->find_compute(id_press);
     if (icompute < 0) 
-      error->all(FLERR,"Press ID for fix rigid npt/nph does not exist");
+      error->all(FLERR,"Pressure ID for fix rigid npt/nph does not exist");
     pressure = modify->compute[icompute];
     
     // detect if any rigid fixes exist so rigid bodies move on remap
     // rfix[] = indices to each fix rigid
     // this will include self
     // the list is rebuilt from scratch on every init() call
 
     if (rfix) delete [] rfix;
     nrigidfix = 0;
     rfix = NULL;
 
     for (int i = 0; i < modify->nfix; i++)
       if (modify->fix[i]->rigid_flag) nrigidfix++;
     if (nrigidfix) {
       rfix = new int[nrigidfix];
       nrigidfix = 0;
       for (int i = 0; i < modify->nfix; i++)
         if (modify->fix[i]->rigid_flag) rfix[nrigidfix++] = i;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Prepare integrator state at the start of a run: seed the conjugate
 // quaternion momenta and kinetic energy tallies from the current rigid-body
 // state, set target temperature/pressure, and initialize thermostat and
 // barostat masses, chain forces and timestep-dependent coefficients.
 // vflag is passed through unchanged to FixRigid::setup().
 
 void FixRigidNH::setup(int vflag)
 {
   FixRigid::setup(vflag);
   
   double mbody[3];
   akin_t = akin_r = 0.0;
   for (int ibody = 0; ibody < nbody; ibody++) {
 
     // conjqm = 2 * quatvec(quat, mbody), where mbody is angmom multiplied by
     // the transpose of the ex/ey/ez_space axes
 
     MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
                                 angmom[ibody],mbody);
     MathExtra::quatvec(quat[ibody],mbody,conjqm[ibody]);
     conjqm[ibody][0] *= 2.0;
     conjqm[ibody][1] *= 2.0;
     conjqm[ibody][2] *= 2.0;
     conjqm[ibody][3] *= 2.0;
     
     // tally m*v^2 (translation) and L.omega (rotation), no factor of 1/2
 
     if (tstat_flag || pstat_flag) {
       akin_t += masstotal[ibody]*(vcm[ibody][0]*vcm[ibody][0] + 
         vcm[ibody][1]*vcm[ibody][1] + vcm[ibody][2]*vcm[ibody][2]);
       akin_r += angmom[ibody][0]*omega[ibody][0] + 
         angmom[ibody][1]*omega[ibody][1] + angmom[ibody][2]*omega[ibody][2];
     }
   }
 
   // compute target temperature
   // barostat-only case: use the current temperature as target,
   // falling back to 1.0 (lj units) or 300.0 (other units) if it is zero
   
   if (tstat_flag) compute_temp_target();
   else if (pstat_flag) {
     t0 = temperature->compute_scalar();
     if (t0 == 0.0) {
       if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0;
       else t0 = 300.0;
     }
     t_target = t0;
   }
 
   // compute target pressure
   // compute current pressure
   // trigger virial computation on next timestep
     
   if (pstat_flag) { 
     compute_press_target();
     
     temperature->compute_scalar();
     if (pstyle == ISO) pressure->compute_scalar();
     else pressure->compute_vector();
     couple();
     pressure->addstep(update->ntimestep+1);
   }
   
   // initialize thermostat/barostat settings
   // NOTE(review): t_target is only assigned above when tstat_flag or
   // pstat_flag is set; kt is only consumed inside those same branches below
   
   double kt, t_mass, tb_mass;
   kt = boltz * t_target;
 
   // thermostat masses: first chain element is weighted by the number of
   // degrees of freedom (nf_t, nf_r), the remaining elements use t_mass
 
   if (tstat_flag) {
     t_mass = kt / (t_freq*t_freq);
     q_t[0] = nf_t * t_mass;
     q_r[0] = nf_r * t_mass;
     for (int i = 1; i < t_chain; i++) 
       q_t[i] = q_r[i] = t_mass;
 
     // forces on chain elements 1..t_chain-1 from the element below each
 
     for (int i = 1; i < t_chain; i++) {
       f_eta_t[i] = (q_t[i-1] * eta_dot_t[i-1] * eta_dot_t[i-1] - kt)/q_t[i];
       f_eta_r[i] = (q_r[i-1] * eta_dot_r[i-1] * eta_dot_r[i-1] - kt)/q_r[i];
     }
   }
   
   // initial forces on barostat thermostat variables
   
   if (pstat_flag) {
     for (int i = 0; i < 3; i++)
       if (p_flag[i]) {
         epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i]*p_freq[i]);
         epsilon[i] = log(vol0)/dimension;
       } 
     
     // NOTE(review): q_b[0] is dimension*dimension*tb_mass here, while
     // nhc_press_integrate() resets it to tb_mass - confirm which is intended
     // NOTE(review): f_eta_b[i] below uses q_b[i] in the numerator, whereas
     // the thermostat chains above and nhc_press_integrate() use the mass of
     // element i-1; this only differs for i = 1 - confirm
 
     tb_mass = kt / (p_freq_max * p_freq_max);
     q_b[0] = dimension * dimension * tb_mass;
     for (int i = 1; i < p_chain; i++) {
       q_b[i] = tb_mass;
       f_eta_b[i] = (q_b[i] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt)/q_b[i];
     }
   }
   
   // update order/timestep dependent coefficients
   // wdti1 = w*dtv/t_iter, wdti2 and wdti4 are half and quarter of wdti1
   
   if (tstat_flag || pstat_flag) {
     for (int i = 0; i < t_order; i++) {
       wdti1[i] = w[i] * dtv / t_iter;
       wdti2[i] = wdti1[i] / 2.0;
       wdti4[i] = wdti1[i] / 4.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform preforce velocity Verlet integration
    see Kamberaj paper for step references
 ------------------------------------------------------------------------- */
 
 // First half of the velocity Verlet step for all rigid bodies.
 // Updates thermostat/barostat variables, then for each body advances vcm by
 // a half step, xcm by a full step and the quaternion/conjugate momentum via
 // no_squish_rotate(), and finally sets atom coords/velocities via set_xv().
 // vflag controls whether the virial is set up (v_setup) before set_xv().
 
 void FixRigidNH::initial_integrate(int vflag)
 {
   double tmp,scale_r,scale_t[3],scale_v[3];
   double dtfm,mbody[3],tbody[3],fquat[4];
   double dtf2 = dtf * 2.0;
   
   // compute target temperature
   // update thermostat chains coupled to particles
   
   if (tstat_flag) {
     compute_temp_target();
     nhc_temp_integrate();
   }
 
   // compute target pressure
   // update epsilon dot
   // update thermostat coupled to barostat
   
   if (pstat_flag) {
     nhc_press_integrate();
     
     if (pstyle == ISO) {
       temperature->compute_scalar();
       pressure->compute_scalar();
     } else {
       temperature->compute_vector();
       pressure->compute_vector();
     }
     couple();
     pressure->addstep(update->ntimestep+1);
   
     compute_press_target();
     nh_epsilon_dot();
   }  
   
   // compute scale variables
   // scale_t = per-dimension factor applied to vcm
   // scale_r = factor applied to conjqm
   // scale_v = per-dimension multiplier of vcm in the xcm update (pstat only)
 
   scale_t[0] = scale_t[1] = scale_t[2] = 1.0;
   scale_v[0] = scale_v[1] = scale_v[2] = 1.0;
   scale_r = 1.0;
 
   // kinetic energy tallies are reset here and re-accumulated in the body loop
 
   if (tstat_flag) {
     akin_t = akin_r = 0.0;
     tmp = exp(-dtq * eta_dot_t[0]);
     scale_t[0] = scale_t[1] = scale_t[2] = tmp;
     tmp = exp(-dtq * eta_dot_r[0]);
     scale_r = tmp;
   } 
 
   if (pstat_flag) {
     akin_t = akin_r = 0.0;
     scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2));
     scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2));
     scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2));
     scale_r *= exp(-dtq * (pdim * mtk_term2));
 
     tmp = dtq * epsilon_dot[0];
     scale_v[0] = dtv * exp(tmp) * maclaurin_series(tmp);
     tmp = dtq * epsilon_dot[1];
     scale_v[1] = dtv * exp(tmp) * maclaurin_series(tmp);
     tmp = dtq * epsilon_dot[2];
     scale_v[2] = dtv * exp(tmp) * maclaurin_series(tmp);
   }
     
   // update xcm, vcm, quat, conjqm and angmom
 
   for (int ibody = 0; ibody < nbody; ibody++) {
     
     // step 1.1 - update vcm by 1/2 step
     // fflag masks out force components per body and dimension
     
     dtfm = dtf / masstotal[ibody];
     vcm[ibody][0] += dtfm * fcm[ibody][0] * fflag[ibody][0];
     vcm[ibody][1] += dtfm * fcm[ibody][1] * fflag[ibody][1];
     vcm[ibody][2] += dtfm * fcm[ibody][2] * fflag[ibody][2];
     
     if (tstat_flag || pstat_flag) {
       vcm[ibody][0] *= scale_t[0];
       vcm[ibody][1] *= scale_t[1];
       vcm[ibody][2] *= scale_t[2];
       
       tmp = vcm[ibody][0]*vcm[ibody][0] + vcm[ibody][1]*vcm[ibody][1] +
         vcm[ibody][2]*vcm[ibody][2];
       akin_t += masstotal[ibody]*tmp;
     }
     
     // step 1.2 - update xcm by full step
     // with a barostat, scale_v replaces the plain timestep dtv
 
     if (!pstat_flag) {
       xcm[ibody][0] += dtv * vcm[ibody][0];
       xcm[ibody][1] += dtv * vcm[ibody][1];
       xcm[ibody][2] += dtv * vcm[ibody][2];
     } else {
       xcm[ibody][0] += scale_v[0] * vcm[ibody][0];
       xcm[ibody][1] += scale_v[1] * vcm[ibody][1];
       xcm[ibody][2] += scale_v[2] * vcm[ibody][2];
     }
     
     // step 1.3 - apply torque (body coords) to quaternion momentum
     // tflag masks out torque components per body and dimension
     
     torque[ibody][0] *= tflag[ibody][0];
     torque[ibody][1] *= tflag[ibody][1];
     torque[ibody][2] *= tflag[ibody][2];
     
     MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
                                 torque[ibody],tbody);
     MathExtra::quatvec(quat[ibody],tbody,fquat);
     
     conjqm[ibody][0] += dtf2 * fquat[0];
     conjqm[ibody][1] += dtf2 * fquat[1];
     conjqm[ibody][2] += dtf2 * fquat[2];
     conjqm[ibody][3] += dtf2 * fquat[3];
     
     if (tstat_flag || pstat_flag) {
       conjqm[ibody][0] *= scale_r;
       conjqm[ibody][1] *= scale_r;
       conjqm[ibody][2] *= scale_r;
       conjqm[ibody][3] *= scale_r;
     }
     
     // step 1.4 to 1.13 - use no_squish rotate to update p and q
     // symmetric sequence 3,2,1,2,3; the middle call uses dtv, the others dtq
   
     no_squish_rotate(3,conjqm[ibody],quat[ibody],inertia[ibody],dtq);
     no_squish_rotate(2,conjqm[ibody],quat[ibody],inertia[ibody],dtq);
     no_squish_rotate(1,conjqm[ibody],quat[ibody],inertia[ibody],dtv);
     no_squish_rotate(2,conjqm[ibody],quat[ibody],inertia[ibody],dtq);
     no_squish_rotate(3,conjqm[ibody],quat[ibody],inertia[ibody],dtq);
   
     // update exyz_space
     // transform p back to angmom
     // update angular velocity
     
     MathExtra::q_to_exyz(quat[ibody],ex_space[ibody],ey_space[ibody],
                          ez_space[ibody]);
     MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
     MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
                       mbody,angmom[ibody]);
     
     // undo the factor of 2 carried by conjqm
 
     angmom[ibody][0] *= 0.5;
     angmom[ibody][1] *= 0.5;
     angmom[ibody][2] *= 0.5;
     
     MathExtra::angmom_to_omega(angmom[ibody],ex_space[ibody],ey_space[ibody],
                                ez_space[ibody],inertia[ibody],omega[ibody]);
     
     if (tstat_flag || pstat_flag) {
       akin_r += angmom[ibody][0]*omega[ibody][0] + 
         angmom[ibody][1]*omega[ibody][1] + angmom[ibody][2]*omega[ibody][2];
     }
   }
     
   // virial setup before call to set_xv
 
   if (vflag) v_setup(vflag);
   else evflag = 0;
   
   // remap simulation box by 1/2 step
 
   if (pstat_flag) remap();
   
   // set coords/orient and velocity/rotation of atoms in rigid bodies
   // from quaternion and omega
   
   set_xv();
   
   // remap simulation box by full step
   // redo KSpace coeffs since volume has changed
 
   if (pstat_flag) {
     remap();
     if (kspace_flag) force->kspace->setup();
   }  
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Second half of the velocity Verlet step.
 // Sums per-atom forces/torques onto each rigid body (reduced across procs),
 // advances vcm and conjqm by a half step with thermostat/barostat scaling,
 // recomputes angmom/omega, sets atom velocities via set_v(), and finally
 // updates the barostat (epsilon_dot) and the thermostat chains.
 
 void FixRigidNH::final_integrate()
 {
   int i,ibody;
   double tmp,scale_t[3],scale_r;
   double dtfm,xy,xz,yz;
   double mbody[3],tbody[3],fquat[4];
   double dtf2 = dtf * 2.0;
 
   // compute scale variables
   // scale_t multiplies vcm, scale_r multiplies conjqm
   
   scale_t[0] = scale_t[1] = scale_t[2] = 1.0;
   scale_r = 1.0;
 
   if (tstat_flag) {
     tmp = exp(-1.0 * dtq * eta_dot_t[0]);
     scale_t[0] = scale_t[1] = scale_t[2] = tmp;
     scale_r = exp(-1.0 * dtq * eta_dot_r[0]);
   } 
   
   // kinetic energy tallies are only reset and re-accumulated in this
   // function when a barostat is active
 
   if (pstat_flag) {
     scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2));
     scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2));
     scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2));
     scale_r *= exp(-dtq * (pdim * mtk_term2));
     
     akin_t = akin_r = 0.0;
   }
   
   // sum over atoms to get force and torque on rigid body
   
   int *image = atom->image;
   double **x = atom->x;
   double **f = atom->f;
   int nlocal = atom->nlocal;
   
   // tilt factors are only read (and only used below) for triclinic boxes
 
   double xprd = domain->xprd;
   double yprd = domain->yprd;
   double zprd = domain->zprd;
   if (triclinic) {
     xy = domain->xy;
     xz = domain->xz;
     yz = domain->yz;
   }
 
   // sum[ibody][0-2] = force, sum[ibody][3-5] = torque about xcm
 
   int xbox,ybox,zbox;
   double xunwrap,yunwrap,zunwrap,dx,dy,dz;
   for (ibody = 0; ibody < nbody; ibody++)
     for (i = 0; i < 6; i++) sum[ibody][i] = 0.0;
   
   for (i = 0; i < nlocal; i++) {
     if (body[i] < 0) continue;
     ibody = body[i];
 
     sum[ibody][0] += f[i][0];
     sum[ibody][1] += f[i][1];
     sum[ibody][2] += f[i][2];
       
     // decode periodic image counts and unwrap the atom position
 
     xbox = (image[i] & IMGMASK) - IMGMAX;
     ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
     zbox = (image[i] >> IMG2BITS) - IMGMAX;
 
     if (triclinic == 0) {
       xunwrap = x[i][0] + xbox*xprd;
       yunwrap = x[i][1] + ybox*yprd;
       zunwrap = x[i][2] + zbox*zprd;
     } else {
       xunwrap = x[i][0] + xbox*xprd + ybox*xy + zbox*xz;
       yunwrap = x[i][1] + ybox*yprd + zbox*yz;
       zunwrap = x[i][2] + zbox*zprd;
     }
 
     dx = xunwrap - xcm[ibody][0];
     dy = yunwrap - xcm[ibody][1];
     dz = zunwrap - xcm[ibody][2];
     
     // torque contribution = (dx,dy,dz) cross f
 
     sum[ibody][3] += dy*f[i][2] - dz*f[i][1];
     sum[ibody][4] += dz*f[i][0] - dx*f[i][2];
     sum[ibody][5] += dx*f[i][1] - dy*f[i][0];
   }
   
   // extended particles add their torque to torque of body
 
   if (extended) {
     double **torque_one = atom->torque;
 
     for (i = 0; i < nlocal; i++) {
       if (body[i] < 0) continue;
       ibody = body[i];
 
       if (eflags[i] & TORQUE) {
         sum[ibody][3] += torque_one[i][0];
         sum[ibody][4] += torque_one[i][1];
         sum[ibody][5] += torque_one[i][2];
       }
     }
   }
 
   // reduce the 6 values per body across all procs into all[][]
 
   MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world);
   
   // update vcm and angmom
   // include Langevin thermostat forces
   // fflag,tflag = 0 for some dimensions in 2d
 
   for (ibody = 0; ibody < nbody; ibody++) {
     fcm[ibody][0] = all[ibody][0] + langextra[ibody][0];
     fcm[ibody][1] = all[ibody][1] + langextra[ibody][1];
     fcm[ibody][2] = all[ibody][2] + langextra[ibody][2];
     torque[ibody][0] = all[ibody][3] + langextra[ibody][3];
     torque[ibody][1] = all[ibody][4] + langextra[ibody][4];
     torque[ibody][2] = all[ibody][5] + langextra[ibody][5];
 
     // update vcm by 1/2 step
     // here the scaling is applied before the force update,
     // the reverse of the order used in initial_integrate()
   
     dtfm = dtf / masstotal[ibody];
     if (tstat_flag || pstat_flag) {
       vcm[ibody][0] *= scale_t[0];
       vcm[ibody][1] *= scale_t[1];
       vcm[ibody][2] *= scale_t[2];
     }
     
     vcm[ibody][0] += dtfm * fcm[ibody][0] * fflag[ibody][0];
     vcm[ibody][1] += dtfm * fcm[ibody][1] * fflag[ibody][1];
     vcm[ibody][2] += dtfm * fcm[ibody][2] * fflag[ibody][2];
     
     if (pstat_flag) {
       tmp = vcm[ibody][0]*vcm[ibody][0] + vcm[ibody][1]*vcm[ibody][1] +
         vcm[ibody][2]*vcm[ibody][2];
       akin_t += masstotal[ibody]*tmp;
     }
     
     // update conjqm, then transform to angmom, set velocity again
     // virial is already setup from initial_integrate
     
     torque[ibody][0] *= tflag[ibody][0];
     torque[ibody][1] *= tflag[ibody][1];
     torque[ibody][2] *= tflag[ibody][2];
     
     MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],
                                 ez_space[ibody],torque[ibody],tbody);
     MathExtra::quatvec(quat[ibody],tbody,fquat);
     
     if (tstat_flag || pstat_flag) {
       conjqm[ibody][0] = scale_r * conjqm[ibody][0] + dtf2 * fquat[0];
       conjqm[ibody][1] = scale_r * conjqm[ibody][1] + dtf2 * fquat[1];
       conjqm[ibody][2] = scale_r * conjqm[ibody][2] + dtf2 * fquat[2];
       conjqm[ibody][3] = scale_r * conjqm[ibody][3] + dtf2 * fquat[3];
     } else {
       conjqm[ibody][0] += dtf2 * fquat[0];
       conjqm[ibody][1] += dtf2 * fquat[1];
       conjqm[ibody][2] += dtf2 * fquat[2];
       conjqm[ibody][3] += dtf2 * fquat[3];
     }
 
     MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
     MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
                       mbody,angmom[ibody]);
     
     // undo the factor of 2 carried by conjqm
 
     angmom[ibody][0] *= 0.5;
     angmom[ibody][1] *= 0.5;
     angmom[ibody][2] *= 0.5;  
     
     MathExtra::angmom_to_omega(angmom[ibody],ex_space[ibody],ey_space[ibody],
                                ez_space[ibody],inertia[ibody],omega[ibody]);
     
     if (pstat_flag) {
       akin_r += angmom[ibody][0]*omega[ibody][0] + 
         angmom[ibody][1]*omega[ibody][1] + 
         angmom[ibody][2]*omega[ibody][2];
     }
   }
   
   // set velocity/rotation of atoms in rigid bodies
   // virial is already setup from initial_integrate
 
   set_v();
   
   // compute temperature and pressure tensor
   // couple to compute current pressure components
   // trigger virial computation on next timestep
   
   if (tcomputeflag) t_current = temperature->compute_scalar();
   if (pstat_flag) {
     if (pstyle == ISO) pressure->compute_scalar();
     else pressure->compute_vector();
     couple();
     pressure->addstep(update->ntimestep+1);
   }
 
   if (pstat_flag) nh_epsilon_dot();  
   
   // update eta_dot_t and eta_dot_r
   // update eta_dot_b
       
   if (tstat_flag) nhc_temp_integrate();
   if (pstat_flag) nhc_press_integrate();  
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Advance the translational (eta_t) and rotational (eta_r) thermostat chains
 // coupled to the rigid bodies, using the current t_target and the kinetic
 // energy tallies akin_t/akin_r.  The update is repeated t_iter times, each
 // time looping over the t_order weighted sub-steps (wdti1/wdti2/wdti4).
 
 void FixRigidNH::nhc_temp_integrate()
 {
   int i,j,k;
   double kt,gfkt_t,gfkt_r,tmp,ms,s,s2;
   
   kt = boltz * t_target;
   gfkt_t = nf_t * kt;
   gfkt_r = nf_r * kt;
 
   // update thermostat masses
   // recomputed every call since t_target may change between calls
   
   double t_mass = boltz * t_target / (t_freq * t_freq);
   q_t[0] = nf_t * t_mass;
   q_r[0] = nf_r * t_mass;
   for (i = 1; i < t_chain; i++)
     q_t[i] = q_r[i] = t_mass;
   
   // update force of thermostats coupled to particles
   
   f_eta_t[0] = (akin_t * mvv2e - gfkt_t) / q_t[0];
   f_eta_r[0] = (akin_r * mvv2e - gfkt_r) / q_r[0];
   
   // multiple timestep iteration
   
   for (i = 0; i < t_iter; i++) {
     for (j = 0; j < t_order; j++) {
   
       // update thermostat velocities half step
       // last chain element first, then walk down the chain to element 0
   
       eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1];
       eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1];
       
       for (k = 1; k < t_chain; k++) {
         tmp = wdti4[j] * eta_dot_t[t_chain-k];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_t[t_chain-k-1] = eta_dot_t[t_chain-k-1] * s2 + 
           wdti2[j] * f_eta_t[t_chain-k-1] * s * ms;
 	
         tmp = wdti4[j] * eta_dot_r[t_chain-k];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_r[t_chain-k-1] = eta_dot_r[t_chain-k-1] * s2 + 
           wdti2[j] * f_eta_r[t_chain-k-1] * s * ms;
       }
       
       // update thermostat positions a full step
       
       for (k = 0; k < t_chain; k++) {
         eta_t[k] += wdti1[j] * eta_dot_t[k];
         eta_r[k] += wdti1[j] * eta_dot_r[k];
       }
       
       // update thermostat forces 
       // element 0 is not touched here; it keeps the value set before the loops
       
       for (k = 1; k < t_chain; k++) {
         f_eta_t[k] = q_t[k-1] * eta_dot_t[k-1] * eta_dot_t[k-1] - kt;
         f_eta_t[k] /= q_t[k];
         f_eta_r[k] = q_r[k-1] * eta_dot_r[k-1] * eta_dot_r[k-1] - kt;
         f_eta_r[k] /= q_r[k];
       }
       
       // update thermostat velocities a full step
       // walk up the chain, refreshing the force on element k+1 from the
       // just-updated velocity of element k
       
       for (k = 0; k < t_chain-1; k++) {
         tmp = wdti4[j] * eta_dot_t[k+1];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_t[k] = eta_dot_t[k] * s2 + wdti2[j] * f_eta_t[k] * s * ms;
         tmp = q_t[k] * eta_dot_t[k] * eta_dot_t[k] - kt;
         f_eta_t[k+1] = tmp / q_t[k+1];
 	
         tmp = wdti4[j] * eta_dot_r[k+1];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_r[k] = eta_dot_r[k] * s2 + wdti2[j] * f_eta_r[k] * s * ms;
         tmp = q_r[k] * eta_dot_r[k] * eta_dot_r[k] - kt;
           f_eta_r[k+1] = tmp / q_r[k+1];
       }
       
       // last chain element has no element above it: plain half-step kick
 
       eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1];
       eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Advance the thermostat chain coupled to the barostat (eta_b, eta_dot_b)
 // and rescale epsilon_dot by the first chain element's velocity.
 // Also refreshes the chain masses q_b and the barostat masses epsilon_mass
 // from the current t_target.
 
 void FixRigidNH::nhc_press_integrate()
 {
   int i,k;
   double tmp,s,s2,ms,kecurrent;
   double kt = boltz * t_target;
   double lkt_press = kt;
   
   // update thermostat masses
   // NOTE(review): q_b[0] is set to tb_mass here, while setup() sets it to
   // dimension*dimension*tb_mass - confirm which is intended
   // the loop below declares its own i, shadowing the one declared above
   
   double tb_mass = kt / (p_freq_max * p_freq_max);
   q_b[0] = tb_mass;
   for (int i = 1; i < p_chain; i++) {
     q_b[i] = tb_mass;
     f_eta_b[i] = q_b[i-1] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt;
     f_eta_b[i] /= q_b[i];
   }
      
   // update forces acting on thermostat
   // kecurrent sums epsilon_mass * epsilon_dot^2 over barostatted dimensions
   
   kecurrent = 0.0;
   for (i = 0; i < 3; i++) 
     if (p_flag[i]) {
       epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i] * p_freq[i]);
       kecurrent += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i];
     }
 
   f_eta_b[0] = (kecurrent - lkt_press) / q_b[0];
   
   // update thermostat velocities a half step
   // last chain element first, then walk down the chain to element 0
   
   eta_dot_b[p_chain-1] += 0.5 * dtq * f_eta_b[p_chain-1];
   
   for (k = 0; k < p_chain-1; k++) {
     tmp = 0.5 * dtq * eta_dot_b[p_chain-k-1];
     ms = maclaurin_series(tmp);
     s = exp(-0.5 * tmp);
     s2 = s * s;
     eta_dot_b[p_chain-k-2] = eta_dot_b[p_chain-k-2] * s2 + 
       dtq * f_eta_b[p_chain-k-2] * s * ms;
   }
   
   // update thermostat positions
   
   for (k = 0; k < p_chain; k++)
     eta_b[k] += dtv * eta_dot_b[k];
   
   // update epsilon dot
   
   s = exp(-1.0 * dtq * eta_dot_b[0]);
   for (i = 0; i < 3; i++) 
     if (p_flag[i]) epsilon_dot[i] *= s;
       
   // recompute barostat kinetic energy and force with the rescaled epsilon_dot
 
   kecurrent = 0.0;
   for (i = 0; i < 3; i++) 
     if (p_flag[i]) 
       kecurrent += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i];
  
   f_eta_b[0] = (kecurrent - lkt_press) / q_b[0];
   
   // update thermostat velocities a full step
   // walk up the chain, refreshing the force on element k+1 from the
   // just-updated velocity of element k
   
   for (k = 0; k < p_chain-1; k++) {
     tmp = 0.5 * dtq * eta_dot_b[k+1];
     ms = maclaurin_series(tmp);
     s = exp(-0.5 * tmp);
     s2 = s * s;
     eta_dot_b[k] = eta_dot_b[k] * s2 + dtq * f_eta_b[k] * s * ms;
     tmp = q_b[k] * eta_dot_b[k] * eta_dot_b[k] - kt;
     f_eta_b[k+1] = tmp / q_b[k+1];
   }
   
   // last chain element has no element above it: plain kick
 
   eta_dot_b[p_chain-1] += 0.5 * dtq * f_eta_b[p_chain-1];
 
 }
 
 /* ---------------------------------------------------------------------- 
    compute kinetic energy in the extended Hamiltonian
    conserved quantity = sum of returned energy and potential energy
 -----------------------------------------------------------------------*/
 
 double FixRigidNH::compute_scalar()
 {
   int i,k,ibody;
   double kt = boltz * t_target;
   double energy,ke_t,ke_q,tmp,Pkq[4];
   
   // compute the kinetic parts of H_NVE in Kameraj et al (JCP 2005, pp 224114)
   
   // translational kinetic energy
 
   ke_t = 0.0;
   for (ibody = 0; ibody < nbody; ibody++)
     ke_t += 0.5 * masstotal[ibody] * (vcm[ibody][0]*vcm[ibody][0] +
       vcm[ibody][1]*vcm[ibody][1] +
       vcm[ibody][2]*vcm[ibody][2]);
   
   // rotational kinetic energy
 
   ke_q = 0.0;
   for (ibody = 0; ibody < nbody; ibody++) {
     for (k = 1; k < 4; k++) {
       if (k == 1) {
         Pkq[0] = -quat[ibody][1];
         Pkq[1] =  quat[ibody][0];
         Pkq[2] =  quat[ibody][3];
         Pkq[3] = -quat[ibody][2];
       } else if (k == 2) {
         Pkq[0] = -quat[ibody][2];
         Pkq[1] = -quat[ibody][3];
         Pkq[2] =  quat[ibody][0];
         Pkq[3] =  quat[ibody][1];
       } else if (k == 3) {
         Pkq[0] = -quat[ibody][3];
         Pkq[1] =  quat[ibody][2];
         Pkq[2] = -quat[ibody][1];
         Pkq[3] =  quat[ibody][0];      
       }
    
       tmp = conjqm[ibody][0]*Pkq[0] + conjqm[ibody][1]*Pkq[1] +
         conjqm[ibody][2]*Pkq[2] + conjqm[ibody][3]*Pkq[3];
       tmp *= tmp;
     
       if (fabs(inertia[ibody][k-1]) < 1e-6) tmp = 0.0;
       else tmp /= (8.0 * inertia[ibody][k-1]); 
       ke_q += tmp;
     }
   }
   
   energy = (ke_t + ke_q) * mvv2e;
   
   if (tstat_flag) {
   
     // thermostat chain energy: from equation 12 in Kameraj et al (JCP 2005)
 
     energy += kt * (nf_t * eta_t[0] + nf_r * eta_r[0]);
   
     for (i = 1; i < t_chain; i++) 
       energy += kt * (eta_t[i] + eta_r[i]);
   
     for (i = 0;  i < t_chain; i++) {
       energy += 0.5 * q_t[i] * (eta_dot_t[i] * eta_dot_t[i]);
       energy += 0.5 * q_r[i] * (eta_dot_r[i] * eta_dot_r[i]);
     }
   }
   
   if (pstat_flag) {
 
     // using equation 22 in Kameraj et al for H_NPT
 
     for (i = 0; i < 3; i++)
       energy += 0.5 * epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i];
   
     double vol;
     if (dimension == 2) vol = domain->xprd * domain->yprd;
     else vol = domain->xprd * domain->yprd * domain->zprd;
 
     double p0 = (p_target[0] + p_target[1] + p_target[2]) / 3.0;
     energy += p0 * vol / nktv2p;
   
     for (i = 0;  i < p_chain; i++) {
       energy += kt * eta_b[i];
       energy += 0.5 * q_b[i] * (eta_dot_b[i] * eta_dot_b[i]);
     }
   }
   
   return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill p_current[] with the pressure components seen by the barostat,
 // averaging over whichever dimensions are coupled together.
 
 void FixRigidNH::couple()
 {
   double *ptensor = pressure->vector;
 
   // isotropic style: all three components take the scalar pressure
 
   if (pstyle == ISO) {
     p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
     return;
   }
 
   // all three dimensions coupled: share the mean of the diagonal
 
   if (pcouple == XYZ) {
     double mean = 1.0/3.0 * (ptensor[0] + ptensor[1] + ptensor[2]);
     p_current[0] = p_current[1] = p_current[2] = mean;
     return;
   }
 
   // two dimensions (ia,ib) coupled, the third (ifree) independent
 
   int ia,ib,ifree;
   if (pcouple == XY) {
     ia = 0; ib = 1; ifree = 2;
   } else if (pcouple == YZ) {
     ia = 1; ib = 2; ifree = 0;
   } else if (pcouple == XZ) {
     ia = 0; ib = 2; ifree = 1;
   } else {
 
     // no coupling: copy the diagonal straight through
 
     for (int d = 0; d < 3; d++) p_current[d] = ptensor[d];
     return;
   }
 
   double mean = 0.5 * (ptensor[ia] + ptensor[ib]);
   p_current[ia] = p_current[ib] = mean;
   p_current[ifree] = ptensor[ifree];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Dilate the simulation box by exp(dtq * epsilon_dot) in every barostatted
 // dimension, carrying the selected atoms and all rigid fixes along by
 // converting them to lamda coords before, and back to box coords after,
 // the box is resized.
 
 void FixRigidNH::remap()
 {
   double **coords = atom->x;
   int *groupmask = atom->mask;
   int natoms = atom->nlocal;
 
   // epsilon is not used, except for book-keeping
 
   for (int d = 0; d < 3; d++) epsilon[d] += dtq * epsilon_dot[d];
 
   // convert pertinent atoms and rigid bodies to lamda coords
 
   if (allremap) domain->x2lamda(natoms);
   else {
     for (int j = 0; j < natoms; j++) {
       if (!(groupmask[j] & dilate_group_bit)) continue;
       domain->x2lamda(coords[j],coords[j]);
     }
   }
 
   if (nrigid) {
     for (int m = 0; m < nrigidfix; m++)
       modify->fix[rfix[m]]->deform(0);
   }
 
   // reset global and local box to new size/shape
   // each flagged dimension is scaled about its own midpoint
 
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
 
     double lo = domain->boxlo[d];
     double hi = domain->boxhi[d];
     double mid = 0.5 * (lo + hi);
     double factor = exp(dtq * epsilon_dot[d]);
     domain->boxlo[d] = (lo-mid)*factor + mid;
     domain->boxhi[d] = (hi-mid)*factor + mid;
   }
 
   domain->set_global_box();
   domain->set_local_box();
 
   // convert pertinent atoms and rigid bodies back to box coords
 
   if (allremap) domain->lamda2x(natoms);
   else {
     for (int j = 0; j < natoms; j++) {
       if (!(groupmask[j] & dilate_group_bit)) continue;
       domain->lamda2x(coords[j],coords[j]);
     }
   }
 
   if (nrigid) {
     for (int m = 0; m < nrigidfix; m++)
       modify->fix[rfix[m]]->deform(1);
   }
 }
 
 /* ----------------------------------------------------------------------
    compute target temperature and kinetic energy
 -----------------------------------------------------------------------*/
 
 // Set t_target by linear interpolation between t_start and t_stop,
 // based on how far the current timestep is through the run.
 
 void FixRigidNH::compute_temp_target()
 {
   // steps elapsed since the run began; left at zero on the first step so
   // the run length is never used as a divisor there
 
   const double elapsed = update->ntimestep - update->beginstep;
   const double frac = (elapsed != 0.0) ?
     elapsed / (update->endstep - update->beginstep) : elapsed;
 
   t_target = t_start + frac * (t_stop-t_start);
 }
 
 /* ----------------------------------------------------------------------
    compute hydrostatic target pressure
 -----------------------------------------------------------------------*/
 
 // Set p_target[] for every barostatted dimension by linear interpolation
 // between p_start and p_stop, and set p_hydro to their mean over pdim.
 
 void FixRigidNH::compute_press_target()
 {
   // fraction of the run completed; left at zero on the first step
 
   const double elapsed = update->ntimestep - update->beginstep;
   const double frac = (elapsed != 0.0) ?
     elapsed / (update->endstep - update->beginstep) : elapsed;
 
   p_hydro = 0.0;
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
     p_target[d] = p_start[d] + frac * (p_stop[d]-p_start[d]);
     p_hydro += p_target[d];
   }
   p_hydro /= pdim;
 }
 
 /* ----------------------------------------------------------------------
    update epsilon_dot
 -----------------------------------------------------------------------*/
 
 // Advance epsilon_dot for every barostatted dimension using the pressure
 // imbalance (p_current - p_hydro) and the kinetic term mtk_term1, damp it
 // by the first barostat-chain velocity, then refresh mtk_term2.
 
 void FixRigidNH::nh_epsilon_dot()
 {
   // current box volume (area in 2d)
 
   double vol = domain->xprd*domain->yprd;
   if (dimension != 2) vol = domain->xprd*domain->yprd*domain->zprd;
 
   // MTK terms
 
   mtk_term1 = (akin_t + akin_r) * mvv2e / g_f;
 
   const double damp = exp(-1.0 * dtq * eta_dot_b[0]);
 
   // mtk_term2 accumulates the updated epsilon_dot of the flagged
   // dimensions; it is not read anywhere inside this loop
 
   mtk_term2 = 0.0;
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
 
     double accel = (p_current[d]-p_hydro)*vol / nktv2p + mtk_term1;
     accel /= epsilon_mass[d];
     epsilon_dot[d] += dtq * accel;
     epsilon_dot[d] *= damp;
 
     mtk_term2 += epsilon_dot[d];
   }
   mtk_term2 /= g_f;
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write 
 ------------------------------------------------------------------------- */
 
 // Serialize thermostat/barostat state as one array of doubles:
 //   tstat_flag, [t_chain, (eta_t, eta_r, eta_dot_t, eta_dot_r) per link],
 //   pstat_flag, [epsilon(3), epsilon_dot(3), p_chain,
 //                (eta_b, eta_dot_b) per link]
 // Only proc 0 writes: first the byte count as an int, then the array.
 
 void FixRigidNH::write_restart(FILE *fp)
 {
   // nothing to store when neither thermostat nor barostat is active
 
   if (!tstat_flag && !pstat_flag) return;
 
   // count entries: the two flags, then each active section's payload
 
   int nentries = 2;
   if (tstat_flag) nentries += 1 + 4*t_chain;
   if (pstat_flag) nentries += 7 + 2*p_chain;
 
   double *buf;
   memory->create(buf,nentries,"rigid_nh:list");
 
   int pos = 0;
 
   // thermostat section
 
   buf[pos++] = tstat_flag;
   if (tstat_flag) {
     buf[pos++] = t_chain;
     for (int link = 0; link < t_chain; link++) {
       buf[pos++] = eta_t[link];
       buf[pos++] = eta_r[link];
       buf[pos++] = eta_dot_t[link];
       buf[pos++] = eta_dot_r[link];
     }
   }
 
   // barostat section
 
   buf[pos++] = pstat_flag;
   if (pstat_flag) {
     for (int d = 0; d < 3; d++) buf[pos++] = epsilon[d];
     for (int d = 0; d < 3; d++) buf[pos++] = epsilon_dot[d];
 
     buf[pos++] = p_chain;
     for (int link = 0; link < p_chain; link++) {
       buf[pos++] = eta_b[link];
       buf[pos++] = eta_dot_b[link];
     }
   }
 
   if (comm->me == 0) {
     int nbytes = (nentries)*sizeof(double);
     fwrite(&nbytes,sizeof(int),1,fp);
     fwrite(buf,sizeof(double),nentries,fp);
   }
 
   memory->destroy(buf);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix 
 ------------------------------------------------------------------------- */
 
 // Restore thermostat/barostat state from a buffer laid out as written by
 // write_restart().  Chain data is only copied in when the matching flag is
 // set in this fix and the stored chain length equals the current one;
 // otherwise the stored chain entries are skipped.
 
 void FixRigidNH::restart(char *buf)
 {
   double *data = (double *) buf;
   int pos = 0;
 
   // thermostat section
 
   int stored_flag = static_cast<int> (data[pos++]);
   if (stored_flag) {
     int nlinks = static_cast<int> (data[pos++]);
     if (!tstat_flag || nlinks != t_chain) pos += 4*nlinks;
     else {
       for (int link = 0; link < t_chain; link++) {
         eta_t[link] = data[pos++];
         eta_r[link] = data[pos++];
         eta_dot_t[link] = data[pos++];
         eta_dot_r[link] = data[pos++];
       }
     }
   }
 
   // barostat section: epsilon and epsilon_dot are always restored
 
   stored_flag = static_cast<int> (data[pos++]);
   if (stored_flag) {
     for (int d = 0; d < 3; d++) epsilon[d] = data[pos++];
     for (int d = 0; d < 3; d++) epsilon_dot[d] = data[pos++];
 
     int nlinks = static_cast<int> (data[pos++]);
     if (!pstat_flag || nlinks != p_chain) pos += 2*nlinks;
     else {
       for (int link = 0; link < p_chain; link++) {
         eta_b[link] = data[pos++];
         eta_dot_b[link] = data[pos++];
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixRigidNH::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command");
     if (tcomputeflag) {
       modify->delete_compute(id_temp);
       tcomputeflag = 0;
     }
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0)
       error->all(FLERR,"Could not find fix_modify temperature ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,
                  "Fix_modify temperature ID does not compute temperature");
     if (temperature->igroup != 0 && comm->me == 0)
       error->warning(FLERR,"Temperature for fix modify is not for group all");
 
     // reset id_temp of pressure to new temperature ID
 
     if (pstat_flag) {
       icompute = modify->find_compute(id_press);
       if (icompute < 0)
         error->all(FLERR,"Pressure ID for fix modify does not exist");
       modify->compute[icompute]->reset_extra_compute_fix(id_temp);
     }
 
     return 2;
 
   } else if (strcmp(arg[0],"press") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command");
     if (pcomputeflag) {
       modify->delete_compute(id_press);
       pcomputeflag = 0;
     }
     delete [] id_press;
     int n = strlen(arg[1]) + 1;
     id_press = new char[n];
     strcpy(id_press,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID");
     pressure = modify->compute[icompute];
 
     if (pressure->pressflag == 0)
       error->all(FLERR,"Fix_modify pressure ID does not compute pressure");
     return 2;
   }
 
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Allocate the per-link arrays of the thermostat chains (length t_chain,
 // translational and rotational) and of the barostat chain (length p_chain),
 // each only if the corresponding flag is set.
 
 void FixRigidNH::allocate_chain()
 {
   if (tstat_flag) {
     const int nlinks = t_chain;
 
     // translational chain
 
     q_t = new double[nlinks];
     eta_t = new double[nlinks];
     eta_dot_t = new double[nlinks];
     f_eta_t = new double[nlinks];
 
     // rotational chain
 
     q_r = new double[nlinks];
     eta_r = new double[nlinks];
     eta_dot_r = new double[nlinks];
     f_eta_r = new double[nlinks];
   }
 
   if (pstat_flag) {
     const int nlinks = p_chain;
 
     q_b = new double[nlinks];
     eta_b = new double[nlinks];
     eta_dot_b = new double[nlinks];
     f_eta_b = new double[nlinks];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pin the thermostat target to t_new by setting both ends of the ramp.
 
 void FixRigidNH::reset_target(double t_new)
 {
   t_stop = t_new;
   t_start = t_stop;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigidNH::allocate_order()
 {
   w = new double[t_order];
   wdti1 = new double[t_order];
   wdti2 = new double[t_order];
   wdti4 = new double[t_order];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the chain arrays created by allocate_chain(), using the same
 // flag tests so that only arrays that were allocated are freed.
 
 void FixRigidNH::deallocate_chain()
 {
   if (tstat_flag) {
 
     // translational chain
 
     delete [] q_t;
     delete [] eta_t;
     delete [] eta_dot_t;
     delete [] f_eta_t;
 
     // rotational chain
 
     delete [] q_r;
     delete [] eta_r;
     delete [] eta_dot_r;
     delete [] f_eta_r;
   }
 
   if (pstat_flag) {
     delete [] q_b;
     delete [] eta_b;
     delete [] eta_dot_b;
     delete [] f_eta_b;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the weight and coefficient arrays created by allocate_order().
 
 void FixRigidNH::deallocate_order()
 {
   delete [] w;
 
   delete [] wdti1;
   delete [] wdti2;
   delete [] wdti4;
 }
 
diff --git a/src/RIGID/fix_rigid_nh.h b/src/RIGID/fix_rigid_nh.h
index f19efc65f..f62f2d889 100644
--- a/src/RIGID/fix_rigid_nh.h
+++ b/src/RIGID/fix_rigid_nh.h
@@ -1,194 +1,183 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_FIX_RIGID_NH_H
 #define LMP_FIX_RIGID_NH_H
 
 #include "fix_rigid.h"
 
 namespace LAMMPS_NS {
 
 class FixRigidNH : public FixRigid {
  public:
   FixRigidNH(class LAMMPS *, int, char **);
   virtual ~FixRigidNH();
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void initial_integrate(int);
   virtual void final_integrate();
   virtual double compute_scalar();
   int modify_param(int, char **);
   void write_restart(FILE *);
   void restart(char *buf);
   void reset_target(double);
   
  protected:
   double **conjqm;                    // conjugate quaternion momentum
   double boltz,nktv2p,mvv2e;          // boltzman constant, conversion factors
 
   int nf_t,nf_r;                      // trans/rot degrees of freedom
-  double onednft,onednfr;             // factors 1 + dimension/trans(rot) degrees of freedom
+  double onednft,onednfr;             // factors 1 + dimension/trans(rot) 
+                                      //   degrees of freedom
   double *w,*wdti1,*wdti2,*wdti4;     // Yoshida-Suzuki coefficients
   double *q_t,*q_r;                   // trans/rot thermostat masses
   double *eta_t,*eta_r;               // trans/rot thermostat positions
   double *eta_dot_t,*eta_dot_r;       // trans/rot thermostat velocities
   double *f_eta_t,*f_eta_r;           // trans/rot thermostat forces
   
   double epsilon_mass[3], *q_b;       // baro/thermo masses
   double epsilon[3],*eta_b;           // baro/thermo positions
   double epsilon_dot[3],*eta_dot_b;   // baro/thermo velocities
   double *f_eta_b;                    // thermo forces
   double akin_t,akin_r;               // translational/rotational kinetic energies
   
   int kspace_flag;                    // 1 if KSpace invoked, 0 if not
   int nrigidfix;                      // number of rigid fixes
   int *rfix;                          // indicies of rigid fixes
 
   double vol0;                        // reference volume
   double t0;                          // reference temperature
   int pdim,g_f;                       // number of barostatted dims, total DoFs
   double p_hydro;                     // hydrostatic target pressure
   double p_freq_max;                  // maximum barostat frequency
   
   double mtk_term1,mtk_term2;         // Martyna-Tobias-Klein corrections
   
   double t_current,t_target;
   double p_current[3],p_target[3];
 
   char *id_temp,*id_press;
   class Compute *temperature,*pressure;
   int tcomputeflag,pcomputeflag;
 
   void couple();
   void remap();  
   void nhc_temp_integrate();
   void nhc_press_integrate();
 
   virtual void compute_temp_target();
   void compute_press_target();
   void nh_epsilon_dot();
 
   void allocate_chain();
   void allocate_order();
   void deallocate_chain();
   void deallocate_order();
 
   inline double maclaurin_series(double);
 };
 
 inline double FixRigidNH::maclaurin_series(double x)
 {
   double x2,x4;
   x2 = x * x;
   x4 = x2 * x2;
   return (1.0 + (1.0/6.0) * x2 + (1.0/120.0) * x4 + (1.0/5040.0) * x2 * x4 +
          (1.0/362880.0) * x4 * x4);
 }
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Fix rigid npt/nph period must be > 0.0
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Invalid fix rigid npt/nph command for a 2d simulation
 
 Cannot control z dimension in a 2d model.
 
 E: Invalid fix rigid npt/nph command pressure settings
 
 If multiple dimensions are coupled, those dimensions must be
 specified.
 
 E: Cannot use fix rigid npt/nph on a non-periodic dimension
 
 When specifying a diagonal pressure component, the dimension must be
 periodic.
 
 E: Invalid fix rigid npt/nph pressure settings
 
 Settings for coupled dimensions must be the same.
 
 E: Fix rigid nvt/npt/nph damping parameters must be > 0.0
 
 Self-explanatory.
 
 E: Fix rigid npt/nph dilate group ID does not exist
 
 Self-explanatory.
 
-E: Temp ID for fix rigid npt/nph does not exist
+E: Temperature ID for fix rigid nvt/npt does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: fix rigid npt/nph does not yet allow triclinic box
 
-UNDOCUMENTED
+This is a current restriction of the command.
 
 E: Cannot use fix rigid npt/nph and fix deform on same component of stress tensor
 
 This would be changing the same box dimension twice.
 
-E: Press ID for fix rigid npt/nph does not exist
+E: Pressure ID for fix rigid npt/nph does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Temperature for fix modify is not for group all
 
 The temperature compute is being used with a pressure calculation
 which does operate on group all, so this may be inconsistent.
 
 E: Pressure ID for fix modify does not exist
 
 Self-explanatory.
 
 E: Could not find fix_modify pressure ID
 
 The compute ID for computing pressure does not exist.
 
 E: Fix_modify pressure ID does not compute pressure
 
 The compute ID assigned to the fix must compute pressure.
 
-U: Target temperature for fix rigid nvt/npt cannot be 0.0
-
-Self-explanatory.
-
-U: Temperature ID for fix rigid npt/nph does not exist
-
-Self-explanatory.
-
-U: Pressure ID for fix rigid npt/nph does not exist
-
-Self-explanatory.
-
 */
diff --git a/src/RIGID/fix_rigid_nph.cpp b/src/RIGID/fix_rigid_nph.cpp
index 49be257e0..3c9328ea3 100644
--- a/src/RIGID/fix_rigid_nph.cpp
+++ b/src/RIGID/fix_rigid_nph.cpp
@@ -1,92 +1,90 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tony Sheh (U Michigan), Trung Dac Nguyen (U Michigan)
    references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005)
                Miller et al., J Chem Phys. 116, 8649-8659 (2002)
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "fix_rigid_nph.h"
 #include "domain.h"
 #include "modify.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNPH::FixRigidNPH(LAMMPS *lmp, int narg, char **arg) :
   FixRigidNH(lmp, narg, arg)
 { 
   // other setting are made by parent
 
   scalar_flag = 1;
   restart_global = 1;
   box_change = 1;
   extscalar = 1;
     
   // error checks
 
   if (pstat_flag == 0)
-    error->all(FLERR,"Pressure control must be used with fix nph");
+    error->all(FLERR,"Did not set pressure for fix rigid/nph");
   if (tstat_flag == 1)
-    error->all(FLERR,"Temperature control must not be used with fix nph");
-  if (p_start[0] < 0.0 || p_start[1] < 0.0 || p_start[2] < 0.0 || 
-      p_stop[0] < 0.0 || p_stop[1] < 0.0 || p_stop[2] < 0.0)
-    error->all(FLERR,"Target pressure for fix rigid/nph cannot be 0.0");
-  
+    error->all(FLERR,"Cannot set temperature for fix rigid/nph");
+
   // convert input periods to frequency
+
   p_freq[0] = p_freq[1] = p_freq[2] = 0.0;
 
   if (p_flag[0]) p_freq[0] = 1.0 / p_period[0];
   if (p_flag[1]) p_freq[1] = 1.0 / p_period[1];
   if (p_flag[2]) p_freq[2] = 1.0 / p_period[2];
 
   // create a new compute temp style
   // id = fix-ID + temp
   // compute group = all since pressure is always global (group all)
   //   and thus its KE/temperature contribution should use group all
 
   int n = strlen(id) + 6;
   id_temp = new char[n];
   strcpy(id_temp,id);
   strcat(id_temp,"_temp");
 
   char **newarg = new char*[3];
   newarg[0] = id_temp;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "temp";  
   modify->add_compute(3,newarg);
   delete [] newarg;
   tcomputeflag = 1;
   
   // create a new compute pressure style
   // id = fix-ID + press, compute group = all
   // pass id_temp as 4th arg to pressure constructor
   
   n = strlen(id) + 7;
   id_press = new char[n];
   strcpy(id_press,id);
   strcat(id_press,"_press");
 
   newarg = new char*[4];
   newarg[0] = id_press;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "pressure";
   newarg[3] = id_temp;
   modify->add_compute(4,newarg);
   delete [] newarg;
   pcomputeflag = 1;
 }
diff --git a/src/RIGID/fix_rigid_nph.h b/src/RIGID/fix_rigid_nph.h
index 49e9ab97a..70208d959 100644
--- a/src/RIGID/fix_rigid_nph.h
+++ b/src/RIGID/fix_rigid_nph.h
@@ -1,53 +1,49 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(rigid/nph,FixRigidNPH)
 
 #else
 
 #ifndef LMP_FIX_RIGID_NPH_H
 #define LMP_FIX_RIGID_NPH_H
 
 #include "fix_rigid_nh.h"
 
 namespace LAMMPS_NS {
 
 class FixRigidNPH : public FixRigidNH {
  public:
   FixRigidNPH(class LAMMPS *, int, char **);
   ~FixRigidNPH() {}
 };
 
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Pressure control must be used with fix nph
+E: Did not set pressure for fix rigid/nph
 
-UNDOCUMENTED
+The press keyword must be specified.
 
-E: Temperature control must not be used with fix nph
+E: Cannot set temperature for fix rigid/nph
 
-UNDOCUMENTED
-
-E: Target pressure for fix rigid/nph cannot be 0.0
-
-UNDOCUMENTED
+The temp keyword cannot be specified.
 
 */
diff --git a/src/RIGID/fix_rigid_npt.cpp b/src/RIGID/fix_rigid_npt.cpp
index 235faaba5..8a600c367 100644
--- a/src/RIGID/fix_rigid_npt.cpp
+++ b/src/RIGID/fix_rigid_npt.cpp
@@ -1,104 +1,100 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tony Sheh (U Michigan), Trung Dac Nguyen (U Michigan)
    references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005)
                Miller et al., J Chem Phys. 116, 8649-8659 (2002)
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "fix_rigid_npt.h"
 #include "domain.h"
 #include "modify.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNPT::FixRigidNPT(LAMMPS *lmp, int narg, char **arg) :
   FixRigidNH(lmp, narg, arg)
 { 
   // other setting are made by parent
 
   scalar_flag = 1;
   restart_global = 1;
   box_change = 1;
   extscalar = 1;
   
   // error checks
 
   if (tstat_flag == 0 || pstat_flag == 0)
-    error->all(FLERR,"Did not set temp or press for fix rigid/npt");
+    error->all(FLERR,"Did not set temperature or pressure for fix rigid/npt");
   if (t_start <= 0.0 || t_stop <= 0.0)
     error->all(FLERR,"Target temperature for fix rigid/npt cannot be 0.0");
-  if (p_start[0] < 0.0 || p_start[1] < 0.0 || p_start[2] < 0.0 || 
-      p_stop[0] < 0.0 || p_stop[1] < 0.0 || p_stop[2] < 0.0)
-    error->all(FLERR,"Target pressure for fix rigid/npt cannot be 0.0");
-
   if (t_period <= 0.0) error->all(FLERR,"Fix rigid/npt period must be > 0.0");
 
   // thermostat chain parameters
 
-  if (t_chain < 1) error->all(FLERR,"Illegal fix_modify command");
-  if (t_iter < 1) error->all(FLERR,"Illegal fix_modify command");
+  if (t_chain < 1) error->all(FLERR,"Illegal fix rigid/npt command");
+  if (t_iter < 1) error->all(FLERR,"Illegal fix rigid/npt command");
   if (t_order != 3 && t_order != 5) 
-    error->all(FLERR,"Fix_modify order must be 3 or 5");
+    error->all(FLERR,"Fix rigid/npt temperature order must be 3 or 5");
 
   // convert input periods to frequency
 
   t_freq = 0.0;
   p_freq[0] = p_freq[1] = p_freq[2] = 0.0;
 
   t_freq = 1.0 / t_period;
   if (p_flag[0]) p_freq[0] = 1.0 / p_period[0];
   if (p_flag[1]) p_freq[1] = 1.0 / p_period[1];
   if (p_flag[2]) p_freq[2] = 1.0 / p_period[2];
 
   // create a new compute temp style
   // id = fix-ID + temp
   // compute group = all since pressure is always global (group all)
   //   and thus its KE/temperature contribution should use group all
 
   int n = strlen(id) + 6;
   id_temp = new char[n];
   strcpy(id_temp,id);
   strcat(id_temp,"_temp");
 
   char **newarg = new char*[3];
   newarg[0] = id_temp;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "temp";  
   modify->add_compute(3,newarg);
   delete [] newarg;
   tcomputeflag = 1;
   
   // create a new compute pressure style
   // id = fix-ID + press, compute group = all
   // pass id_temp as 4th arg to pressure constructor
   
   n = strlen(id) + 7;
   id_press = new char[n];
   strcpy(id_press,id);
   strcat(id_press,"_press");
 
   newarg = new char*[4];
   newarg[0] = id_press;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "pressure";
   newarg[3] = id_temp;
   modify->add_compute(4,newarg);
   delete [] newarg;
   pcomputeflag = 1;
 }
diff --git a/src/RIGID/fix_rigid_npt.h b/src/RIGID/fix_rigid_npt.h
index f72955248..e36d5375c 100644
--- a/src/RIGID/fix_rigid_npt.h
+++ b/src/RIGID/fix_rigid_npt.h
@@ -1,65 +1,63 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(rigid/npt,FixRigidNPT)
 
 #else
 
 #ifndef LMP_FIX_RIGID_NPT_H
 #define LMP_FIX_RIGID_NPT_H
 
 #include "fix_rigid_nh.h"
 
 namespace LAMMPS_NS {
 
 class FixRigidNPT : public FixRigidNH {
  public:
   FixRigidNPT(class LAMMPS *, int, char **);
   ~FixRigidNPT() {}
 };
 
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Did not set temp or press for fix rigid/npt
+E: Did not set temperature or pressure for fix rigid/npt
 
-UNDOCUMENTED
+The temp and press keywords must be specified.
 
 E: Target temperature for fix rigid/npt cannot be 0.0
 
-UNDOCUMENTED
-
-E: Target pressure for fix rigid/npt cannot be 0.0
-
-UNDOCUMENTED
+Self-explanatory.
 
 E: Fix rigid/npt period must be > 0.0
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
-E: Fix_modify order must be 3 or 5
+E: Fix rigid/npt temperature order must be 3 or 5
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/RIGID/fix_rigid_nvt.cpp b/src/RIGID/fix_rigid_nvt.cpp
index abf681d90..0f5593777 100644
--- a/src/RIGID/fix_rigid_nvt.cpp
+++ b/src/RIGID/fix_rigid_nvt.cpp
@@ -1,50 +1,50 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tony Sheh (U Michigan), Trung Dac Nguyen (U Michigan)
    references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005)
                Miller et al., J Chem Phys. 116, 8649-8659 (2002)
 ------------------------------------------------------------------------- */
 
 #include "fix_rigid_nvt.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNVT::FixRigidNVT(LAMMPS *lmp, int narg, char **arg) :
   FixRigidNH(lmp, narg, arg)
 { 
   // other settings are made by parent
 
   scalar_flag = 1;
   restart_global = 1;
   extscalar = 1;
   
   // error checking
   // convert input period to frequency
 
   if (tstat_flag == 0)
-    error->all(FLERR,"Did not set temp for fix rigid/nvt");
+    error->all(FLERR,"Did not set temperature for fix rigid/nvt");
   if (t_start < 0.0 || t_stop <= 0.0)
     error->all(FLERR,"Target temperature for fix rigid/nvt cannot be 0.0");
   if (t_period <= 0.0) error->all(FLERR,"Fix rigid/nvt period must be > 0.0");
   t_freq = 1.0 / t_period;
 
-  if (t_chain < 1) error->all(FLERR,"Illegal fix_modify command");
-  if (t_iter < 1) error->all(FLERR,"Illegal fix_modify command");
+  if (t_chain < 1) error->all(FLERR,"Illegal fix rigid/nvt command");
+  if (t_iter < 1) error->all(FLERR,"Illegal fix rigid/nvt command");
   if (t_order != 3 && t_order != 5) 
-    error->all(FLERR,"Fix_modify order must be 3 or 5"); 
+    error->all(FLERR,"Fix rigid/nvt temperature order must be 3 or 5"); 
 }
diff --git a/src/RIGID/fix_rigid_nvt.h b/src/RIGID/fix_rigid_nvt.h
index 7248cf96d..0a0b8b151 100644
--- a/src/RIGID/fix_rigid_nvt.h
+++ b/src/RIGID/fix_rigid_nvt.h
@@ -1,60 +1,62 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(rigid/nvt,FixRigidNVT)
 
 #else
 
 #ifndef LMP_FIX_RIGID_NVT_H
 #define LMP_FIX_RIGID_NVT_H
 
 #include "fix_rigid_nh.h"
 
 namespace LAMMPS_NS {
 
 class FixRigidNVT : public FixRigidNH {
  public:
   FixRigidNVT(class LAMMPS *, int, char **);
   ~FixRigidNVT() {}
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
-E: Did not set temp for fix rigid/nvt
+E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
-E: Target temperature for fix rigid/nvt cannot be 0.0
+E: Did not set temperature for fix rigid/nvt
 
-UNDOCUMENTED
+The temp keyword must be specified.
 
-E: Fix rigid/nvt period must be > 0.0
+E: Target temperature for fix rigid/nvt cannot be 0.0
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Illegal ... command
+E: Fix rigid/nvt period must be > 0.0
 
-UNDOCUMENTED
+Self-explanatory.
 
-E: Fix_modify order must be 3 or 5
+E: Fix rigid/nvt temperature order must be 3 or 5
 
-UNDOCUMENTED
+Self-explanatory.
 
 */
diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h
index 866aa113b..8eca0e8d5 100644
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@@ -1,323 +1,229 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(rigid/small,FixRigidSmall)
 
 #else
 
 #ifndef LMP_FIX_RIGID_SMALL_H
 #define LMP_FIX_RIGID_SMALL_H
 
 #include "fix.h"
 
 // replace this later
 #include <map>
 
 namespace LAMMPS_NS {
 
 class FixRigidSmall : public Fix {
  public:
   // static variable for ring communication callback to access class data
 
   static FixRigidSmall *frsptr;
 
   FixRigidSmall(class LAMMPS *, int, char **);
   virtual ~FixRigidSmall();
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void initial_integrate(int);
   void post_force(int);
   virtual void final_integrate();
   void initial_integrate_respa(int, int, int);
   void final_integrate_respa(int, int);
 
   void grow_arrays(int);
   void copy_arrays(int, int, int);
   void set_arrays(int);
   int pack_exchange(int, double *);
   int unpack_exchange(int, double *);
   int pack_comm(int, int *, double *, int, int *);
   void unpack_comm(int, int, double *);
   int pack_reverse_comm(int, int, double *);
   void unpack_reverse_comm(int, int *, double *);
 
   void setup_pre_neighbor();
   void pre_neighbor();
   int dof(int);
   void deform(int);
   void reset_dt();
   double compute_scalar();
   double memory_usage();
 
  protected:
   int me,nprocs;
   double dtv,dtf,dtq;
   double *step_respa;
   int triclinic;
   double MINUSPI,TWOPI;
 
   int firstflag;            // 1 for first-time setup of rigid bodies
   int commflag;             // various modes of forward/reverse comm
   int nbody;                // total # of rigid bodies
   double maxextent;         // furthest distance from body owner to body atom
 
   struct Body {
     double mass;              // total mass of body
     double xcm[3];            // COM position
     double vcm[3];            // COM velocity
     double fcm[3];            // force on COM
     double torque[3];         // torque around COM
     double quat[4];           // quaternion for orientation of body
     double inertia[3];        // 3 principal components of inertia
     double ex_space[3];       // principal axes in space coords
     double ey_space[3];
     double ez_space[3];
     double angmom[3];         // space-frame angular momentum of body
     double omega[3];          // space-frame omega of body
     tagint image;             // image flags of xcm
     int remapflag[4];         // PBC remap flags
     int ilocal;               // index of owning atom
   };
 
   Body *body;               // list of rigid bodies, owned and ghost
   int nlocal_body;          // # of owned rigid bodies
   int nghost_body;          // # of ghost rigid bodies
   int nmax_body;            // max # of bodies that body can hold
   int bodysize;             // sizeof(Body) in doubles
 
   // per-atom quantities
   // only defined for owned atoms, except bodyown for own+ghost
 
   int *bodyown;         // index of body if atom owns a body, -1 if not
   int *bodytag;         // ID of body this atom is in, 0 if none
                         // ID = tag of atom that owns body
   int *atom2body;       // index of owned/ghost body this atom is in, -1 if not
                         // can point to original or any image of the body
   double **displace;    // displacement of each atom in body coords
 
   int *eflags;              // flags for extended particles
   double **orient;          // orientation vector of particle wrt rigid body
   double **dorient;         // orientation of dipole mu wrt rigid body
 
   int extended;             // 1 if any particles have extended attributes
   int orientflag;           // 1 if particles store spatial orientation
   int dorientflag;          // 1 if particles store dipole orientation
 
   int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
   int OMEGA,ANGMOM,TORQUE;
 
   class AtomVecEllipsoid *avec_ellipsoid;
   class AtomVecLine *avec_line;
   class AtomVecTri *avec_tri;
 
   // temporary per-body storage
 
   int **counts;            // counts of atom types in bodies
   double **itensor;        // 6 space-frame components of inertia tensor
 
   // Langevin thermostatting
 
   int langflag;                     // 0/1 = no/yes Langevin thermostat
   double t_start,t_stop,t_period;   // thermostat params
   double **langextra;               // Langevin thermostat forces and torques
   int maxlang;                      // max size of langextra
   class RanMars *random;            // RNG
 
   // class data used by ring communication callbacks
 
   std::map<int,int> *hash;
   double **bbox;
   double **ctr;
   int *idclose;
   double *rsqclose;
   double rsqfar;
 
   void set_xv();
   void set_v();
   void create_bodies();
   void setup_bodies();
   void grow_body();
   void reset_atom2body();
 
   // callback functions for ring communication
 
   static void ring_bbox(int, char *);
   static void ring_nearest(int, char *);
   static void ring_farthest(int, char *);
 
   // debug
 
   //void check(int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix rigid/small langevin period must be > 0.0
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Fix rigid/small requires atom attribute molecule
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: No rigid bodies defined
 
 The fix specification did not end up defining any rigid bodies.
 
 W: More than one fix rigid
 
 It is not efficient to use fix rigid more than once.
 
 E: Rigid fix must come before NPT/NPH fix
 
 NPT/NPH fix must be defined in input script after all rigid fixes,
 else the rigid fix contribution to the pressure virial is
 incorrect.
 
 W: Computing temperature of portions of rigid bodies
 
 The group defined by the temperature compute does not encompass all
 the atoms in one or more rigid bodies, so the change in
 degrees-of-freedom for the atoms in those partial rigid bodies will
 not be accounted for.
 
 E: Fix rigid/small atom has non-zero image flag in a non-periodic dimension
 
-UNDOCUMENTED
+Image flags for non-periodic dimensions should not be set.
 
 E: Insufficient Jacobi rotations for rigid body
 
 Eigensolve for rigid body was not sufficiently accurate.
 
 E: Fix rigid: Bad principal moments
 
 The principal moments of inertia computed for a rigid body
 are not within the required tolerances.
 
 E: Rigid body atoms %d %d missing on proc %d at step %ld
 
-UNDOCUMENTED
-
-E: BAD AAA
-
-UNDOCUMENTED
-
-E: BAD BBB
-
-UNDOCUMENTED
-
-E: BAD CCC
-
-UNDOCUMENTED
-
-E: BAD DDD
-
-UNDOCUMENTED
-
-E: BAD EEE
-
-UNDOCUMENTED
-
-E: BAD FFF
-
-UNDOCUMENTED
-
-E: BAD GGG
-
-UNDOCUMENTED
-
-E: BAD HHH
-
-UNDOCUMENTED
-
-E: BAD III
-
-UNDOCUMENTED
-
-E: BAD JJJ
-
-UNDOCUMENTED
-
-E: BAD KKK
-
-UNDOCUMENTED
-
-E: BAD LLL
-
-UNDOCUMENTED
-
-U: Fix rigid molecule requires atom attribute molecule
-
-Self-explanatory.
-
-U: Could not find fix rigid group ID
-
-A group ID used in the fix rigid command does not exist.
-
-U: One or more atoms belong to multiple rigid bodies
-
-Two or more rigid bodies defined by the fix rigid command cannot
-contain the same atom.
-
-U: Fix rigid z force cannot be on for 2d simulation
-
-Self-explanatory.
-
-U: Fix rigid xy torque cannot be on for 2d simulation
-
-Self-explanatory.
-
-U: Fix rigid langevin period must be > 0.0
-
-Self-explanatory.
-
-U: One or zero atoms in rigid body
-
-Any rigid body defined by the fix rigid command must contain 2 or more
-atoms.
-
-U: Fix rigid atom has non-zero image flag in a non-periodic dimension
-
-You cannot set image flags for non-periodic dimensions.
-
-U: Cannot open fix rigid infile %s
-
-UNDOCUMENTED
-
-U: Unexpected end of fix rigid file
-
-UNDOCUMENTED
-
-U: Incorrect rigid body format in fix rigid file
-
-UNDOCUMENTED
-
-U: Invalid rigid body ID in fix rigid file
-
-UNDOCUMENTED
+This means that an atom cannot find the atom that owns the rigid body
+it is part of, or vice versa.  The solution is to use the communicate
+cutoff command to ensure ghost atoms are acquired from far enough away
+to encompass the max distance printed when the fix rigid/small command
+was invoked.
 
 */
diff --git a/src/SHOCK/fix_append_atoms.cpp b/src/SHOCK/fix_append_atoms.cpp
index 9b2f92b9f..7fe1d4a38 100644
--- a/src/SHOCK/fix_append_atoms.cpp
+++ b/src/SHOCK/fix_append_atoms.cpp
@@ -1,514 +1,514 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "fix_append_atoms.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "modify.h"
 #include "domain.h"
 #include "lattice.h"
 #include "update.h"
 #include "random_mars.h"
 #include "error.h"
 #include "force.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define BIG      1.0e30
 #define EPSILON  1.0e-6
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the arguments of the fix append/atoms command (arg[3] onward) into
 // member flags and parameters.  A lattice must already be defined because
 // the number of basis atoms is read from it.  Only the zhi face keyword is
 // accepted; the other face keywords abort with an error.  When scaleflag
 // is set (units lattice, the default) size and the random displacements
 // are multiplied by the lattice spacings at the end.
 // NOTE(review): freq and size are only assigned when their keywords are
 // given; no default is visible here - confirm callers always supply them.
 
 FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   force_reneighbor = 1;
   next_reneighbor = -1;
   box_change = 1;
   time_depend = 1;
 
   if (narg < 4) error->all(FLERR,"Illegal fix append/atoms command");
 
   // default settings
 
   scaleflag = 1;
   spatflag=0;
   xloflag = xhiflag = yloflag = yhiflag = zloflag = zhiflag = 0;
 
   tempflag = 0;
 
   ranflag = 0;
   ranx = 0.0;
   rany = 0.0;
   ranz = 0.0;
 
   randomx = NULL;
   randomt = NULL;
 
   // optional buffers start out empty so they are never left dangling
 
   spatialid = NULL;
   gfactor1 = gfactor2 = NULL;
 
   if (!domain->lattice) 
     error->all(FLERR,"Fix append/atoms requires a lattice be defined");
 
   // every basis atom defaults to atom type 1
 
   nbasis = domain->lattice->nbasis;
   basistype = new int[nbasis];
   for (int i = 0; i < nbasis; i++) basistype[i] = 1;
 
   int iarg = 3;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"xlo") == 0) {
       error->all(FLERR,"Only zhi currently implemented for fix append/atoms");
       xloflag = 1;
       iarg++;
       if (domain->boundary[0][0] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"xhi") == 0) {
       error->all(FLERR,"Only zhi currently implemented for fix append/atoms");
       xhiflag = 1;
       iarg++;
       if (domain->boundary[0][1] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"ylo") == 0) {
       error->all(FLERR,"Only zhi currently implemented for fix append/atoms");
       yloflag = 1;
       iarg++;
       if (domain->boundary[1][0] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"yhi") == 0) {
       error->all(FLERR,"Only zhi currently implemented for fix append/atoms");
       yhiflag = 1;
       iarg++;
       if (domain->boundary[1][1] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"zlo") == 0) {
       error->all(FLERR,"Only zhi currently implemented for fix append/atoms");
       zloflag = 1;
       iarg++;
       if (domain->boundary[2][0] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"zhi") == 0) {
       zhiflag = 1;
       iarg++;
       if (domain->boundary[2][1] != 3)
         error->all(FLERR,"Append boundary must be shrink/minimum");
     } else if (strcmp(arg[iarg],"freq") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       freq = atoi(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"spatial") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix append/atoms command");
 
       // the fix must be given as f_ID with a non-empty ID; otherwise
       // skipping the first two characters would store the wrong text
       // or read past the end of the argument
 
       if (strncmp(arg[iarg+1],"f_",2) != 0 || arg[iarg+1][2] == '\0')
         error->all(FLERR,
                    "Bad fix ID in fix append/atoms command");
       spatflag = 1;
       spatlead = atof(arg[iarg+2]);
 
       // store the ID without its f_ prefix
       // if the keyword is repeated the earlier copy is released first
 
       delete [] spatialid;
       int n = strlen(&arg[iarg+1][2]) + 1;
       spatialid = new char[n];
       strcpy(spatialid,&arg[iarg+1][2]);
       iarg += 3;
     } else if (strcmp(arg[iarg],"basis") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       if (domain->lattice == NULL)
-        error->all(FLERR,"Must define lattice to append/atoms");
+        error->all(FLERR,"Fix append/atoms requires a lattice be defined");
       int ibasis = atoi(arg[iarg+1]);
       int itype = atoi(arg[iarg+2]);
       if (ibasis <= 0 || ibasis > nbasis || itype <= 0 || itype > atom->ntypes)
         error->all(FLERR,"Illegal fix append/atoms command");
       basistype[ibasis-1] = itype;
       iarg += 3;
     } else if (strcmp(arg[iarg],"size") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       size = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"units") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0;
       else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
       else error->all(FLERR,"Illegal fix append/atoms command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"random") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       ranflag = 1;
       ranx = atof(arg[iarg+1]);
       rany = atof(arg[iarg+2]);
       ranz = atof(arg[iarg+3]);
       xseed = atoi(arg[iarg+4]);
       if (xseed <= 0) error->all(FLERR,"Illegal fix append/atoms command");
       randomx = new RanMars(lmp,xseed + comm->me);
       iarg += 5;
     } else if (strcmp(arg[iarg],"temp") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       tempflag = 1;
       t_target = atof(arg[iarg+1]);
       t_period = atof(arg[iarg+2]);
       tseed    = atoi(arg[iarg+3]);
       t_extent = atof(arg[iarg+4]);
       if (t_target <= 0) error->all(FLERR,"Illegal fix append/atoms command");
       if (t_period <= 0) error->all(FLERR,"Illegal fix append/atoms command");
       if (t_extent <= 0) error->all(FLERR,"Illegal fix append/atoms command");
       if (tseed <= 0) error->all(FLERR,"Illegal fix append/atoms command");
       randomt = new RanMars(lmp,tseed + comm->me);
       gfactor1 = new double[atom->ntypes+1];
       gfactor2 = new double[atom->ntypes+1];
       iarg += 5;
     } else error->all(FLERR,"Illegal fix append/atoms command");
   }
 
   if ((xloflag || xhiflag) && domain->xperiodic)
     error->all(FLERR,"Cannot use append/atoms in periodic dimension");
   if ((yloflag || yhiflag) && domain->yperiodic)
     error->all(FLERR,"Cannot use append/atoms in periodic dimension");
   if ((zloflag || zhiflag) && domain->zperiodic)
     error->all(FLERR,"Cannot use append/atoms in periodic dimension");
 
   if (domain->triclinic == 1)
     error->all(FLERR,"Cannot append atoms to a triclinic box");
 
   // setup scaling
 
   if (scaleflag && domain->lattice == NULL)
     error->all(FLERR,"Use of fix append/atoms with undefined lattice");
 
   double xscale,yscale,zscale;
   if (scaleflag) {
     xscale = domain->lattice->xlattice;
     yscale = domain->lattice->ylattice;
     zscale = domain->lattice->zlattice;
   }
   else xscale = yscale = zscale = 1.0;
 
   if (xloflag || xhiflag) size *= xscale;
   if (yloflag || yhiflag) size *= yscale;
   if (zloflag || zhiflag) size *= zscale;
 
   if (ranflag) {
     ranx *= xscale;
     rany *= yscale;
     ranz *= zscale;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release everything the constructor allocated.  Each optional buffer is
 // guarded by the flag the constructor sets before allocating it.
 
 FixAppendAtoms::~FixAppendAtoms()
 {
   delete [] basistype;
 
   if (ranflag) delete randomx;
   if (tempflag) {
     delete randomt;
     delete [] gfactor1;
     delete [] gfactor2;
   }
 
   // spatialid only exists when the spatial keyword was given
 
   if (spatflag) delete [] spatialid;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Return the bitwise OR of the PRE_EXCHANGE, INITIAL_INTEGRATE and
 // POST_FORCE mask bits.
 
 int FixAppendAtoms::setmask()
 {
   return 0 | PRE_EXCHANGE | INITIAL_INTEGRATE | POST_FORCE;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // On steps that are a multiple of freq, record the current step in
 // next_reneighbor.  vflag is not used.
 
 void FixAppendAtoms::initial_integrate(int vflag)
 {
   const bool on_interval = (update->ntimestep % freq == 0);
   if (on_interval) next_reneighbor = update->ntimestep;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixAppendAtoms::setup(int vflag)
 {
   /*** CALL TO CREATE GROUP?  SEE POST_FORCE ***/
   post_force(vflag);
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 // Decide whether atoms should be appended, using the profile reported by
 // the fix whose ID is stored in spatialid.
 // On steps that are a multiple of freq the fix's vector is read as
 // (position,value) pairs until two consecutive positions equal 0.0.  A
 // shock-front location is estimated from the values and advance is set to 1
 // when the front is closer than spatlead to the upper z box face.
 // advance is then summed over all procs.
 // Returns 1 if any proc set advance, else 0.
 
 int FixAppendAtoms::get_spatial()
 {
   if (update->ntimestep % freq == 0) {
     int ifix = modify->find_fix(spatialid);
     if (ifix < 0)
       error->all(FLERR,"Fix ID for fix ave/spatial does not exist");
     Fix *fix = modify->fix[ifix];
 
     // count entries until two successive positions are 0.0
     // the two terminating entries are included in count
 
     int failed = 0;
     int count = 0;
     while (failed < 2) {
       double tmp = fix->compute_vector(2*count);
       if (tmp == 0.0) failed++;
       else failed = 0;
       count++;
     }
 
     // copy the count-2 valid (position,value) pairs
 
     double *pos = new double[count-2];
     double *val = new double[count-2];
     for (int loop=0; loop < count-2; loop++) {
       pos[loop] = fix->compute_vector(2*loop);
       val[loop] = fix->compute_vector(2*loop+1);
     }
 
     // always ignore the first and last
 
     // NOTE(review): header is derived from size/binsize with binsize fixed
     // at 2.0; a header of 0 or a short profile is not guarded against here
 
     double binsize = 2.0;
     double min_energy=0.0;
     double max_energy=0.0;
     int header = static_cast<int> (size / binsize);
     advance = 0;
 
     // average the first header bins (max) and a header-wide window
     // near the far end (min)
 
     for (int loop=1; loop <= header; loop++) {
         max_energy += val[loop];
     }
     for (int loop=count-2-2*header; loop <=count-3-header; loop++) {
       min_energy += val[loop];
     }
     max_energy /= header;
     min_energy /= header;
 
     double shockfront_min = 0.0;
     double shockfront_max = 0.0;
     double shockfront_loc = 0.0;
 
     // scanning downward, keep the first bin above the 10% level
 
     int front_found1 = 0;
     for (int loop=count-3-header; loop > header; loop--) {
       if (front_found1 == 1) continue;
       if (val[loop] > min_energy + 0.1*(max_energy - min_energy)) {
         shockfront_max = pos[loop];
         front_found1=1;
       }
     }
 
     // scanning upward, keep the last bin above the 60% level
 
     int front_found2 = 0;
     for (int loop=header+1; loop <=count-3-header; loop++) {
       if (val[loop] > min_energy + 0.6*(max_energy - min_energy)) {
         shockfront_min = pos[loop];
         front_found2=1;
       }
     }
     if      (front_found1 + front_found2 == 0) shockfront_loc = 0.0;
     else if (front_found1 + front_found2 == 1)
       shockfront_loc = shockfront_max + shockfront_min;
     else if (front_found1 == 1 && front_found2 == 1 &&
              shockfront_max-shockfront_min > spatlead/2.0)
       shockfront_loc = shockfront_max;
     else shockfront_loc = (shockfront_max + shockfront_min) / 2.0;
     if (comm->me == 0)
       printf("SHOCK: %g %g %g %g %g\n", shockfront_loc, shockfront_min,
              shockfront_max, domain->boxlo[2], domain->boxhi[2]);
 
     if (domain->boxhi[2] - shockfront_loc < spatlead) advance = 1;
 
     // free both work arrays
     // "delete [] pos,val" is a comma expression that frees only pos
 
     delete [] pos;
     delete [] val;
   }
 
   advance_sum = 0;
   MPI_Allreduce(&advance,&advance_sum,1,MPI_INT,MPI_SUM,world);
 
   if (advance_sum > 0) return 1;
   else return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Apply the two per-atom modifications of this fix to all local atoms:
 //  - if tempflag is set, atoms with z >= boxhi[2] - t_extent get a drag
 //    term (gamma1*v) plus a uniform random term (gamma2*(uniform-0.5))
 //    added to each force component
 //  - atoms with z >= boxhi[2] - size have force and velocity zeroed
 // Per-type masses are used when atom->mass is set, else per-atom rmass.
 // vflag is not used.
 
 void FixAppendAtoms::post_force(int vflag)
 {
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double tsqrt = sqrt(t_target);
   double gamma1,gamma2;
 
   if (atom->mass) {
 
     // per-type masses: tabulate both factors once per call
 
     if (tempflag) {
       for (int itype = 1; itype <= atom->ntypes; itype++) {
         gfactor1[itype] = -atom->mass[itype] / t_period / force->ftm2v;
         gfactor2[itype] = sqrt(atom->mass[itype]) *
           sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
           force->ftm2v;
       }
     }
 
     for (int i = 0; i < nlocal; i++) {
 
       // set temp ahead of shock
 
       if (tempflag && x[i][2] >= domain->boxhi[2] - t_extent ) {
         gamma1 = gfactor1[type[i]];
         gamma2 = gfactor2[type[i]] * tsqrt;
         for (int d = 0; d < 3; d++)
           f[i][d] += gamma1*v[i][d] + gamma2*(randomt->uniform()-0.5);
       }
 
       // freeze atoms at boundary
 
       if (x[i][2] >= domain->boxhi[2] - size) {
         for (int d = 0; d < 3; d++) {
           f[i][d] = 0.0;
           v[i][d] = 0.0;
         }
       }
     }
 
   } else {
 
     // per-atom masses: factors are computed atom by atom
 
     double *rmass = atom->rmass;
     double ftm2v = force->ftm2v;
     double mvv2e = force->mvv2e;
     double boltz = force->boltz;
     double dt = update->dt;
 
     for (int i = 0; i < nlocal; i++) {
 
       // set temp ahead of shock
 
       if (tempflag && x[i][2] >= domain->boxhi[2] - t_extent ) {
         gamma1 = -rmass[i] / t_period / ftm2v;
         gamma2 = sqrt(rmass[i]) * sqrt(24.0*boltz/t_period/dt/mvv2e) / ftm2v;
         gamma2 *= tsqrt;
         for (int d = 0; d < 3; d++)
           f[i][d] += gamma1*v[i][d] + gamma2*(randomt->uniform()-0.5);
       }
 
       // freeze atoms at boundary
 
       if (x[i][2] >= domain->boxhi[2] - size) {
         for (int d = 0; d < 3; d++) {
           f[i][d] = 0.0;
           v[i][d] = 0.0;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // On steps that are a multiple of freq, create new lattice atoms in a slab
 // of thickness size above the upper z face.
 // If the spatial keyword is active, nothing is done unless get_spatial()
 // returns nonzero.
 // Only procs in the topmost z layer of the processor grid create atoms.
 // The number of new atoms is summed over all procs; if any were created the
 // box is reset and, when tags are enabled, tags, natoms and the atom map
 // are updated.
 
 void FixAppendAtoms::pre_exchange()
 {
   int addnode = 0;
 
   // test the step counter directly, as the other methods of this fix do,
   // so it is never narrowed to int
 
   if (update->ntimestep % freq == 0) {
     if (spatflag==1) if (get_spatial()==0) return;
     if (comm->myloc[2] == comm->procgrid[2]-1) {
       double bboxlo[3],bboxhi[3];
 
       // slab to fill: my sub-domain in x,y and [subhi, subhi+size) in z
 
       bboxlo[0] = domain->sublo[0]; bboxhi[0] = domain->subhi[0];
       bboxlo[1] = domain->sublo[1]; bboxhi[1] = domain->subhi[1];
       bboxlo[2] = domain->subhi[2]; bboxhi[2] = domain->subhi[2]+size;
 
       // accumulate lattice-coordinate bounds over the 8 slab corners
 
       double xmin,ymin,zmin,xmax,ymax,zmax;
       xmin = ymin = zmin = BIG;
       xmax = ymax = zmax = -BIG;
 
       domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxlo[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxlo[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxlo[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxlo[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxhi[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxhi[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxhi[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
       domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxhi[2],
                             xmin,ymin,zmin,xmax,ymax,zmax);
 
       // integer cell range
       // the cast truncates toward zero, so negative lower bounds are
       // decremented to include the partially covered cell
 
       int ilo,ihi,jlo,jhi,klo,khi;
       ilo = static_cast<int> (xmin);
       jlo = static_cast<int> (ymin);
       klo = static_cast<int> (zmin);
       ihi = static_cast<int> (xmax);
       jhi = static_cast<int> (ymax);
       khi = static_cast<int> (zmax);
 
       if (xmin < 0.0) ilo--;
       if (ymin < 0.0) jlo--;
       if (zmin < 0.0) klo--;
 
       double **basis = domain->lattice->basis;
       double x[3];
       double *sublo = domain->sublo;
       double *subhi = domain->subhi;
 
       int i,j,k,m;
       for (k = klo; k <= khi; k++) {
         for (j = jlo; j <= jhi; j++) {
           for (i = ilo; i <= ihi; i++) {
             for (m = 0; m < nbasis; m++) {
               x[0] = i + basis[m][0];
               x[1] = j + basis[m][1];
               x[2] = k + basis[m][2];
 
               int flag = 0;
               // convert from lattice coords to box coords
               domain->lattice->lattice2box(x[0],x[1],x[2]);
 
               // keep the site if it lies in my x,y sub-domain and in the
               // new slab
               // NOTE(review): the 2d branch tests the y direction instead
               // - confirm this is intended
 
               if (x[0] >= sublo[0] && x[0] < subhi[0] &&
                   x[1] >= sublo[1] && x[1] < subhi[1] &&
                   x[2] >= subhi[2] && x[2] < subhi[2]+size) flag = 1;
               else if (domain->dimension == 2 && x[1] >= domain->boxhi[1] &&
                        comm->myloc[1] == comm->procgrid[1]-1 &&
                        x[0] >= sublo[0] && x[0] < subhi[0]) flag = 1;
 
               if (flag) {
                 if (ranflag) {
                   x[0] += ranx * 2.0*(randomx->uniform()-0.5);
                   x[1] += rany * 2.0*(randomx->uniform()-0.5);
                   x[2] += ranz * 2.0*(randomx->uniform()-0.5);
                 }
                 addnode++;
                 atom->avec->create_atom(basistype[m],x);
               }
             }
           }
         }
       }
     }
 
     // total number of atoms created across all procs
 
     int addtotal = 0;
     MPI_Barrier(world);
     MPI_Allreduce(&addnode,&addtotal,1,MPI_INT,MPI_SUM,world);
 
     if (addtotal) {
       domain->reset_box();
       if (atom->tag_enable) {
         atom->tag_extend();
         atom->natoms += addtotal;
         if (atom->map_style) {
           atom->nghost = 0;
           atom->map_init();
           atom->map_set();
         }
       }
     }
   }
 }
diff --git a/src/SHOCK/fix_append_atoms.h b/src/SHOCK/fix_append_atoms.h
index 7f6670350..043f5d1c1 100644
--- a/src/SHOCK/fix_append_atoms.h
+++ b/src/SHOCK/fix_append_atoms.h
@@ -1,107 +1,103 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(append/atoms,FixAppendAtoms)
 
 #else
 
 #ifndef FIX_APPEND_ATOMS_H
 #define FIX_APPEND_ATOMS_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix style append/atoms: declaration only, all member functions are
 // defined in fix_append_atoms.cpp.
 
 class FixAppendAtoms : public Fix {
  public:
   FixAppendAtoms(class LAMMPS *, int, char **);
   ~FixAppendAtoms();
   int setmask();
   void setup(int);
   void pre_exchange();
   void initial_integrate(int);
   void post_force(int);
 
  private:
   int get_spatial();      // returns 1 if atoms should be appended, else 0
 
   // 1 if the matching keyword (spatial, xlo ... zhi) was given
   int spatflag, xloflag, xhiflag, yloflag, yhiflag, zloflag, zhiflag;
   // 1 if random / temp keyword was given, and their RNG seeds
   int ranflag, tempflag, xseed, tseed;
   // random displacement magnitudes and temp keyword parameters
   double ranx, rany, ranz, t_target, t_period, t_extent;
   class RanMars *randomx;     // RNG for random displacements (random keyword)
   class RanMars *randomt;     // RNG for the random force term (temp keyword)
   int scaleflag, freq;        // 1 = lattice units, 0 = box; step interval
   int nbasis;                 // number of lattice basis atoms
   int *basistype;             // atom type for each basis atom
   int advance,advance_sum;    // local flag and its sum over all procs
   double size,spatlead;       // values of size keyword, 2nd spatial argument
   char *spatialid;            // fix ID from spatial keyword, f_ stripped
   double tfactor;             // NOTE(review): appears unused - confirm
   double *gfactor1,*gfactor2; // per-type factors, filled in post_force()
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix append/atoms requires a lattice be defined
 
-UNDOCUMENTED
+Use the lattice command for this purpose.
 
 E: Only zhi currently implemented for fix append/atoms
 
 Self-explanatory.
 
 E: Append boundary must be shrink/minimum
 
 The boundary style of the face where atoms are added
 must be of type m (shrink/minimum).
 
 E: Bad fix ID in fix append/atoms command
 
 The value of the fix_id for keyword spatial must start with the prefix
 f_.
 
-E: Must define lattice to append/atoms
-
-A lattice must be defined before using this fix.
-
 E: Cannot use append/atoms in periodic dimension
 
 The boundary style of the face where atoms are added can not be of
 type p (periodic).
 
 E: Cannot append atoms to a triclinic box
 
 The simulation box must be defined with edges aligned with the
 Cartesian axes.
 
 E: Use of fix append/atoms with undefined lattice
 
 A lattice must be defined before using this fix.
 
 E: Fix ID for fix ave/spatial does not exist
 
 Self-explanatory.
 
 */
diff --git a/src/SRD/fix_srd.h b/src/SRD/fix_srd.h
index 4498a4996..93faa4ba1 100644
--- a/src/SRD/fix_srd.h
+++ b/src/SRD/fix_srd.h
@@ -1,419 +1,415 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(srd,FixSRD)
 
 #else
 
 #ifndef LMP_FIX_SRD_H
 #define LMP_FIX_SRD_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Declaration of the FixSRD fix style.  Only the interface and data layout
 // are declared here; no member function is defined in this header.
 
 class FixSRD : public Fix {
  public:
   FixSRD(class LAMMPS *, int, char **);
   ~FixSRD();
   int setmask();
   void init();
   void setup(int);
   void pre_neighbor();
   void post_force(int);
   double compute_vector(int);
 
   double memory_usage();
   int pack_reverse_comm(int, int, double *);
   void unpack_reverse_comm(int, int *, double *);
 
  private:
   int me,nprocs;
   int bigexist,biggroup,biggroupbit;
   int collidestyle,lamdaflag,overlap,insideflag,exactflag,maxbounceallow;
   int cubicflag,shiftuser,shiftseed,shiftflag,tstat;
   int rescale_rotate,rescale_collide;
   double gridsrd,gridsearch,lamda,radfactor,cubictol;
   int triclinic,change_size,change_shape,deformflag;
 
   double dt_big,dt_srd;
   double mass_big,mass_srd;
   double temperature_srd;
   double sigma;
   double srd_per_cell;
   double dmax,vmax,vmaxsq;
   double maxbigdiam,minbigdiam;
   double dist_ghost,dist_srd,dist_srd_reneigh;   // explained in code
 
   int wallexist,nwall,wallvarflag;
   class FixWallSRD *wallfix;
   int *wallwhich;
   double *xwall,*xwallhold,*vwall;
   double **fwall;
   double walltrigger;
 
   class AtomVecEllipsoid *avec_ellipsoid;
   class AtomVecLine *avec_line;
   class AtomVecTri *avec_tri;
 
   // for orthogonal box, these are in box units
   // for triclinic box, these are in lamda units
 
   double srdlo[3],srdhi[3];                  // SRDs must stay inside
   double srdlo_reneigh[3],srdhi_reneigh[3];  // SRDs trigger a reneigh
 
   int dimension;
   int initflag,setupflag,reneighflag;
   class RanMars *random;
   class RanPark *randomshift;
 
   // stats
 
   int ncheck,ncollide,ninside,nrescale,reneighcount;
   int nbounce,bouncemaxnum,bouncemax;
   int stats_flag;
   int srd_bin_count;
   double srd_bin_temp;
   double stats[12],stats_all[12];
 
   double **flocal;       // local ptrs to atom force and torque
   double **tlocal;
 
   // info to store for each owned and ghost big particle and wall
 
   struct Big {
     int index;                 // local index of particle/wall
     int type;                  // SPHERE or ELLIPSOID or LINE or TRI or WALL
     double radius,radsq;       // radius of sphere
     double aradsqinv;          // 3 ellipsoid radii
     double bradsqinv;          // (names suggest inverse squared - confirm)
     double cradsqinv;
     double length;             // length of line segment
     double normbody[3];        // normal of tri in body-frame
     double cutbinsq;           // add big to bin if within this distance
     double omega[3];           // current omega for sphere/ellipsoid/tri/line
     double ex[3],ey[3],ez[3];  // current orientation vecs for ellipsoid/tri
     double norm[3];            // current unit normal of tri in space-frame
     double theta;              // current orientation of line
   };
 
   Big *biglist;           // list of info for each owned & ghost big and wall
   int torqueflag;         // 1 if any big particle is torqued
 
   // current size of particle-based arrays
 
   int nbig;               // # of owned/ghost big particles and walls
   int maxbig;             // max number of owned/ghost big particles and walls
   int nmax;               // max number of SRD particles
 
   // bins for SRD velocity remap, shifting and communication
   // binsize and inv are in lamda units for triclinic
 
   int nbins1,nbin1x,nbin1y,nbin1z;
   double binsize1x,binsize1y,binsize1z;
   double bininv1x,bininv1y,bininv1z;
 
   struct BinAve {
     int owner;           // 1 if I am owner of this bin, 0 if not
     int n;               // # of SRD particles in bin
     double xctr[3];      // center point of bin, only used for triclinic
     double vsum[3];      // sum of v components for SRD particles in bin
     double random;       // random value if I am owner
   };
 
   struct BinComm {
     int nsend,nrecv;            // # of bins to send/recv
     int sendproc,recvproc;      // who to send/recv to/from
     int *sendlist,*recvlist;    // list of bins to send/recv
   };
 
   struct BinShift {
     int commflag;                 // 1 if this shift requires any comm
     int nbins,nbinx,nbiny,nbinz;  // extent of my bins
     int maxbinsq,maxvbin;
     int binlo[3],binhi[3];        // extent of my bins in global array
     double corner[3];             // lower,left corner to offset from
                                   // corner is in lamda units for triclinic
     BinAve *vbin;                 // my bins
     BinComm bcomm[6];             // bin communication pattern for overlaps
   };
   BinShift shifts[2];             // 0 = no shift, 1 = shift
 
   int maxbin1;
   int *binhead;          // 1st SRD particle in each bin
   int *binnext;          // next SRD particle in same bin
   int maxbuf;
   double *sbuf1,*sbuf2;  // buffers for send/recv of velocity bin data
   double *rbuf1,*rbuf2;
 
   // bins and stencil for collision searching for SRDs & big particles
 
   int nbins2,nbin2x,nbin2y,nbin2z;
   int maxbin2;
   double binsize2x,binsize2y,binsize2z;
   double bininv2x,bininv2y,bininv2z;
   double xblo2,yblo2,zblo2;
 
   int *nbinbig;          // # of big particles overlapping each bin
   int **binbig;          // indices of big particles overlapping each bin
   int *binsrd;           // which bin each SRD particle is in
   int nstencil;          // # of bins in stencil
   int maxstencil;        // max # of bins stencil array can hold
   int **stencil;         // list of 3d bin offsets a big particle can overlap
 
   // persistent data for line/tri collision calculations
 
   double tfraction,theta0,theta1;
   double xs0[3],xs1[3],xsc[3];
   double xb0[3],xb1[3],xbc[3];
   double nbc[3];
 
   // shared data for triangle collision calculations
   // NOTE(review): no members are declared under this heading
 
   // private functions
   // all are defined outside this header; the groupings below follow
   // the function names only
 
   void reset_velocities();
   void vbin_comm(int);
   void vbin_pack(BinAve *, int, int *, double *);
   void vbin_unpack(double *, BinAve *, int, int *);
 
   void collisions_single();
   void collisions_multi();
 
   // inside_*: one routine per big-particle shape, plus walls
 
   int inside_sphere(double *, double *, Big *);
   int inside_ellipsoid(double *, double *, Big *);
   int inside_line(double *, double *, double *, double *, Big *, double);
   int inside_tri(double *, double *, double *, double *, Big *, double);
   int inside_wall(double *, int);
 
   // collision_*: exact variants return a double, inexact variants
   // return nothing
 
   double collision_sphere_exact(double *, double *, double *, double *,
                                 Big *, double *, double *, double *);
   void collision_sphere_inexact(double *, double *,
                                 Big *, double *, double *, double *);
   double collision_ellipsoid_exact(double *, double *, double *, double *,
                                    Big *, double *, double *, double *);
   void collision_ellipsoid_inexact(double *, double *,
                                    Big *, double *, double *, double *);
   double collision_line_exact(double *, double *, double *, double *,
                               Big *, double, double *, double *, double *);
   double collision_tri_exact(double *, double *, double *, double *,
                              Big *, double, double *, double *, double *);
   double collision_wall_exact(double *, int, double *,
                               double *, double *, double *);
   void collision_wall_inexact(double *, int, double *, double *, double *);
 
   void slip(double *, double *, double *, Big *,
             double *, double *,  double *);
   void slip_wall(double *, int, double *, double *);
   void noslip(double *, double *, double *, Big *, int,
               double *, double *,  double *);
 
   void force_torque(double *, double *, double *,
                     double *, double *,  double *);
   void force_wall(double *, double *, int);
 
   int update_srd(int, double, double *, double *, double *, double *);
 
   void parameterize();
   void setup_bounds();
   void setup_velocity_bins();
   void setup_velocity_shift(int, int);
   void setup_search_bins();
   void setup_search_stencil();
   void big_static();
   void big_dynamic();
 
   double point_bin_distance(double *, int, int, int);
   double bin_bin_distance(int, int, int);
   void velocity_stats(int);
 
   double newton_raphson(double, double);
   void lineside(double, double &, double &);
   void triside(double, double &, double &);
 
   double distance(int, int);
   void print_collision(int, int, int, double, double,
                        double *, double *, double *, int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Could not find fix srd group ID
 
 Self-explanatory.
 
 E: Fix srd requires newton pair on
 
 Self-explanatory.
 
 E: Fix srd requires ghost atoms store velocity
 
 Use the communicate vel yes command to enable this.
 
 E: Fix SRD no-slip requires atom attribute torque
 
 This is because the SRD collisions will impart torque to the solute
 particles.
 
 E: Cannot change timestep once fix srd is setup
 
 This is because various SRD properties depend on the timestep
 size.
 
 E: Cannot use fix wall/srd more than once
 
 Nor is there a need to since multiple walls can be specified
 in one command.
 
 W: Fix SRD walls overlap but fix srd overlap not set
 
 You likely want to set this in your input script.
 
 E: Using fix srd with inconsistent fix deform remap option
 
 When shearing the box in an SRD simulation, the remap v option for fix
 deform needs to be used.
 
 W: Using fix srd with box deformation but no SRD thermostat
 
 The deformation will heat the SRD particles so this can
 be dangerous.
 
 W: Fix srd SRD moves may trigger frequent reneighboring
 
 This is because the SRD particles may move long distances.
 
 E: Fix SRD: bad search bin assignment
 
 Something has gone wrong in your SRD model; try using more
 conservative settings.
 
 E: Fix SRD: bad stencil bin for big particle
 
 Something has gone wrong in your SRD model; try using more
 conservative settings.
 
 E: Fix SRD: too many big particles in bin
 
 Reset the ATOMPERBIN parameter at the top of fix_srd.cpp
 to a larger value, and re-compile the code.
 
 E: Fix SRD: too many walls in bin
 
 This should not happen unless your system has been setup incorrectly.
 
 E: Fix SRD: bad bin assignment for SRD advection
 
 Something has gone wrong in your SRD model; try using more
 conservative settings.
 
 E: SRD particle %d started inside big particle %d on step %ld bounce %d
 
 See the inside keyword if you want this message to be an error vs
 warning.
 
 W: SRD particle %d started inside big particle %d on step %ld bounce %d
 
 See the inside keyword if you want this message to be an error vs
 warning.
 
-W: SRD particle %d started inside big particle %d on step %ld bounce %d
-
-UNDOCUMENTED
-
 E: Bad quadratic solve for particle/line collision
 
 This is an internal error.  It should normally not occur.
 
 E: Bad quadratic solve for particle/tri collision
 
 This is an internal error.  It should normally not occur.
 
 W: Fix srd particle moved outside valid domain
 
 This may indicate a problem with your simulation parameters.
 
 E: Big particle in fix srd cannot be point particle
 
 Big particles must be extended spheroids or ellipsoids.
 
 E: Cannot use lines with fix srd unless overlap is set
 
 This is because line segments are connected to each other.
 
 E: Cannot use tris with fix srd unless overlap is set
 
 This is because triangles are connected to each other.
 
 E: Fix srd requires SRD particles all have same mass
 
 Self-explanatory.
 
 E: Fewer SRD bins than processors in some dimension
 
 This is not allowed.  Make your SRD bin size smaller.
 
 E: SRD bins for fix srd are not cubic enough
 
 The bin shape is not within tolerance of cubic.  See the cubic
 keyword if you want this message to be an error vs warning.
 
 W: SRD bins for fix srd are not cubic enough
 
 The bin shape is not within tolerance of cubic.  See the cubic
 keyword if you want this message to be an error vs warning.
 
 E: SRD bin size for fix srd differs from user request
 
 Fix SRD had to adjust the bin size to fit the simulation box.  See the
 cubic keyword if you want this message to be an error vs warning.
 
 W: SRD bin size for fix srd differs from user request
 
 Fix SRD had to adjust the bin size to fit the simulation box.  See the
 cubic keyword if you want this message to be an error vs warning.
 
 E: Fix srd lamda must be >= 0.6 of SRD grid size
 
 This is a requirement for accuracy reasons.
 
 W: SRD bin shifting turned on due to small lamda
 
 This is done to try to preserve accuracy.
 
 W: Fix srd grid size > 1/4 of big particle diameter
 
 This may cause accuracy problems.
 
 W: Fix srd viscosity < 0.0 due to low SRD density
 
 This may cause accuracy problems.
 
 W: Fix srd particles may move > big particle diameter
 
 This may cause accuracy problems.
 
 */
diff --git a/src/atom_vec_body.cpp b/src/atom_vec_body.cpp
index 65711cf15..1809b21e1 100644
--- a/src/atom_vec_body.cpp
+++ b/src/atom_vec_body.cpp
@@ -1,1459 +1,1462 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "atom_vec_body.h"
 #include "style_body.h"
 #include "body.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "modify.h"
 #include "force.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 #define DELTA_BONUS 10000
 
 /* ----------------------------------------------------------------------
    constructor
    set comm/data sizes and per-atom flags for atom style body
    no bonus entries and no Body instance exist yet, bptr is set in settings()
 ------------------------------------------------------------------------- */
 
 AtomVecBody::AtomVecBody(LAMMPS *lmp) : AtomVec(lmp)
 {
   molecular = 0;
 
   // size_forward and size_border set in settings(), via Body class
 
   comm_x_only = comm_f_only = 0;
   size_forward = 0;
   size_reverse = 6;
   size_border = 0;
   size_velocity = 6;
   size_data_atom = 7;
   size_data_vel = 7;
   xcol_data = 5;
 
   // turn on the per-atom arrays in Atom that this style uses
 
   atom->body_flag = 1;
   atom->rmass_flag = 1;
   atom->angmom_flag = atom->torque_flag = 1;
 
   // bonus array starts empty, it is grown on demand by grow_bonus()
 
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
   bonus = NULL;
 
   bptr = NULL;
 
   // copyflag = 1 means settings() keeps a private copy of its args
 
   nargcopy = 0;
   argcopy = NULL;
   copyflag = 1;
 
   // intdoubleratio = how many ints fit in one double
   // used when ivalue ints are memcpy'd into buffers of doubles
 
   if (sizeof(double) == sizeof(int)) intdoubleratio = 1;
   else if (sizeof(double) == 2*sizeof(int)) intdoubleratio = 2;
   else error->all(FLERR,"Internal error in atom_style body");
 }
 
 /* ----------------------------------------------------------------------
    destructor
    hand the ivalue/dvalue storage of every bonus entry (local + ghost)
      back to the icp/dcp pools, then free the bonus array itself
    delete the Body instance and the copied settings() args
 ------------------------------------------------------------------------- */
 
 AtomVecBody::~AtomVecBody()
 {
   int nall = nlocal_bonus + nghost_bonus;
   for (int i = 0; i < nall; i++) {
-    memory->destroy(bonus[i].ivalue);
-    memory->destroy(bonus[i].dvalue);
+    icp->put(bonus[i].iindex);
+    dcp->put(bonus[i].dindex);
   }
   memory->sfree(bonus);
 
   // icp/dcp are copies of bptr->icp/dcp taken in settings()
   // NOTE(review): presumably the Body class owns and deletes the pools,
   //   which is why the put() calls above come before this delete - confirm
 
   delete bptr;
 
   for (int i = 0; i < nargcopy; i++) delete [] argcopy[i];
   delete [] argcopy;
 }
 
 /* ----------------------------------------------------------------------
    process additional args
    instantiate Body class
    set size_forward and size_border to max sizes
    narg,arg = args of the atom_style body command, arg[0] = body style name
    error if no args are given or if arg[0] matches no known body style
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::settings(int narg, char **arg)
 {
   if (narg < 1) error->all(FLERR,"Invalid atom_style body command");
 
   // "if (0)" opens an if/else-if chain
   // each BodyStyle() use pulled in from style_body.h expands to one
   //   "else if" that compares arg[0] to a style name and creates that class
   // the final else below catches an unknown style name
 
   if (0) bptr = NULL;
 
 #define BODY_CLASS
 #define BodyStyle(key,Class) \
   else if (strcmp(arg[0],#key) == 0) bptr = new Class(lmp,narg,arg);
 #include "style_body.h"
 #undef BodyStyle
 #undef BODY_CLASS
 
   else error->all(FLERR,"Invalid body style");
 
   // give the Body class a back pointer to this AtomVec
   // and cache its int/double pools used for bonus ivalue/dvalue storage
 
   bptr->avec = this;
+  icp = bptr->icp;
+  dcp = bptr->dcp;
 
   // max size of forward/border comm
   // 7,16 are packed in pack_comm/pack_border
   // bptr values = max number of additional ivalues/dvalues from Body class
 
   size_forward = 7 + bptr->size_forward;
   size_border = 16 + bptr->size_border;
 
   // make copy of args if called externally, so can write to restart file
   // make no copy of args if called from read_restart()
 
   if (copyflag) {
     nargcopy = narg;
     argcopy = new char*[nargcopy];
     for (int i = 0; i < nargcopy; i++) {
       int n = strlen(arg[i]) + 1;
       argcopy[i] = new char[n];
       strcpy(argcopy[i],arg[i]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    grow atom arrays
    n = 0 grows arrays by DELTA
    n > 0 allocates arrays to size n
    error if the new nmax is negative or exceeds MAXSMALLINT
    fixes listed in atom->extra_grow are told to grow their arrays as well
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::grow(int n)
 {
   if (n == 0) nmax += DELTA;
   else nmax = n;
   atom->nmax = nmax;
   if (nmax < 0 || nmax > MAXSMALLINT)
     error->one(FLERR,"Per-processor system is too big");
 
   // grow arrays owned by Atom and refresh the local copies of the pointers
   // f and torque are sized by nmax*comm->nthreads, all others by nmax
 
   tag = memory->grow(atom->tag,nmax,"atom:tag");
   type = memory->grow(atom->type,nmax,"atom:type");
   mask = memory->grow(atom->mask,nmax,"atom:mask");
   image = memory->grow(atom->image,nmax,"atom:image");
   x = memory->grow(atom->x,nmax,3,"atom:x");
   v = memory->grow(atom->v,nmax,3,"atom:v");
   f = memory->grow(atom->f,nmax*comm->nthreads,3,"atom:f");
 
   rmass = memory->grow(atom->rmass,nmax,"atom:rmass");
   angmom = memory->grow(atom->angmom,nmax,3,"atom:angmom");
   torque = memory->grow(atom->torque,nmax*comm->nthreads,3,"atom:torque");
   body = memory->grow(atom->body,nmax,"atom:body");
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
 }
 
 /* ----------------------------------------------------------------------
    reset local array ptrs
    re-read every per-atom array pointer used by this class from Atom
    no memory is allocated or freed here
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::grow_reset()
 {
   tag = atom->tag; type = atom->type;
   mask = atom->mask; image = atom->image;
   x = atom->x; v = atom->v; f = atom->f;
   rmass = atom->rmass; angmom = atom->angmom; torque = atom->torque;
   body = atom->body;
 }
 
 /* ----------------------------------------------------------------------
    grow bonus data structure
    capacity nmax_bonus is increased by DELTA_BONUS entries
    error if the new capacity is negative or exceeds MAXSMALLINT
    existing entries are carried over by the realloc
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::grow_bonus()
 {
   nmax_bonus += DELTA_BONUS;
   if (nmax_bonus < 0 || nmax_bonus > MAXSMALLINT)
     error->one(FLERR,"Per-processor system is too big");
 
   bonus = (Bonus *) memory->srealloc(bonus,nmax_bonus*sizeof(Bonus),
                                      "atom:bonus");
 }
 
 /* ----------------------------------------------------------------------
    copy atom I info to atom J
    if delflag and atom J has bonus data, then delete it
    body[j] ends up referring to I's bonus entry (or -1 if I has none)
    fixes listed in atom->extra_grow copy their per-atom data as well
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::copy(int i, int j, int delflag)
 {
   tag[j] = tag[i];
   type[j] = type[i];
   mask[j] = mask[i];
   image[j] = image[i];
   x[j][0] = x[i][0];
   x[j][1] = x[i][1];
   x[j][2] = x[i][2];
   v[j][0] = v[i][0];
   v[j][1] = v[i][1];
   v[j][2] = v[i][2];
 
   rmass[j] = rmass[i];
   angmom[j][0] = angmom[i][0];
   angmom[j][1] = angmom[i][1];
   angmom[j][2] = angmom[i][2];
 
   // if deleting atom J via delflag and J has bonus data, then delete it
   // J's ivalue/dvalue storage goes back to the pools
   // the last local bonus entry is then moved into J's slot to keep
   //   the local entries contiguous
 
   if (delflag && body[j] >= 0) {
-    memory->destroy(bonus[body[j]].ivalue);
-    memory->destroy(bonus[body[j]].dvalue);
+    icp->put(bonus[body[j]].iindex);
+    dcp->put(bonus[body[j]].dindex);
     copy_bonus(nlocal_bonus-1,body[j]);
     nlocal_bonus--;
   }
 
   // if atom I has bonus data, reset I's bonus.ilocal to loc J
   // do NOT do this if self-copy (I=J) since I's bonus data is already deleted
 
   if (body[i] >= 0 && i != j) bonus[body[i]].ilocal = j;
   body[j] = body[i];
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
 }
 
 /* ----------------------------------------------------------------------
    copy bonus data from I to J, effectively deleting the J entry
    also reset body that points to I to now point to J
    the Bonus struct is copied bitwise, so J takes over I's
      ivalue/dvalue pointers, no storage is allocated or released here
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::copy_bonus(int i, int j)
 {
   body[bonus[i].ilocal] = j;
   memcpy(&bonus[j],&bonus[i],sizeof(Bonus));
 }
 
 /* ----------------------------------------------------------------------
    clear ghost info in bonus data
    called before ghosts are recommunicated in comm and irregular
    ghost entries are those stored after the nlocal_bonus local entries
    their ivalue/dvalue storage is returned to the icp/dcp pools
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::clear_bonus()
 {
   int nall = nlocal_bonus + nghost_bonus;
   for (int i = nlocal_bonus; i < nall; i++) {
-    memory->destroy(bonus[i].ivalue);
-    memory->destroy(bonus[i].dvalue);
+    icp->put(bonus[i].iindex);
+    dcp->put(bonus[i].dindex);
   }
   nghost_bonus = 0;
 }
 
 /* ----------------------------------------------------------------------
    pack forward comm data for the n atoms in list into buf
    per atom: x, then quat + Body class values if the atom has bonus data
    if pbc_flag is set, x is shifted by a periodic offset built from pbc
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_comm(int n, int *list, double *buf,
                           int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
   double *quat;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       if (body[j] >= 0) {
         quat = bonus[body[j]].quat;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
       }
     }
   } else {
 
     // offset = pbc counts times box lengths, plus tilt terms if triclinic
 
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
       dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
       dz = pbc[2]*domain->zprd;
     }
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0] + dx;
       buf[m++] = x[j][1] + dy;
       buf[m++] = x[j][2] + dz;
       if (body[j] >= 0) {
         quat = bonus[body[j]].quat;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
       }
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack forward comm data including velocities for the n atoms in list
    per atom: x, then quat + Body class values if the atom has bonus data,
      then v and angmom
    if pbc_flag is set, x is shifted by a periodic offset built from pbc
    if deform_vremap is also set, atoms in the deform group additionally
      get their velocity shifted by an offset built from pbc and h_rate
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_comm_vel(int n, int *list, double *buf,
                               int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
   double *quat;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       if (body[j] >= 0) {
         quat = bonus[body[j]].quat;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
       }
       buf[m++] = v[j][0];
       buf[m++] = v[j][1];
       buf[m++] = v[j][2];
       buf[m++] = angmom[j][0];
       buf[m++] = angmom[j][1];
       buf[m++] = angmom[j][2];
     }
   } else {
 
     // offset = pbc counts times box lengths, plus tilt terms if triclinic
 
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
       dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
       dz = pbc[2]*domain->zprd;
     }
     if (!deform_vremap) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         if (body[j] >= 0) {
           quat = bonus[body[j]].quat;
           buf[m++] = quat[0];
           buf[m++] = quat[1];
           buf[m++] = quat[2];
           buf[m++] = quat[3];
           m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
         }
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
         buf[m++] = angmom[j][0];
         buf[m++] = angmom[j][1];
         buf[m++] = angmom[j][2];
       }
     } else {
       dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
       dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
       dvz = pbc[2]*h_rate[2];
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         if (body[j] >= 0) {
           quat = bonus[body[j]].quat;
           buf[m++] = quat[0];
           buf[m++] = quat[1];
           buf[m++] = quat[2];
           buf[m++] = quat[3];
           m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
         }
 
         // group test must use atom index j = list[i], not list position i
 
         if (mask[j] & deform_groupbit) {
           buf[m++] = v[j][0] + dvx;
           buf[m++] = v[j][1] + dvy;
           buf[m++] = v[j][2] + dvz;
         } else {
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
         }
         buf[m++] = angmom[j][0];
         buf[m++] = angmom[j][1];
         buf[m++] = angmom[j][2];
       }
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack only the body-specific forward comm data for the n atoms in list
    per atom with bonus data: quat + Body class values
    atoms without bonus data contribute nothing
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_comm_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
   double *quat;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     if (body[j] >= 0) {
       quat = bonus[body[j]].quat;
       buf[m++] = quat[0];
       buf[m++] = quat[1];
       buf[m++] = quat[2];
       buf[m++] = quat[3];
       m += bptr->pack_comm_body(&bonus[body[j]],&buf[m]);
     }
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack forward comm data from buf into atoms first to first+n-1
    values are read in the order pack_comm() wrote them
    quat + Body class values are read only for atoms that have bonus data
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::unpack_comm(int n, int first, double *buf)
 {
   int i,m,last;
   double *quat;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     if (body[i] >= 0) {
       quat = bonus[body[i]].quat;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       m += bptr->unpack_comm_body(&bonus[body[i]],&buf[m]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack forward comm data with velocities into atoms first to first+n-1
    values are read in the order pack_comm_vel() wrote them
    quat + Body class values are read only for atoms that have bonus data
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::unpack_comm_vel(int n, int first, double *buf)
 {
   int i,m,last;
   double *quat;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     if (body[i] >= 0) {
       quat = bonus[body[i]].quat;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       m += bptr->unpack_comm_body(&bonus[body[i]],&buf[m]);
     }
     v[i][0] = buf[m++];
     v[i][1] = buf[m++];
     v[i][2] = buf[m++];
     angmom[i][0] = buf[m++];
     angmom[i][1] = buf[m++];
     angmom[i][2] = buf[m++];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack only the body-specific forward comm data
      into atoms first to first+n-1
    quat + Body class values are read only for atoms that have bonus data
    return # of values read from buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::unpack_comm_hybrid(int n, int first, double *buf)
 {
   int i,m,last;
   double *quat;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++)
     if (body[i] >= 0) {
       quat = bonus[body[i]].quat;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       m += bptr->unpack_comm_body(&bonus[body[i]],&buf[m]);
     }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack reverse comm data for atoms first to first+n-1 into buf
    per atom: f and torque, 6 values
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_reverse(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = f[i][0];
     buf[m++] = f[i][1];
     buf[m++] = f[i][2];
     buf[m++] = torque[i][0];
     buf[m++] = torque[i][1];
     buf[m++] = torque[i][2];
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack only the torque of atoms first to first+n-1 into buf
    per atom: 3 values, forces are not packed here
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_reverse_hybrid(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = torque[i][0];
     buf[m++] = torque[i][1];
     buf[m++] = torque[i][2];
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack reverse comm data from buf for the n atoms in list
    the received f and torque values are summed into the existing values
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::unpack_reverse(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     f[j][0] += buf[m++];
     f[j][1] += buf[m++];
     f[j][2] += buf[m++];
     torque[j][0] += buf[m++];
     torque[j][1] += buf[m++];
     torque[j][2] += buf[m++];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack only torque from buf for the n atoms in list
    the received torque values are summed into the existing values
    return # of values read from buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::unpack_reverse_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     torque[j][0] += buf[m++];
     torque[j][1] += buf[m++];
     torque[j][2] += buf[m++];
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack border comm data for the n atoms in list into buf
    per atom: x, tag, type, mask, then a 0/1 flag for bonus data
    if flag = 1: quat, inertia, ninteger, ndouble, Body class values follow
    if pbc_flag is set, x is shifted by an offset built from pbc
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_border(int n, int *list, double *buf,
                             int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz;
 
   // c1,c2,c3 are declared but not used in this function
 
   double *quat,*c1,*c2,*c3,*inertia;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       if (body[j] < 0) buf[m++] = 0;
       else {
         buf[m++] = 1;
         quat = bonus[body[j]].quat;
         inertia = bonus[body[j]].inertia;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         buf[m++] = inertia[0];
         buf[m++] = inertia[1];
         buf[m++] = inertia[2];
         buf[m++] = bonus[body[j]].ninteger;
         buf[m++] = bonus[body[j]].ndouble;
         m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
       }
     }
   } else {
 
     // triclinic case uses the raw pbc counts as the offset
     // NOTE(review): presumably x is in fractional (lamda) coords at
     //   this point for triclinic boxes - confirm against caller
 
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0] + dx;
       buf[m++] = x[j][1] + dy;
       buf[m++] = x[j][2] + dz;
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       if (body[j] < 0) buf[m++] = 0;
       else {
         buf[m++] = 1;
         quat = bonus[body[j]].quat;
         inertia = bonus[body[j]].inertia;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         buf[m++] = inertia[0];
         buf[m++] = inertia[1];
         buf[m++] = inertia[2];
         buf[m++] = bonus[body[j]].ninteger;
         buf[m++] = bonus[body[j]].ndouble;
         m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
       }
     }
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack border comm data including velocities for the n atoms in list
    per atom: x, tag, type, mask, then a 0/1 flag for bonus data
    if flag = 1: quat, inertia, ninteger, ndouble, Body class values follow
    v and angmom are packed last for every atom
    if pbc_flag is set, x is shifted by an offset built from pbc
    if deform_vremap is also set, atoms in the deform group additionally
      get their velocity shifted by an offset built from pbc and h_rate
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_border_vel(int n, int *list, double *buf,
                                 int pbc_flag, int *pbc)
 {
   int i,j,m;
   double dx,dy,dz,dvx,dvy,dvz;
   double *quat,*inertia;
 
   m = 0;
   if (pbc_flag == 0) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = x[j][0];
       buf[m++] = x[j][1];
       buf[m++] = x[j][2];
       buf[m++] = tag[j];
       buf[m++] = type[j];
       buf[m++] = mask[j];
       if (body[j] < 0) buf[m++] = 0;
       else {
         buf[m++] = 1;
         quat = bonus[body[j]].quat;
         inertia = bonus[body[j]].inertia;
         buf[m++] = quat[0];
         buf[m++] = quat[1];
         buf[m++] = quat[2];
         buf[m++] = quat[3];
         buf[m++] = inertia[0];
         buf[m++] = inertia[1];
         buf[m++] = inertia[2];
         buf[m++] = bonus[body[j]].ninteger;
         buf[m++] = bonus[body[j]].ndouble;
         m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
       }
       buf[m++] = v[j][0];
       buf[m++] = v[j][1];
       buf[m++] = v[j][2];
       buf[m++] = angmom[j][0];
       buf[m++] = angmom[j][1];
       buf[m++] = angmom[j][2];
     }
   } else {
 
     // triclinic case uses the raw pbc counts as the offset
     // NOTE(review): presumably x is in fractional (lamda) coords at
     //   this point for triclinic boxes - confirm against caller
 
     if (domain->triclinic == 0) {
       dx = pbc[0]*domain->xprd;
       dy = pbc[1]*domain->yprd;
       dz = pbc[2]*domain->zprd;
     } else {
       dx = pbc[0];
       dy = pbc[1];
       dz = pbc[2];
     }
     if (!deform_vremap) {
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         if (body[j] < 0) buf[m++] = 0;
         else {
           buf[m++] = 1;
           quat = bonus[body[j]].quat;
           inertia = bonus[body[j]].inertia;
           buf[m++] = quat[0];
           buf[m++] = quat[1];
           buf[m++] = quat[2];
           buf[m++] = quat[3];
           buf[m++] = inertia[0];
           buf[m++] = inertia[1];
           buf[m++] = inertia[2];
           buf[m++] = bonus[body[j]].ninteger;
           buf[m++] = bonus[body[j]].ndouble;
           m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
         }
         buf[m++] = v[j][0];
         buf[m++] = v[j][1];
         buf[m++] = v[j][2];
         buf[m++] = angmom[j][0];
         buf[m++] = angmom[j][1];
         buf[m++] = angmom[j][2];
       }
     } else {
       dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
       dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
       dvz = pbc[2]*h_rate[2];
       for (i = 0; i < n; i++) {
         j = list[i];
         buf[m++] = x[j][0] + dx;
         buf[m++] = x[j][1] + dy;
         buf[m++] = x[j][2] + dz;
         buf[m++] = tag[j];
         buf[m++] = type[j];
         buf[m++] = mask[j];
         if (body[j] < 0) buf[m++] = 0;
         else {
           buf[m++] = 1;
           quat = bonus[body[j]].quat;
           inertia = bonus[body[j]].inertia;
           buf[m++] = quat[0];
           buf[m++] = quat[1];
           buf[m++] = quat[2];
           buf[m++] = quat[3];
           buf[m++] = inertia[0];
           buf[m++] = inertia[1];
           buf[m++] = inertia[2];
           buf[m++] = bonus[body[j]].ninteger;
           buf[m++] = bonus[body[j]].ndouble;
           m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
         }
 
         // group test must use atom index j = list[i], not list position i
 
         if (mask[j] & deform_groupbit) {
           buf[m++] = v[j][0] + dvx;
           buf[m++] = v[j][1] + dvy;
           buf[m++] = v[j][2] + dvz;
         } else {
           buf[m++] = v[j][0];
           buf[m++] = v[j][1];
           buf[m++] = v[j][2];
         }
         buf[m++] = angmom[j][0];
         buf[m++] = angmom[j][1];
         buf[m++] = angmom[j][2];
       }
     }
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack only the body-specific border comm data for the n atoms in list
    per atom: a 0/1 flag for bonus data
    if flag = 1: quat, inertia, ninteger, ndouble, Body class values follow
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_border_hybrid(int n, int *list, double *buf)
 {
   int i,j,m;
 
   // c1,c2,c3 are declared but not used in this function
 
   double *quat,*c1,*c2,*c3,*inertia;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     if (body[j] < 0) buf[m++] = 0;
     else {
       buf[m++] = 1;
       quat = bonus[body[j]].quat;
       inertia = bonus[body[j]].inertia;
       buf[m++] = quat[0];
       buf[m++] = quat[1];
       buf[m++] = quat[2];
       buf[m++] = quat[3];
       buf[m++] = inertia[0];
       buf[m++] = inertia[1];
       buf[m++] = inertia[2];
       buf[m++] = bonus[body[j]].ninteger;
       buf[m++] = bonus[body[j]].ndouble;
       m += bptr->pack_border_body(&bonus[body[j]],&buf[m]);
     }
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack border comm data from buf into atoms first to first+n-1
    atom arrays are grown when index i reaches nmax
    for each atom flagged as having bonus data, a new ghost bonus entry
      is appended after the existing local + ghost entries
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::unpack_border(int n, int first, double *buf)
 {
   int i,j,m,last;
 
   // c1,c2,c3 are declared but not used in this function
 
   double *quat,*c1,*c2,*c3,*inertia;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     tag[i] = static_cast<int> (buf[m++]);
     type[i] = static_cast<int> (buf[m++]);
     mask[i] = static_cast<int> (buf[m++]);
 
     // received value is the 0/1 bonus flag, converted to -1 or bonus index
 
     body[i] = static_cast<int> (buf[m++]);
     if (body[i] == 0) body[i] = -1;
     else {
       j = nlocal_bonus + nghost_bonus;
       if (j == nmax_bonus) grow_bonus();
       quat = bonus[j].quat;
       inertia = bonus[j].inertia;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       inertia[0] = buf[m++];
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
       bonus[j].ninteger = static_cast<int> (buf[m++]);
       bonus[j].ndouble = static_cast<int> (buf[m++]);
-      memory->create(bonus[j].ivalue,bonus[j].ninteger,"body:ivalue");
-      memory->create(bonus[j].dvalue,bonus[j].ndouble,"body:dvalue");
+      bonus[j].ivalue = icp->get(bonus[j].ninteger,bonus[j].iindex);
+      bonus[j].dvalue = dcp->get(bonus[j].ndouble,bonus[j].dindex);
       m += bptr->unpack_border_body(&bonus[j],&buf[m]);
       bonus[j].ilocal = i;
       body[i] = j;
       nghost_bonus++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack border comm data with velocities into atoms first to first+n-1
    atom arrays are grown when index i reaches nmax
    for each atom flagged as having bonus data, a new ghost bonus entry
      is appended after the existing local + ghost entries
    v and angmom are read last for every atom
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::unpack_border_vel(int n, int first, double *buf)
 {
   int i,j,m,last;
 
   // c1,c2,c3 are declared but not used in this function
 
   double *quat,*c1,*c2,*c3,*inertia;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
     x[i][0] = buf[m++];
     x[i][1] = buf[m++];
     x[i][2] = buf[m++];
     tag[i] = static_cast<int> (buf[m++]);
     type[i] = static_cast<int> (buf[m++]);
     mask[i] = static_cast<int> (buf[m++]);
 
     // received value is the 0/1 bonus flag, converted to -1 or bonus index
 
     body[i] = static_cast<int> (buf[m++]);
     if (body[i] == 0) body[i] = -1;
     else {
       j = nlocal_bonus + nghost_bonus;
       if (j == nmax_bonus) grow_bonus();
       quat = bonus[j].quat;
       inertia = bonus[j].inertia;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       inertia[0] = buf[m++];
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
       bonus[j].ninteger = static_cast<int> (buf[m++]);
       bonus[j].ndouble = static_cast<int> (buf[m++]);
-      memory->create(bonus[j].ivalue,bonus[j].ninteger,"body:ivalue");
-      memory->create(bonus[j].dvalue,bonus[j].ndouble,"body:dvalue");
+      bonus[j].ivalue = icp->get(bonus[j].ninteger,bonus[j].iindex);
+      bonus[j].dvalue = dcp->get(bonus[j].ndouble,bonus[j].dindex);
       m += bptr->unpack_border_body(&bonus[j],&buf[m]);
       bonus[j].ilocal = i;
       body[i] = j;
       nghost_bonus++;
     }
     v[i][0] = buf[m++];
     v[i][1] = buf[m++];
     v[i][2] = buf[m++];
     angmom[i][0] = buf[m++];
     angmom[i][1] = buf[m++];
     angmom[i][2] = buf[m++];
   }
 }
 
 /* ----------------------------------------------------------------------
    unpack only the body-specific border comm data
      into atoms first to first+n-1
    for each atom flagged as having bonus data, a new ghost bonus entry
      is appended after the existing local + ghost entries
    atom arrays are not grown here
    return # of values read from buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::unpack_border_hybrid(int n, int first, double *buf)
 {
   int i,j,m,last;
 
   // c1,c2,c3 are declared but not used in this function
 
   double *quat,*c1,*c2,*c3,*inertia;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
 
     // received value is the 0/1 bonus flag, converted to -1 or bonus index
 
     body[i] = static_cast<int> (buf[m++]);
     if (body[i] == 0) body[i] = -1;
     else {
       j = nlocal_bonus + nghost_bonus;
       if (j == nmax_bonus) grow_bonus();
       quat = bonus[j].quat;
       inertia = bonus[j].inertia;
       quat[0] = buf[m++];
       quat[1] = buf[m++];
       quat[2] = buf[m++];
       quat[3] = buf[m++];
       inertia[0] = buf[m++];
       inertia[1] = buf[m++];
       inertia[2] = buf[m++];
       bonus[j].ninteger = static_cast<int> (buf[m++]);
       bonus[j].ndouble = static_cast<int> (buf[m++]);
-      memory->create(bonus[j].ivalue,bonus[j].ninteger,"body:ivalue");
-      memory->create(bonus[j].dvalue,bonus[j].ndouble,"body:dvalue");
+      bonus[j].ivalue = icp->get(bonus[j].ninteger,bonus[j].iindex);
+      bonus[j].dvalue = dcp->get(bonus[j].ndouble,bonus[j].dindex);
       m += bptr->unpack_border_body(&bonus[j],&buf[m]);
       bonus[j].ilocal = i;
       body[i] = j;
       nghost_bonus++;
     }
   }
   return m;
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
    buf[0] holds the total # of values packed, data starts at buf[1]
    bonus data, if any, follows a 0/1 flag
    fixes listed in atom->extra_grow append their own per-atom data
    return # of values packed into buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_exchange(int i, double *buf)
 {
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
 
   // image is written bit-for-bit into one buffer slot, not converted
 
   *((tagint *) &buf[m++]) = image[i];
 
   buf[m++] = rmass[i];
   buf[m++] = angmom[i][0];
   buf[m++] = angmom[i][1];
   buf[m++] = angmom[i][2];
 
   if (body[i] < 0) buf[m++] = 0;
   else {
     buf[m++] = 1;
     int j = body[i];
     double *quat = bonus[j].quat;
     double *inertia = bonus[j].inertia;
     buf[m++] = quat[0];
     buf[m++] = quat[1];
     buf[m++] = quat[2];
     buf[m++] = quat[3];
     buf[m++] = inertia[0];
     buf[m++] = inertia[1];
     buf[m++] = inertia[2];
     buf[m++] = bonus[j].ninteger;
     buf[m++] = bonus[j].ndouble;
 
     // ivalue ints are copied raw into the double buffer
     // advance m by the # of doubles they occupy, rounded up
 
     memcpy(&buf[m],bonus[j].ivalue,bonus[j].ninteger*sizeof(int));
     if (intdoubleratio == 1) m += bonus[j].ninteger;
     else m += (bonus[j].ninteger+1)/2;
     memcpy(&buf[m],bonus[j].dvalue,bonus[j].ndouble*sizeof(double));
     m += bonus[j].ndouble;
   }
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
 
   buf[0] = m;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack data for one atom from buf, as written by pack_exchange()
    the atom is appended at index atom->nlocal, arrays are grown if full
    if the atom carries bonus data, a new local bonus entry is created
      with ivalue/dvalue storage taken from the icp/dcp pools
    fixes listed in atom->extra_grow unpack their own per-atom data
    return # of values read from buf
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::unpack_exchange(double *buf)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   // buf[0] is the value count, so data starts at buf[1]
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
   image[nlocal] = *((tagint *) &buf[m++]);
 
   rmass[nlocal] = buf[m++];
   angmom[nlocal][0] = buf[m++];
   angmom[nlocal][1] = buf[m++];
   angmom[nlocal][2] = buf[m++];
 
   // received value is the 0/1 bonus flag, converted to -1 or bonus index
 
   body[nlocal] = static_cast<int> (buf[m++]);
   if (body[nlocal] == 0) body[nlocal] = -1;
   else {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *quat = bonus[nlocal_bonus].quat;
     double *inertia = bonus[nlocal_bonus].inertia;
     quat[0] = buf[m++];
     quat[1] = buf[m++];
     quat[2] = buf[m++];
     quat[3] = buf[m++];
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
     bonus[nlocal_bonus].ninteger = static_cast<int> (buf[m++]);
     bonus[nlocal_bonus].ndouble = static_cast<int> (buf[m++]);
-    memory->create(bonus[nlocal_bonus].ivalue,bonus[nlocal_bonus].ninteger,
-                   "body:ivalue");
-    memory->create(bonus[nlocal_bonus].dvalue,bonus[nlocal_bonus].ndouble,
-                   "body:dvalue");
+    bonus[nlocal_bonus].ivalue = icp->get(bonus[nlocal_bonus].ninteger,
+					  bonus[nlocal_bonus].iindex);
+    bonus[nlocal_bonus].dvalue = dcp->get(bonus[nlocal_bonus].ndouble,
+					  bonus[nlocal_bonus].dindex);
 
     // ivalue ints were copied raw into the double buffer
     // advance m by the # of doubles they occupy, rounded up
 
     memcpy(bonus[nlocal_bonus].ivalue,&buf[m],
            bonus[nlocal_bonus].ninteger*sizeof(int));
     if (intdoubleratio == 1) m += bonus[nlocal_bonus].ninteger;
     else m += (bonus[nlocal_bonus].ninteger+1)/2;
     memcpy(bonus[nlocal_bonus].dvalue,&buf[m],
            bonus[nlocal_bonus].ndouble*sizeof(double));
     m += bonus[nlocal_bonus].ndouble;
 
     bonus[nlocal_bonus].ilocal = nlocal;
     body[nlocal] = nlocal_bonus++;
   }
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       m += modify->fix[atom->extra_grow[iextra]]->
         unpack_exchange(nlocal,&buf[m]);
 
   atom->nlocal++;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    size of restart data for all atoms owned by this proc
    include extra data stored by fixes
    16 values per atom without bonus data
    25 values plus ivalue and dvalue storage per atom with bonus data
    return total # of values
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::size_restart()
 {
   int i;
 
   int n = 0;
   int nlocal = atom->nlocal;
   for (i = 0; i < nlocal; i++)
     if (body[i] >= 0) {
       n += 25;
 
       // ints are stored in double slots, count the slots rounded up
 
       if (intdoubleratio == 1) n += bonus[body[i]].ninteger;
       else n += (bonus[body[i]].ninteger+1)/2;
       n += bonus[body[i]].ndouble;
     } else n += 16;
 
   if (atom->nextra_restart)
     for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
       for (i = 0; i < nlocal; i++)
         n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
 
   return n;
 }
 
 /* ----------------------------------------------------------------------
    pack atom I's data for restart file including extra quantities
    xyz must be 1st 3 values, so that read_restart can test on them
    molecular types may be negative, but write as positive
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::pack_restart(int i, double *buf)
 {
   int m = 1;
   buf[m++] = x[i][0];
   buf[m++] = x[i][1];
   buf[m++] = x[i][2];
   buf[m++] = tag[i];
   buf[m++] = type[i];
   buf[m++] = mask[i];
   *((tagint *) &buf[m++]) = image[i];
   buf[m++] = v[i][0];
   buf[m++] = v[i][1];
   buf[m++] = v[i][2];
 
   buf[m++] = rmass[i];
   buf[m++] = angmom[i][0];
   buf[m++] = angmom[i][1];
   buf[m++] = angmom[i][2];
 
   if (body[i] < 0) buf[m++] = 0;
   else {
     buf[m++] = 1;
     int j = body[i];
     double *quat = bonus[j].quat;
     double *inertia = bonus[j].inertia;
     buf[m++] = quat[0];
     buf[m++] = quat[1];
     buf[m++] = quat[2];
     buf[m++] = quat[3];
     buf[m++] = inertia[0];
     buf[m++] = inertia[1];
     buf[m++] = inertia[2];
     buf[m++] = bonus[j].ninteger;
     buf[m++] = bonus[j].ndouble;
     memcpy(&buf[m],bonus[j].ivalue,bonus[j].ninteger*sizeof(int));
     if (intdoubleratio == 1) m += bonus[j].ninteger;
     else m += (bonus[j].ninteger+1)/2;
     memcpy(&buf[m],bonus[j].dvalue,bonus[j].ndouble*sizeof(double));
     m += bonus[j].ndouble;
   }
 
   if (atom->nextra_restart)
     for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
       m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
 
   buf[0] = m;
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack data for one atom from restart file including extra quantities
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::unpack_restart(double *buf)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) {
     grow(0);
     if (atom->nextra_store)
       memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
   }
 
   int m = 1;
   x[nlocal][0] = buf[m++];
   x[nlocal][1] = buf[m++];
   x[nlocal][2] = buf[m++];
   tag[nlocal] = static_cast<int> (buf[m++]);
   type[nlocal] = static_cast<int> (buf[m++]);
   mask[nlocal] = static_cast<int> (buf[m++]);
   image[nlocal] = *((tagint *) &buf[m++]);
   v[nlocal][0] = buf[m++];
   v[nlocal][1] = buf[m++];
   v[nlocal][2] = buf[m++];
 
   rmass[nlocal] = buf[m++];
   angmom[nlocal][0] = buf[m++];
   angmom[nlocal][1] = buf[m++];
   angmom[nlocal][2] = buf[m++];
 
   body[nlocal] = static_cast<int> (buf[m++]);
   if (body[nlocal] == 0) body[nlocal] = -1;
   else {
     if (nlocal_bonus == nmax_bonus) grow_bonus();
     double *quat = bonus[nlocal_bonus].quat;
     double *inertia = bonus[nlocal_bonus].inertia;
     quat[0] = buf[m++];
     quat[1] = buf[m++];
     quat[2] = buf[m++];
     quat[3] = buf[m++];
     inertia[0] = buf[m++];
     inertia[1] = buf[m++];
     inertia[2] = buf[m++];
     bonus[nlocal_bonus].ninteger = static_cast<int> (buf[m++]);
     bonus[nlocal_bonus].ndouble = static_cast<int> (buf[m++]);
-    memory->create(bonus[nlocal_bonus].ivalue,bonus[nlocal_bonus].ninteger,
-                   "body:ivalue");
-    memory->create(bonus[nlocal_bonus].dvalue,bonus[nlocal_bonus].ndouble,
-                   "body:dvalue");
+    bonus[nlocal_bonus].ivalue = icp->get(bonus[nlocal_bonus].ninteger,
+					  bonus[nlocal_bonus].iindex);  // ivalue now comes from pool icp, not memory->create()
+    bonus[nlocal_bonus].dvalue = dcp->get(bonus[nlocal_bonus].ndouble,
+					  bonus[nlocal_bonus].dindex);  // dvalue likewise from pool dcp; memcpy below writes ndouble doubles into it
     memcpy(bonus[nlocal_bonus].ivalue,&buf[m],
            bonus[nlocal_bonus].ninteger*sizeof(int));
     if (intdoubleratio == 1) m += bonus[nlocal_bonus].ninteger;
     else m += (bonus[nlocal_bonus].ninteger+1)/2;
     memcpy(bonus[nlocal_bonus].dvalue,&buf[m],
            bonus[nlocal_bonus].ndouble*sizeof(double));
     m += bonus[nlocal_bonus].ndouble;
     bonus[nlocal_bonus].ilocal = nlocal;
     body[nlocal] = nlocal_bonus++;
   }
 
   double **extra = atom->extra;
   if (atom->nextra_store) {
     int size = static_cast<int> (buf[0]) - m;
     for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
   }
 
   atom->nlocal++;
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecBody::write_restart_settings(FILE *fp)
 {
   fwrite(&nargcopy,sizeof(int),1,fp);
   for (int i = 0; i < nargcopy; i++) {
     int n = strlen(argcopy[i]) + 1;
     fwrite(&n,sizeof(int),1,fp);
     fwrite(argcopy[i],sizeof(char),n,fp);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecBody::read_restart_settings(FILE *fp)
 {
   int n;
 
   int me = comm->me;
   if (me == 0) fread(&nargcopy,sizeof(int),1,fp);
   MPI_Bcast(&nargcopy,1,MPI_INT,0,world);
   argcopy = new char*[nargcopy];
     
   for (int i = 0; i < nargcopy; i++) {
     if (me == 0) fread(&n,sizeof(int),1,fp);
     MPI_Bcast(&n,1,MPI_INT,0,world);
     argcopy[i] = new char[n];
     if (me == 0) fread(argcopy[i],sizeof(char),n,fp);
     MPI_Bcast(argcopy[i],n,MPI_CHAR,0,world);
   }
 
   copyflag = 0;
   settings(nargcopy,argcopy);
 }
 
 /* ----------------------------------------------------------------------
    create one atom of itype at coord
    set other values to defaults
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::create_atom(int itype, double *coord)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   tag[nlocal] = 0;
   type[nlocal] = itype;
   x[nlocal][0] = coord[0];
   x[nlocal][1] = coord[1];
   x[nlocal][2] = coord[2];
   mask[nlocal] = 1;
   image[nlocal] = ((tagint) IMGMAX << IMG2BITS) |
     ((tagint) IMGMAX << IMGBITS) | IMGMAX;
   v[nlocal][0] = 0.0;
   v[nlocal][1] = 0.0;
   v[nlocal][2] = 0.0;
 
   rmass[nlocal] = 1.0;
   angmom[nlocal][0] = 0.0;
   angmom[nlocal][1] = 0.0;
   angmom[nlocal][2] = 0.0;
   body[nlocal] = -1;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack one line from Atoms section of data file
    initialize other atom quantities
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::data_atom(double *coord, tagint imagetmp, char **values)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) grow(0);
 
   tag[nlocal] = atoi(values[0]);
   if (tag[nlocal] <= 0)
     error->one(FLERR,"Invalid atom ID in Atoms section of data file");
 
   type[nlocal] = atoi(values[1]);
   if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes)
     error->one(FLERR,"Invalid atom type in Atoms section of data file");
 
   body[nlocal] = atoi(values[2]);
   if (body[nlocal] == 0) body[nlocal] = -1;
   else if (body[nlocal] == 1) body[nlocal] = 0;
   else error->one(FLERR,"Invalid atom type in Atoms section of data file");
 
   rmass[nlocal] = atof(values[3]);
   if (rmass[nlocal] <= 0.0)
     error->one(FLERR,"Invalid density in Atoms section of data file");
 
   x[nlocal][0] = coord[0];
   x[nlocal][1] = coord[1];
   x[nlocal][2] = coord[2];
 
   image[nlocal] = imagetmp;
 
   mask[nlocal] = 1;
   v[nlocal][0] = 0.0;
   v[nlocal][1] = 0.0;
   v[nlocal][2] = 0.0;
   angmom[nlocal][0] = 0.0;
   angmom[nlocal][1] = 0.0;
   angmom[nlocal][2] = 0.0;
 
   atom->nlocal++;
 }
 
 /* ----------------------------------------------------------------------
    unpack hybrid quantities from one line in Atoms section of data file
    initialize other atom quantities for this sub-style
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::data_atom_hybrid(int nlocal, char **values)
 {
   body[nlocal] = atoi(values[0]);
   if (body[nlocal] == 0) body[nlocal] = -1;
   else if (body[nlocal] == 1) body[nlocal] = 0;
   else error->one(FLERR,"Invalid atom type in Atoms section of data file");
 
   rmass[nlocal] = atof(values[1]);
   if (rmass[nlocal] <= 0.0)
     error->one(FLERR,"Invalid density in Atoms section of data file");
 
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    unpack one body from Bodies section of data file
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::data_body(int m, int ninteger, int ndouble, 
                              char **ivalues, char **dvalues)
 {
   if (body[m]) error->one(FLERR,"Assigning body parameters to non-body atom");
   if (nlocal_bonus == nmax_bonus) grow_bonus();
   bptr->data_body(nlocal_bonus,ninteger,ndouble,ivalues,dvalues);
   bonus[nlocal_bonus].ilocal = m;
   body[m] = nlocal_bonus++;
 }
 
 /* ----------------------------------------------------------------------
    unpack one tri from Velocities section of data file
 ------------------------------------------------------------------------- */
 
 void AtomVecBody::data_vel(int m, char **values)
 {
   v[m][0] = atof(values[0]);
   v[m][1] = atof(values[1]);
   v[m][2] = atof(values[2]);
   angmom[m][0] = atof(values[3]);
   angmom[m][1] = atof(values[4]);
   angmom[m][2] = atof(values[5]);
 }
 
 /* ----------------------------------------------------------------------
    unpack hybrid quantities from one body in Velocities section of data file
 ------------------------------------------------------------------------- */
 
 int AtomVecBody::data_vel_hybrid(int m, char **values)
 {
   angmom[m][0] = atof(values[0]);
   angmom[m][1] = atof(values[1]);
   angmom[m][2] = atof(values[2]);
   return 3;
 }
 
 /* ----------------------------------------------------------------------
    return # of bytes of allocated memory
 ------------------------------------------------------------------------- */
 
 bigint AtomVecBody::memory_usage()
 {
   bigint bytes = 0;
 
   if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
   if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
   if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
   if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
   if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
   if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
   if (atom->memcheck("f")) bytes += memory->usage(f,nmax*comm->nthreads,3);
 
   if (atom->memcheck("rmass")) bytes += memory->usage(rmass,nmax);
   if (atom->memcheck("angmom")) bytes += memory->usage(angmom,nmax,3);
   if (atom->memcheck("torque")) bytes += 
                                   memory->usage(torque,nmax*comm->nthreads,3);
   if (atom->memcheck("body")) bytes += memory->usage(body,nmax);
 
   bytes += nmax_bonus*sizeof(Bonus);
+  bytes += icp->size + dcp->size;  // NOTE(review): assumes MyPool::size is in bytes (confirm); loop below may double-count pool-held ivalue/dvalue
 
   int nall = nlocal_bonus + nghost_bonus;
   for (int i = 0; i < nall; i++) {
     bytes += bonus[i].ninteger * sizeof(int);
     bytes += bonus[i].ndouble * sizeof(double);
   }
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    debug method for sanity checking of own/bonus data pointers
 ------------------------------------------------------------------------- */
 
 /*
 void AtomVecBody::check(int flag)
 {
   for (int i = 0; i < atom->nlocal; i++) {
     if (atom->body[i] >= 0 && atom->body[i] >= nlocal_bonus) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD AAA");
     }
   }
   for (int i = atom->nlocal; i < atom->nlocal+atom->nghost; i++) {
     if (atom->body[i] >= 0 && 
         (atom->body[i] < nlocal_bonus || 
          atom->body[i] >= nlocal_bonus+nghost_bonus)) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD BBB");
     }
   }
   for (int i = 0; i < nlocal_bonus; i++) {
     if (bonus[i].ilocal < 0 || bonus[i].ilocal >= atom->nlocal) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD CCC");
     }
   }
   for (int i = 0; i < nlocal_bonus; i++) {
     if (atom->body[bonus[i].ilocal] != i) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD DDD");
     }
   }
   for (int i = nlocal_bonus; i < nlocal_bonus+nghost_bonus; i++) {
     if (bonus[i].ilocal < atom->nlocal || 
         bonus[i].ilocal >= atom->nlocal+atom->nghost) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD EEE");
     }
   }
   for (int i = nlocal_bonus; i < nlocal_bonus+nghost_bonus; i++) {
     if (atom->body[bonus[i].ilocal] != i) {
       printf("Proc %d, step %ld, flag %d\n",comm->me,update->ntimestep,flag);
       error->one(FLERR,"BAD FFF");
     }
   }
 }
 */
diff --git a/src/atom_vec_body.h b/src/atom_vec_body.h
index 7bc8201be..34ed5b3c9 100644
--- a/src/atom_vec_body.h
+++ b/src/atom_vec_body.h
@@ -1,148 +1,147 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef ATOM_CLASS
 
 AtomStyle(body,AtomVecBody)
 
 #else
 
 #ifndef LMP_ATOM_VEC_BODY_H
 #define LMP_ATOM_VEC_BODY_H
 
 #include "atom_vec.h"
+#include "my_pool.h"
 
 namespace LAMMPS_NS {
 
 class AtomVecBody : public AtomVec {
-    friend class Comm;
-
-
-
-
-
  public:
   class Body *bptr;
 
   struct Bonus {
     double quat[4];
     double inertia[3];
     int ninteger,ndouble;
+    int iindex,dindex;     // passed to icp->get()/dcp->get(); presumably pool slot ids (TODO confirm)
     int *ivalue;
     double *dvalue;
     int ilocal;
   };
   struct Bonus *bonus;
 
   AtomVecBody(class LAMMPS *);
   ~AtomVecBody();
   void settings(int, char **);
   void grow(int);
   void grow_reset();
   void copy(int, int, int);
   int pack_comm(int, int *, double *, int, int *);
   int pack_comm_vel(int, int *, double *, int, int *);
   int pack_comm_hybrid(int, int *, double *);
   void unpack_comm(int, int, double *);
   void unpack_comm_vel(int, int, double *);
   int unpack_comm_hybrid(int, int, double *);
   int pack_reverse(int, int, double *);
   int pack_reverse_hybrid(int, int, double *);
   void unpack_reverse(int, int *, double *);
   int unpack_reverse_hybrid(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
   void unpack_border(int, int, double *);
   void unpack_border_vel(int, int, double *);
   int unpack_border_hybrid(int, int, double *);
   int pack_exchange(int, double *);
   int unpack_exchange(double *);
   int size_restart();
   int pack_restart(int, double *);
   int unpack_restart(double *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   void create_atom(int, double *);
   void data_atom(double *, tagint, char **);
   int data_atom_hybrid(int, char **);
   void data_vel(int, char **);
   int data_vel_hybrid(int, char **);
   bigint memory_usage();
 
   // manipulate Bonus data structure for extra atom info
 
   void clear_bonus();
   void data_body(int, int, int, char **, char **);
 
  private:
   int *tag,*type,*mask;
   tagint *image;
   double **x,**v,**f;
   double *rmass;
   double **angmom,**torque;
   int *body;
 
   int nlocal_bonus,nghost_bonus,nmax_bonus;
 
   int nargcopy;          // copy of command-line args
   char **argcopy;        // for writing to restart file
   int copyflag;
   int intdoubleratio;    // sizeof(double) / sizeof(int)
 
+  MyPool<int> *icp;      // pool that supplies Bonus::ivalue storage
+  MyPool<double> *dcp;   // pool that supplies Bonus::dvalue storage
+
   void grow_bonus();
   void copy_bonus(int, int);
   //void check(int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Internal error in atom_style body
 
 This error should not occur.  Contact the developers.
 
 E: Invalid atom_style body command
 
 No body style argument was provided.
 
 E: Invalid body style
 
 The choice of body style is unknown.
 
 E: Per-processor system is too big
 
 The number of owned atoms plus ghost atoms on a single
 processor must fit in 32-bit integer.
 
 E: Invalid atom ID in Atoms section of data file
 
 Atom IDs must be positive integers.
 
 E: Invalid atom type in Atoms section of data file
 
 Atom types must range from 1 to specified # of types.
 
 E: Invalid density in Atoms section of data file
 
 Density value cannot be <= 0.0.
 
 E: Assigning body parameters to non-body atom
 
 Self-explanatory.
 
 */
diff --git a/src/body.h b/src/body.h
index 644b3e1e9..9bf9270aa 100644
--- a/src/body.h
+++ b/src/body.h
@@ -1,58 +1,62 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_BODY_H
 #define LMP_BODY_H
 
 #include "pointers.h"
 #include "atom_vec_body.h"
+#include "my_pool.h"
 
 namespace LAMMPS_NS {
 
 class Body : protected Pointers {
  public:
+  MyPool<int> *icp;           // int pool; allocated/freed by the body style (e.g. BodyNparticle)
+  MyPool<double> *dcp;        // double pool; allocated/freed by the body style (e.g. BodyNparticle)
+
   char *style;
   int size_forward;           // max extra values packed for comm
   int size_border;            // max extra values packed for border comm
   AtomVecBody *avec;          // ptr to class that stores body bonus info
 
   Body(class LAMMPS *, int, char **);
   virtual ~Body();
 
   // methods implemented by child classes
 
   virtual int pack_comm_body(class AtomVecBody::Bonus *, double *) {return 0;}
   virtual int unpack_comm_body(class AtomVecBody::Bonus *, double *) {return 0;}
   virtual int pack_border_body(class AtomVecBody::Bonus *, double *) {return 0;}
   virtual int unpack_border_body(class AtomVecBody::Bonus *, 
                                  double *) {return 0;}
 
   virtual void data_body(int, int, int, char **, char **) = 0;
   virtual int noutrow(int) = 0;
   virtual int noutcol() = 0;
   virtual void output(int, int, double *) = 0;
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 */
diff --git a/src/fix_adapt.h b/src/fix_adapt.h
index f813e3845..88d94d169 100644
--- a/src/fix_adapt.h
+++ b/src/fix_adapt.h
@@ -1,111 +1,107 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(adapt,FixAdapt)
 
 #else
 
 #ifndef LMP_FIX_ADAPT_H
 #define LMP_FIX_ADAPT_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixAdapt : public Fix {
  public:
   int diamflag;        // 1 if atom diameters will vary, for AtomVecGranular
   int chgflag; 
 
   FixAdapt(class LAMMPS *, int, char **);
   ~FixAdapt();
   int setmask();
   void init();
   void setup_pre_force(int);
   void pre_force(int);
   void post_run();
 
  private:
   int nadapt,resetflag,scaleflag;
   int anypair;
 
   struct Adapt {
     int which,ivar;
     char *var;
     char *pstyle,*pparam;
     int ilo,ihi,jlo,jhi;
     int pdim;
     double *scalar,scalar_orig;
     double **array,**array_orig;
     int aparam;
   };
 
   Adapt *adapt;
   double *kspace_scale;
 
   void change_settings();
   void restore_settings();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Variable name for fix adapt does not exist
 
 Self-explanatory.
 
 E: Variable for fix adapt is invalid style
 
 Only equal-style variables can be used.
 
 E: Fix adapt pair style does not exist
 
 Self-explanatory
 
 E: Fix adapt pair style param not supported
 
 The pair style does not know about the parameter you specified.
 
 E: Fix adapt type pair range is not valid for pair hybrid sub-style
 
 Self-explanatory.
 
 E: Fix adapt kspace style does not exist
 
 Self-explanatory.
 
 E: Fix adapt requires atom attribute diameter
 
 The atom style being used does not specify an atom diameter.
 
 E: Fix adapt requires atom attribute charge
 
-UNDOCUMENTED
-
-U: Fix adapt requires atom attribute charge 
-
 The atom style being used does not specify an atom charge.
 
 */
diff --git a/src/fix_balance.h b/src/fix_balance.h
index a1e52ef32..0f3202c9b 100644
--- a/src/fix_balance.h
+++ b/src/fix_balance.h
@@ -1,91 +1,87 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(balance,FixBalance)
 
 #else
 
 #ifndef LMP_FIX_BALANCE_H
 #define LMP_FIX_BALANCE_H
 
 #include "stdio.h"
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixBalance : public Fix {
  public:
   FixBalance(class LAMMPS *, int, char **);
   ~FixBalance();
   int setmask();
   void init();
   void setup(int);
   void setup_pre_exchange();
   void pre_exchange();
   void pre_neighbor();
   double compute_scalar();
   double compute_vector(int);
   double memory_usage();
 
  private:
   int nevery,nitermax;
   char bstr[3];
   double thresh;
   FILE *fp;
 
   double imbnow;                // current imbalance factor
   double imbprev;               // imbalance factor before last rebalancing
   double imbfinal;              // imbalance factor after last rebalancing
   int maxperproc;               // max atoms on any processor
   int itercount;                // iteration count of last call to Balance
   int kspace_flag;              // 1 if KSpace solver defined
   int pending;
 
   class Balance *balance;
   class Irregular *irregular;
 
   void rebalance();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix balance string is invalid
 
 The string can only contain the characters "x", "y", or "z".
 
 E: Fix balance string is invalid for 2d simulation
 
 The string cannot contain the letter "z".
 
 E: Cannot open fix balance output file
 
 Self-explanatory.
 
-U: Cannot yet use fix balance with PPPM
-
-This is a current limitation of LAMMPS.
-
 */
diff --git a/src/fix_box_relax.h b/src/fix_box_relax.h
index 278c5eea3..e29889df0 100644
--- a/src/fix_box_relax.h
+++ b/src/fix_box_relax.h
@@ -1,200 +1,196 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(box/relax,FixBoxRelax)
 
 #else
 
 #ifndef LMP_FIX_BOX_RELAX_H
 #define LMP_FIX_BOX_RELAX_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixBoxRelax : public Fix {
  public:
   FixBoxRelax(class LAMMPS *, int, char **);
   ~FixBoxRelax();
   int setmask();
   void init();
 
   double min_energy(double *);
   void min_store();
   void min_clearstore();
   void min_pushstore();
   void min_popstore();
   int min_reset_ref();
   void min_step(double, double *);
   double max_alpha(double *);
   int min_dof();
 
   int modify_param(int, char **);
 
  private:
   int p_flag[6];
   int pstyle,pcouple,allremap;
   int dimension;
   double p_target[6],p_current[6];
   double vol0,xprdinit,yprdinit,zprdinit;
   double vmax,pv2e,pflagsum;
   int kspace_flag;
 
   int current_lifo;              // LIFO stack pointer
   double boxlo0[2][3];           // box bounds at start of line search
   double boxhi0[2][3];
   double boxtilt0[2][3];         // xy,xz,yz tilts at start of line search
   double ds[6];                  // increment in scale matrix
 
   int scaleyz;                   // 1 if yz scaled with lz
   int scalexz;                   // 1 if xz scaled with lz
   int scalexy;                   // 1 if xy scaled with ly
 
   double fixedpoint[3];          // Location of dilation fixed-point
 
   char *id_temp,*id_press;
   class Compute *temperature,*pressure;
   int tflag,pflag;
 
   int nrigid;
   int *rfix;
 
   double sigma[6];                 // scaled target stress
   double utsigma[3];               // weighting for upper-tri elements
                                    // of modified sigma
   int sigmamod_flag;               // 1 if modified sigma to be used
   double fdev[6];                  // Deviatoric force on cell
   int deviatoric_flag;             // 0 if target stress tensor is hydrostatic
   double h0[6];                    // h_inv of reference (zero strain) box
   double h0_inv[6];                // h_inv of reference (zero strain) box
   int nreset_h0;                   // interval for resetting h0
   double p_hydro;                  // hydrostatic component of target stress
 
   void remap();
   void couple();
 
   void compute_sigma();
   void compute_deviatoric();
   double compute_strain_energy();
   void compute_press_target();
   double compute_scalar();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Invalid fix box/relax command for a 2d simulation
 
 Fix box/relax styles involving the z dimension cannot be used in
 a 2d simulation.
 
 E: Invalid fix box/relax command pressure settings
 
 If multiple dimensions are coupled, those dimensions must be specified.
 
 E: Cannot use fix box/relax on a non-periodic dimension
 
 When specifying a diagonal pressure component, the dimension must be
 periodic.
 
 E: Cannot use fix box/relax on a 2nd non-periodic dimension
 
 When specifying an off-diagonal pressure component, the 2nd of the two
 dimensions must be periodic.  E.g. if the xy component is specified,
 then the y dimension must be periodic.
 
 E: Cannot use fix box/relax with tilt factor scaling on a 2nd non-periodic dimension
 
-UNDOCUMENTED
+When specifying scaling on a tilt factor component, the 2nd of the two
+dimensions must be periodic.  E.g. if the xy component is specified,
+then the y dimension must be periodic.
 
 E: Cannot use fix box/relax with both relaxation and scaling on a tilt factor
 
 When specifying scaling on a tilt factor component, that component can not
 also be controlled by the barostat. E.g. if scalexy yes is specified and
 also keyword tri or xy, this is wrong.
 
 E: Can not specify Pxy/Pxz/Pyz in fix box/relax with non-triclinic box
 
 Only triclinic boxes can be used with off-diagonal pressure components.
 See the region prism command for details.
 
 E: Invalid fix box/relax pressure settings
 
 Settings for coupled dimensions must be the same.
 
 E: Temperature ID for fix box/relax does not exist
 
 Self-explanatory.
 
 E: Pressure ID for fix box/relax does not exist
 
 The compute ID needed to compute pressure for the fix does not
 exist.
 
 E: Attempt to push beyond stack limit in fix box/relax
 
 Internal LAMMPS error.  Please report it to the developers.
 
 E: Attempt to pop empty stack in fix box/relax
 
 Internal LAMMPS error.  Please report it to the developers.
 
 E: Fix box/relax generated negative box length
 
 The pressure being applied is likely too large.  Try applying
 it incrementally, to build to the high pressure.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Temperature for fix modify is not for group all
 
 The temperature compute is being used with a pressure calculation
 which does operate on group all, so this may be inconsistent.
 
 E: Pressure ID for fix modify does not exist
 
 Self-explanatory.
 
 E: Could not find fix_modify pressure ID
 
 The compute ID for computing pressure does not exist.
 
 E: Fix_modify pressure ID does not compute pressure
 
 The compute ID assigned to the fix must compute pressure.
 
-U: Cannot use fix box/relax with tilt factor scaling on a 2nd non-periodic dimension"
-
-When specifying scaling on a tilt factor component, the 2nd of the two
-dimensions must be periodic.  E.g. if the xy component is specified,
-then the y dimension must be periodic.
-
 */
diff --git a/src/fix_heat.cpp b/src/fix_heat.cpp
index 9449669d8..50b7a7ab8 100644
--- a/src/fix_heat.cpp
+++ b/src/fix_heat.cpp
@@ -1,338 +1,338 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_heat.h"
 #include "atom.h"
 #include "domain.h"
 #include "region.h"
 #include "group.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "input.h"
 #include "variable.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // values assigned to hstyle: heat input given as a numeric constant,
 // as an equal-style variable, or as an atom-style variable
 enum{CONSTANT,EQUAL,ATOM};
 
 /* ---------------------------------------------------------------------- */
 
 FixHeat::FixHeat(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
 {
   // arg[3] = nevery, arg[4] = heat value or v_name
   // both are read unconditionally below, so at least 5 args are required
   // (a check of narg < 4 would let arg[4] be read past the end of arg)

   if (narg < 5) error->all(FLERR,"Illegal fix heat command");

   scalar_flag = 1;
   global_freq = 1;
   extscalar = 0;

   nevery = atoi(arg[3]);
   if (nevery <= 0) error->all(FLERR,"Illegal fix heat command");

   // heat input is either a variable reference (v_name) or a numeric constant
   // for a variable only its name is stored here; hstyle is set in init()

   hstr = NULL;

   if (strstr(arg[4],"v_") == arg[4]) {
     int n = strlen(&arg[4][2]) + 1;
     hstr = new char[n];
     strcpy(hstr,&arg[4][2]);
   } else {
     heat_input = atof(arg[4]);
     hstyle = CONSTANT;
   }

   // optional args

   iregion = -1;
   idregion = NULL;

   int iarg = 5;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"region") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix heat command");
       iregion = domain->find_region(arg[iarg+1]);
       if (iregion == -1)
         error->all(FLERR,"Region ID for fix heat does not exist");

       // release the ID from an earlier region keyword, if any,
       // so that repeating the keyword does not leak the old string

       delete [] idregion;
       int n = strlen(arg[iarg+1]) + 1;
       idregion = new char[n];
       strcpy(idregion,arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal fix heat command");
   }

   // scale = 1.0 is what compute_scalar() reports until a rescale happens

   scale = 1.0;

   // per-atom arrays are allocated on demand in end_of_step()

   maxatom = 0;
   vheat = NULL;
   vscale = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixHeat::~FixHeat()
 {
   // per-atom work arrays created via memory->create() in end_of_step()

   memory->destroy(vscale);
   memory->destroy(vheat);

   // ID strings allocated with new[] in the constructor (may be NULL)

   delete [] idregion;
   delete [] hstr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixHeat::setmask()
 {
   // END_OF_STEP is the only bit set in the returned mask

   return 0 | END_OF_STEP;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixHeat::init()
 {
   // re-resolves the region and the variable from their stored ID strings,
   // sets hstyle for the variable case, and caches the total group mass

   // set index and check validity of region

   if (iregion >= 0) {
     iregion = domain->find_region(idregion);
     if (iregion == -1)
       error->all(FLERR,"Region ID for fix heat does not exist");
   }

   // check variable
   // hstr is non-NULL only when the heat input was given as v_name

   if (hstr) {
     hvar = input->variable->find(hstr);
     if (hvar < 0) 
       error->all(FLERR,"Variable name for fix heat does not exist");
     if (input->variable->equalstyle(hvar)) hstyle = EQUAL;
-    else if (input->variable->equalstyle(hvar)) hstyle = ATOM;
+    else if (input->variable->atomstyle(hvar)) hstyle = ATOM;
     else error->all(FLERR,"Variable for fix heat is invalid style");
   }

   // cannot have 0 atoms in group

   if (group->count(igroup) == 0)
     error->all(FLERR,"Fix heat group has no atoms");

   // used by end_of_step() when no region is defined;
   // with a region, end_of_step() recomputes masstotal itself

   masstotal = group->mass(igroup);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixHeat::end_of_step()
 {
   // rescales velocities of atoms in the fix group (optionally only those
   // inside the region) and then subtracts vsub from each rescaled velocity
   // CONSTANT/EQUAL: one scale factor for all atoms, stored in scale
   // ATOM: per-atom energy ratio stored in vscale[], driven by vheat[]

   int i;
   double heat,ke,massone;
   double vsub[3],vcm[3];
   Region *region;

   double **x = atom->x;
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   int *type = atom->type;
   double *mass = atom->mass;
   double *rmass = atom->rmass;

   // reallocate per-atom arrays if necessary
   // arrays are sized to atom->nmax, not nlocal

   if (hstyle == ATOM && atom->nlocal > maxatom) {
     maxatom = atom->nmax;
     memory->destroy(vheat);
     memory->destroy(vscale);
     memory->create(vheat,maxatom,"heat:vheat");
     memory->create(vscale,maxatom,"heat:vscale");
   }

   // evaluate variable
   // EQUAL updates heat_input, ATOM fills vheat[]

   if (hstyle != CONSTANT) {
     modify->clearstep_compute();
     if (hstyle == EQUAL) heat_input = input->variable->compute_equal(hvar);
     else input->variable->compute_atom(hvar,igroup,vheat,1,0);
     modify->addstep_compute(update->ntimestep + nevery);
   }

   // vcm = center-of-mass velocity of scaled atoms
   // with a region, masstotal is recomputed here for the atoms in the region

   if (iregion < 0) {
     ke = group->ke(igroup)*force->ftm2v;
     group->vcm(igroup,masstotal,vcm);
   } else {
     masstotal = group->mass(igroup,iregion);
     if (masstotal == 0.0) error->all(FLERR,"Fix heat group has no atoms");
     ke = group->ke(igroup,iregion)*force->ftm2v;
     group->vcm(igroup,masstotal,vcm,iregion);
   }
   double vcmsq = vcm[0]*vcm[0] + vcm[1]*vcm[1] + vcm[2]*vcm[2];
 
 
   // add heat via scale factor on velocities for CONSTANT and EQUAL cases
   // scale = velocity scale factor to accomplish eflux change in energy
   // vsub = velocity subtracted from each atom to preserve momentum
   // overall KE cannot go negative
   // NOTE(review): the denominator (ke - 0.5*vcmsq*masstotal) is not
   //   checked for zero before the division - confirm this cannot occur

   if (hstyle != ATOM) {
     heat = heat_input*nevery*update->dt*force->ftm2v;
     double escale = 
       (ke + heat - 0.5*vcmsq*masstotal)/(ke - 0.5*vcmsq*masstotal);
     if (escale < 0.0) error->all(FLERR,"Fix heat kinetic energy went negative");
     scale = sqrt(escale);
     vsub[0] = (scale-1.0) * vcm[0];
     vsub[1] = (scale-1.0) * vcm[1];
     vsub[2] = (scale-1.0) * vcm[2];

     if (iregion < 0) {
       for (i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           v[i][0] = scale*v[i][0] - vsub[0];
           v[i][1] = scale*v[i][1] - vsub[1];
           v[i][2] = scale*v[i][2] - vsub[2];
         }
     } else {
       region = domain->regions[iregion];
       // this loop declares its own i, shadowing the function-level i
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit && region->match(x[i][0],x[i][1],x[i][2])) {
           v[i][0] = scale*v[i][0] - vsub[0];
           v[i][1] = scale*v[i][1] - vsub[1];
           v[i][2] = scale*v[i][2] - vsub[2];
         }
     }

   // add heat via per-atom scale factor on velocities for ATOM case
   // vscale = velocity scale factor to accomplish eflux change in energy
   // vsub = velocity subtracted from each atom to preserve momentum
   // KE of an atom cannot go negative
   // vscale[i] stores the energy ratio; sqrt(vscale[i]) is the factor
   //   applied to the velocity
   // scale is overwritten per atom here, so after this branch it holds
   //   the factor of the last atom processed
   // NOTE(review): vsub is accumulated over this proc's nlocal atoms only
   //   and then divided by masstotal; no reduction of vsub across procs
   //   is visible in this function - confirm for multi-proc runs

   } else {
     vsub[0] = vsub[1] = vsub[2] = 0.0;
     if (iregion < 0) {
       // first pass: compute vscale[i] and accumulate mass-weighted vsub
       for (i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) {
           heat = vheat[i]*nevery*update->dt*force->ftm2v;
           vscale[i] = 
             (ke + heat - 0.5*vcmsq*masstotal)/(ke - 0.5*vcmsq*masstotal);
           if (vscale[i] < 0.0) 
             error->all(FLERR,
                        "Fix heat kinetic energy of an atom went negative");
           scale = sqrt(vscale[i]);
           if (rmass) massone = rmass[i];
           else massone = mass[type[i]];
           vsub[0] += (scale-1.0) * v[i][0]*massone;
           vsub[1] += (scale-1.0) * v[i][1]*massone;
           vsub[2] += (scale-1.0) * v[i][2]*massone;
         }
       }

       vsub[0] /= masstotal;
       vsub[1] /= masstotal;
       vsub[2] /= masstotal;

       // second pass: apply per-atom scale and subtract vsub
       for (i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           scale = sqrt(vscale[i]);
           v[i][0] = scale*v[i][0] - vsub[0];
           v[i][1] = scale*v[i][1] - vsub[1];
           v[i][2] = scale*v[i][2] - vsub[2];
         }

     } else {
       region = domain->regions[iregion];
       // same two passes as above, restricted to atoms inside the region
       for (i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit && region->match(x[i][0],x[i][1],x[i][2])) {
           heat = vheat[i]*nevery*update->dt*force->ftm2v;
           vscale[i] = 
             (ke + heat - 0.5*vcmsq*masstotal)/(ke - 0.5*vcmsq*masstotal);
           if (vscale[i] < 0.0) 
             error->all(FLERR,
                        "Fix heat kinetic energy of an atom went negative");
           scale = sqrt(vscale[i]);
           if (rmass) massone = rmass[i];
           else massone = mass[type[i]];
           vsub[0] += (scale-1.0) * v[i][0]*massone;
           vsub[1] += (scale-1.0) * v[i][1]*massone;
           vsub[2] += (scale-1.0) * v[i][2]*massone;
         }
       }

       vsub[0] /= masstotal;
       vsub[1] /= masstotal;
       vsub[2] /= masstotal;

       for (i = 0; i < nlocal; i++)
         if (mask[i] & groupbit && region->match(x[i][0],x[i][1],x[i][2])) {
           scale = sqrt(vscale[i]);
           v[i][0] = scale*v[i][0] - vsub[0];
           v[i][1] = scale*v[i][1] - vsub[1];
           v[i][2] = scale*v[i][2] - vsub[2];
         }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixHeat::compute_scalar()
 {
   double average_scale = scale;
   if (hstyle == ATOM) {  
     double scale_sum = 0.0;
     int ncount = 0;
     int *mask = atom->mask;
     double **x = atom->x;
     int nlocal = atom->nlocal;
     if (iregion < 0) {
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) {
           scale_sum += sqrt(vscale[i]);
           ncount++;
         }
       }
     } else {
       Region *region;
       region = domain->regions[iregion];
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit && region->match(x[i][0],x[i][1],x[i][2])) {
           scale_sum += sqrt(vscale[i]);
           ncount++;
         }
       }
     }
     double scale_sum_all = 0.0;
     int ncount_all = 0;
     MPI_Allreduce(&scale_sum,&scale_sum_all,1,MPI_DOUBLE,MPI_SUM,world);
     MPI_Allreduce(&ncount,&ncount_all,1,MPI_INT,MPI_SUM,world);
     if (ncount_all == 0) average_scale = 0.0;
     else average_scale = scale_sum_all/static_cast<double>(ncount_all);
   }
   return average_scale;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixHeat::memory_usage()
 {
   // only the ATOM style allocates per-atom arrays (vheat and vscale)

   if (hstyle != ATOM) return 0.0;
   return atom->nmax*2 * sizeof(double);
 }
diff --git a/src/fix_heat.h b/src/fix_heat.h
index 42db8e404..d55b26bf4 100644
--- a/src/fix_heat.h
+++ b/src/fix_heat.h
@@ -1,89 +1,90 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(heat,FixHeat)
 
 #else
 
 #ifndef LMP_FIX_HEAT_H
 #define LMP_FIX_HEAT_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix registered under style name "heat": rescales velocities of the
 // atoms in its group at end of step (see end_of_step() in fix_heat.cpp)
 class FixHeat : public Fix {
  public:
   FixHeat(class LAMMPS *, int, char **);
   ~FixHeat();
   int setmask();
   void init();
   void end_of_step();
   double compute_scalar();
   double memory_usage();

  private:
   int iregion;          // index from domain->find_region(), -1 if no region
   double heat_input;    // heat value: constant arg or equal-style variable
   double masstotal;     // mass of group, or of group within the region
   double scale;         // velocity scale factor, initialized to 1.0
   char *idregion;       // copy of region ID, NULL if no region
   char *hstr;           // variable name without "v_", NULL if constant
   int hstyle,hvar;      // CONSTANT/EQUAL/ATOM and index of the variable

   int maxatom;          // allocated length of vheat and vscale
   double *vheat;        // per-atom values of the atom-style variable
   double *vscale;       // per-atom energy ratio, sqrt() is applied to v
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Region ID for fix heat does not exist
 
 Self-explanatory.
 
 E: Variable name for fix heat does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Variable for fix heat is invalid style
 
-UNDOCUMENTED
+Only equal-style or atom-style variables can be used.
 
 E: Fix heat group has no atoms
 
 Self-explanatory.
 
 E: Fix heat kinetic energy went negative
 
 This will cause the velocity rescaling about to be performed by fix
 heat to be invalid.
 
 E: Fix heat kinetic energy of an atom went negative
 
-UNDOCUMENTED
+This will cause the velocity rescaling about to be performed by fix
+heat to be invalid.
 
 */
diff --git a/src/fix_move.h b/src/fix_move.h
index 80257c3aa..cb3dfc13f 100644
--- a/src/fix_move.h
+++ b/src/fix_move.h
@@ -1,139 +1,135 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(move,FixMove)
 
 #else
 
 #ifndef LMP_FIX_MOVE_H
 #define LMP_FIX_MOVE_H
 
 #include "stdio.h"
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix registered under style name "move" via FixStyle(move,FixMove).
 // NOTE(review): the member comments below are inferred from names and
 // from the error-message list in this header - confirm in fix_move.cpp
 class FixMove : public Fix {
  public:
   FixMove(class LAMMPS *, int, char **);
   ~FixMove();
   int setmask();
   void init();
   void initial_integrate(int);
   void final_integrate();
   void initial_integrate_respa(int, int, int);
   void final_integrate_respa(int, int);

   double memory_usage();
   void write_restart(FILE *);
   void restart(char *);
   void grow_arrays(int);
   void copy_arrays(int, int, int);
   void set_arrays(int);
   int pack_exchange(int, double *);
   int unpack_exchange(int, double *);
   int pack_restart(int, double *);
   void unpack_restart(int, int);
   int maxsize_restart();
   int size_restart(int);

   void reset_dt();

  private:
   // presumably names of the variables given for x,y,z and vx,vy,vz
   char *xvarstr,*yvarstr,*zvarstr,*vxvarstr,*vyvarstr,*vzvarstr;
   int mstyle;           // presumably selects the kind of motion - confirm
   int vxflag,vyflag,vzflag,axflag,ayflag,azflag;
   double vx,vy,vz,ax,ay,az;
   double period,omega_rotate;
   double point[3],axis[3],runit[3];
   double dt,dtv,dtf;
   // presumably variable indices and their styles, matching the *varstr names
   int xvar,yvar,zvar,vxvar,vyvar,vzvar;
   int xvarstyle,yvarstyle,zvarstyle,vxvarstyle,vyvarstyle,vzvarstyle;
   int omega_flag,nlevels_respa;
   int time_origin;

   double **xoriginal;         // original coords of atoms
   int displaceflag,velocityflag;
   int maxatom;
   double **displace,**velocity;
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix move cannot set linear z motion for 2d problem
 
 Self-explanatory.
 
 E: Fix move cannot set wiggle z motion for 2d problem
 
 Self-explanatory.
 
 E: Fix move cannot rotate aroung non z-axis for 2d problem
 
 Self-explanatory.
 
 E: Fix move cannot define z or vz variable for 2d problem
 
 Self-explanatory.
 
 W: Fix move does not update angular momentum
 
 Atoms store this quantity, but fix move does not (yet) update it.
 
 W: Fix move does not update quaternions
 
 Atoms store this quantity, but fix move does not (yet) update it.
 
 E: Use of fix move with undefined lattice
 
 Must use lattice command with fix move command if units option is
 set to lattice.
 
 E: Zero length rotation vector with fix move
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Variable name for fix move does not exist
 
 Self-explanatory.
 
 E: Variable for fix move is invalid style
 
 Only equal-style variables can be used.
 
 E: Cannot add atoms to fix move variable
 
 Atoms can not be added afterwards to this fix option.
 
 E: Resetting timestep is not allowed with fix move
 
 This is because fix move is moving atoms based on elapsed time.
 
-U: Fix move cannot have 0 length rotation vector
-
-Self-explanatory.
-
 */
diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp
index a97026e77..72c8a897e 100644
--- a/src/fix_nh.cpp
+++ b/src/fix_nh.cpp
@@ -1,2262 +1,2262 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Mark Stevens (SNL), Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "stdlib.h"
 #include "math.h"
 #include "fix_nh.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "force.h"
 #include "group.h"
 #include "comm.h"
 #include "irregular.h"
 #include "modify.h"
 #include "fix_deform.h"
 #include "compute.h"
 #include "kspace.h"
 #include "update.h"
 #include "respa.h"
 #include "domain.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // NOTE(review): presumably thresholds used when handling box tilt
 // factors - confirm against their use later in this file
 #define DELTAFLIP 0.1
 #define TILTMAX 1.5

 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};     // values of pcouple (couple keyword)
 enum{ISO,ANISO,TRICLINIC};
 
 /* ----------------------------------------------------------------------
    NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
  ---------------------------------------------------------------------- */
 
 FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal fix nvt/npt/nph command");
 
   restart_global = 1;
   time_integrate = 1;
   scalar_flag = 1;
   vector_flag = 1;
   global_freq = 1;
   extscalar = 1;
   extvector = 0;
 
   // default values
 
   pcouple = NONE;
   drag = 0.0;
   allremap = 1;
   id_dilate = NULL;
   mtchain = mpchain = 3;
   nc_tchain = nc_pchain = 1;
   mtk_flag = 1;
   deviatoric_flag = 0;
   nreset_h0 = 0;
   eta_mass_flag = 1;
   omega_mass_flag = 0;
   etap_mass_flag = 0;
   flipflag = 1;
 
   // turn on tilt factor scaling, whenever applicable
 
   dimension = domain->dimension;
 
   scaleyz = scalexz = scalexy = 0;
   if (domain->yperiodic && domain->xy != 0.0) scalexy = 1;
   if (domain->zperiodic && dimension == 3) {
     if (domain->yz != 0.0) scaleyz = 1;
     if (domain->xz != 0.0) scalexz = 1;
   }
 
   // set fixed-point to default = center of cell
 
   fixedpoint[0] = 0.5*(domain->boxlo[0]+domain->boxhi[0]);
   fixedpoint[1] = 0.5*(domain->boxlo[1]+domain->boxhi[1]);
   fixedpoint[2] = 0.5*(domain->boxlo[2]+domain->boxhi[2]);
 
   // used by FixNVTSllod to preserve non-default value
 
   mtchain_default_flag = 1;
 
   tstat_flag = 0;
   double t_period = 0.0;
 
   double p_period[6];
   for (int i = 0; i < 6; i++) {
     p_start[i] = p_stop[i] = p_period[i] = p_target[i] = 0.0;
     p_flag[i] = 0;
   }
 
   // process keywords
 
   int iarg = 3;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"temp") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       tstat_flag = 1;
       t_start = atof(arg[iarg+1]);
       t_target = t_start;
       t_stop = atof(arg[iarg+2]);
       t_period = atof(arg[iarg+3]);
       if (t_start < 0.0 || t_stop <= 0.0)
         error->all(FLERR,
                    "Target temperature for fix nvt/npt/nph cannot be 0.0");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"iso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = XYZ;
       p_start[0] = p_start[1] = p_start[2] = atof(arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = atof(arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] = atof(arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"aniso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = NONE;
       p_start[0] = p_start[1] = p_start[2] = atof(arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = atof(arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] = atof(arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"tri") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = NONE;
       scalexy = scalexz = scaleyz = 0;
       p_start[0] = p_start[1] = p_start[2] = atof(arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = atof(arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] = atof(arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       p_start[3] = p_start[4] = p_start[5] = 0.0;
       p_stop[3] = p_stop[4] = p_stop[5] = 0.0;
       p_period[3] = p_period[4] = p_period[5] = atof(arg[iarg+3]);
       p_flag[3] = p_flag[4] = p_flag[5] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
         p_start[3] = p_stop[3] = p_period[3] = 0.0;
         p_flag[3] = 0;
         p_start[4] = p_stop[4] = p_period[4] = 0.0;
         p_flag[4] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"x") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[0] = atof(arg[iarg+1]);
       p_stop[0] = atof(arg[iarg+2]);
       p_period[0] = atof(arg[iarg+3]);
       p_flag[0] = 1;
       deviatoric_flag = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[1] = atof(arg[iarg+1]);
       p_stop[1] = atof(arg[iarg+2]);
       p_period[1] = atof(arg[iarg+3]);
       p_flag[1] = 1;
       deviatoric_flag = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[2] = atof(arg[iarg+1]);
       p_stop[2] = atof(arg[iarg+2]);
       p_period[2] = atof(arg[iarg+3]);
       p_flag[2] = 1;
       deviatoric_flag = 1;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
 
     } else if (strcmp(arg[iarg],"yz") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[3] = atof(arg[iarg+1]);
       p_stop[3] = atof(arg[iarg+2]);
       p_period[3] = atof(arg[iarg+3]);
       p_flag[3] = 1;
       deviatoric_flag = 1;
       scaleyz = 0;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
     } else if (strcmp(arg[iarg],"xz") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[4] = atof(arg[iarg+1]);
       p_stop[4] = atof(arg[iarg+2]);
       p_period[4] = atof(arg[iarg+3]);
       p_flag[4] = 1;
       deviatoric_flag = 1;
       scalexz = 0;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
     } else if (strcmp(arg[iarg],"xy") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[5] = atof(arg[iarg+1]);
       p_stop[5] = atof(arg[iarg+2]);
       p_period[5] = atof(arg[iarg+3]);
       p_flag[5] = 1;
       deviatoric_flag = 1;
       scalexy = 0;
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"couple") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ;
       else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY;
       else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ;
       else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ;
       else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"drag") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       drag = atof(arg[iarg+1]);
       if (drag < 0.0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"dilate") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"all") == 0) allremap = 1;
       else {
         allremap = 0;
         delete [] id_dilate;
         int n = strlen(arg[iarg+1]) + 1;
         id_dilate = new char[n];
         strcpy(id_dilate,arg[iarg+1]);
         int idilate = group->find(id_dilate);
         if (idilate == -1)
           error->all(FLERR,"Fix nvt/npt/nph dilate group ID does not exist");
       }
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"tchain") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       mtchain = atoi(arg[iarg+1]);
       // used by FixNVTSllod to preserve non-default value
       mtchain_default_flag = 0;
       if (mtchain < 1) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"pchain") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       mpchain = atoi(arg[iarg+1]);
       if (mpchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"mtk") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"tloop") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nc_tchain = atoi(arg[iarg+1]);
       if (nc_tchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"ploop") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nc_pchain = atoi(arg[iarg+1]);
       if (nc_pchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"nreset") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nreset_h0 = atoi(arg[iarg+1]);
       if (nreset_h0 < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scalexy") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scalexy = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scalexy = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scalexz") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scalexz = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scalexz = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scaleyz") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scaleyz = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scaleyz = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"flip") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) flipflag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) flipflag = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"fixedpoint") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       fixedpoint[0] = atof(arg[iarg+1]);
       fixedpoint[1] = atof(arg[iarg+2]);
       fixedpoint[2] = atof(arg[iarg+3]);
       iarg += 4;
     } else error->all(FLERR,"Illegal fix nvt/npt/nph command");
   }
 
   // error checks
 
   if (dimension == 2 && (p_flag[2] || p_flag[3] || p_flag[4]))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
   if (dimension == 2 && (pcouple == YZ || pcouple == XZ))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
   if (dimension == 2 && (scalexz == 1 || scaleyz == 1 ))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
 
   if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0)
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
 
   // require periodicity in tensile dimension
 
   if (p_flag[0] && domain->xperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
   if (p_flag[1] && domain->yperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
   if (p_flag[2] && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
 
   // require periodicity in 2nd dim of off-diagonal tilt component
 
   if (p_flag[3] && domain->zperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
   if (p_flag[4] && domain->zperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
   if (p_flag[5] && domain->yperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
 
   if (scaleyz == 1 && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with yz scaling when z is non-periodic dimension");
   if (scalexz == 1 && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with xz scaling when z is non-periodic dimension");
   if (scalexy == 1 && domain->yperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with xy scaling when y is non-periodic dimension");
 
   if (p_flag[3] && scaleyz == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both yz dynamics and yz scaling");
   if (p_flag[4] && scalexz == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both xz dynamics and xz scaling");
   if (p_flag[5] && scalexy == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both xy dynamics and xy scaling");
 
   if (!domain->triclinic && (p_flag[3] || p_flag[4] || p_flag[5]))
     error->all(FLERR,"Can not specify Pxy/Pxz/Pyz in "
                "fix nvt/npt/nph with non-triclinic box");
 
   if (pcouple == XYZ && dimension == 3 &&
       (p_start[0] != p_start[1] || p_start[0] != p_start[2] ||
        p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[1] || p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XYZ && dimension == 2 &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XY &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == YZ &&
       (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] ||
        p_period[1] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XZ &&
       (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
 
   if ((tstat_flag && t_period <= 0.0) ||
       (p_flag[0] && p_period[0] <= 0.0) ||
       (p_flag[1] && p_period[1] <= 0.0) ||
       (p_flag[2] && p_period[2] <= 0.0) ||
       (p_flag[3] && p_period[3] <= 0.0) ||
       (p_flag[4] && p_period[4] <= 0.0) ||
       (p_flag[5] && p_period[5] <= 0.0))
     error->all(FLERR,"Fix nvt/npt/nph damping parameters must be > 0.0");
 
   // set pstat_flag and box change and restart_pbc variables
 
   pstat_flag = 0;
   for (int i = 0; i < 6; i++)
     if (p_flag[i]) pstat_flag = 1;
 
   if (pstat_flag) {
     box_change = 1;
     if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1;
     if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1;
     no_change_box = 1;
     if (allremap == 0) restart_pbc = 1;
   }
 
   // pstyle = TRICLINIC if any off-diagonal term is controlled -> 6 dof
   // else pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof
   // else pstyle = ANISO -> 3 dof
 
   if (p_flag[3] || p_flag[4] || p_flag[5]) pstyle = TRICLINIC;
   else if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO;
   else pstyle = ANISO;
 
   // pre_exchange only required if flips can occur due to shape changes
 
   pre_exchange_flag = 0;
   if (flipflag && (p_flag[3] || p_flag[4] || p_flag[5])) pre_exchange_flag = 1;
   if (flipflag && (domain->yz != 0.0 || domain->xz != 0.0 || domain->xy != 0.0))
     pre_exchange_flag = 1;
 
   // convert input periods to frequencies
 
   t_freq = 0.0;
   p_freq[0] = p_freq[1] = p_freq[2] = p_freq[3] = p_freq[4] = p_freq[5] = 0.0;
 
   if (tstat_flag) t_freq = 1.0 / t_period;
   if (p_flag[0]) p_freq[0] = 1.0 / p_period[0];
   if (p_flag[1]) p_freq[1] = 1.0 / p_period[1];
   if (p_flag[2]) p_freq[2] = 1.0 / p_period[2];
   if (p_flag[3]) p_freq[3] = 1.0 / p_period[3];
   if (p_flag[4]) p_freq[4] = 1.0 / p_period[4];
   if (p_flag[5]) p_freq[5] = 1.0 / p_period[5];
 
   // Nose/Hoover temp and pressure init
 
   size_vector = 0;
 
   if (tstat_flag) {
     int ich;
     eta = new double[mtchain];
 
     // add one extra dummy thermostat, set to zero
 
     eta_dot = new double[mtchain+1];
     eta_dot[mtchain] = 0.0;
     eta_dotdot = new double[mtchain];
     for (ich = 0; ich < mtchain; ich++) {
       eta[ich] = eta_dot[ich] = eta_dotdot[ich] = 0.0;
     }
     eta_mass = new double[mtchain];
     size_vector += 2*2*mtchain;
   }
 
   if (pstat_flag) {
     omega[0] = omega[1] = omega[2] = 0.0;
     omega_dot[0] = omega_dot[1] = omega_dot[2] = 0.0;
     omega_mass[0] = omega_mass[1] = omega_mass[2] = 0.0;
     omega[3] = omega[4] = omega[5] = 0.0;
     omega_dot[3] = omega_dot[4] = omega_dot[5] = 0.0;
     omega_mass[3] = omega_mass[4] = omega_mass[5] = 0.0;
     if (pstyle == ISO) size_vector += 2*2*1;
     else if (pstyle == ANISO) size_vector += 2*2*3;
     else if (pstyle == TRICLINIC) size_vector += 2*2*6;
 
     if (mpchain) {
       int ich;
       etap = new double[mpchain];
 
       // add one extra dummy thermostat, set to zero
 
       etap_dot = new double[mpchain+1];
       etap_dot[mpchain] = 0.0;
       etap_dotdot = new double[mpchain];
       for (ich = 0; ich < mpchain; ich++) {
         etap[ich] = etap_dot[ich] =
           etap_dotdot[ich] = 0.0;
       }
       etap_mass = new double[mpchain];
       size_vector += 2*2*mpchain;
     }
 
     if (deviatoric_flag) size_vector += 1;
   }
 
   nrigid = 0;
   rfix = NULL;
 
   if (pre_exchange_flag) irregular = new Irregular(lmp);
   else irregular = NULL;
 
   // initialize vol0,t0 to zero to signal uninitialized
   // values then assigned in init(), if necessary
 
   vol0 = t0 = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixNH::~FixNH()
 {
   // storage owned outright by this fix
 
   delete irregular;
   delete [] rfix;
   delete [] id_dilate;
 
   // remove the temperature compute only if this fix created it,
   // the ID string itself is always freed
 
   if (tflag) modify->delete_compute(id_temp);
   delete [] id_temp;
 
   // thermostat chain arrays are allocated only when tstat_flag is set
 
   if (tstat_flag) {
     delete [] eta_mass;
     delete [] eta_dotdot;
     delete [] eta_dot;
     delete [] eta;
   }
 
   // everything below belongs to the barostat
 
   if (!pstat_flag) return;
 
   // same rule for the pressure compute: remove it only if this fix created it
 
   if (pflag) modify->delete_compute(id_press);
   delete [] id_press;
 
   // barostat thermostat chain arrays exist only for a non-zero chain length
 
   if (mpchain) {
     delete [] etap_mass;
     delete [] etap_dotdot;
     delete [] etap_dot;
     delete [] etap;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNH::setmask()
 {
   // hooks this fix always takes part in
 
   int hooks = INITIAL_INTEGRATE | FINAL_INTEGRATE | THERMO_ENERGY |
     INITIAL_INTEGRATE_RESPA | FINAL_INTEGRATE_RESPA;
 
   // pre_exchange hook is requested only when the constructor enabled it
 
   if (pre_exchange_flag) hooks |= PRE_EXCHANGE;
   return hooks;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::init()
 {
   // re-resolves the dilate group and the temperature/pressure computes by ID,
   // derives timestep-dependent constants, records the reference cell
   // (only while vol0 is still zero) and rebuilds the list of rigid-body fixes
 
   // recheck that dilate group has not been deleted
 
   if (allremap == 0) {
     int idilate = group->find(id_dilate);
     if (idilate == -1)
       error->all(FLERR,"Fix nvt/npt/nph dilate group ID does not exist");
     dilate_group_bit = group->bitmask[idilate];
   }
 
   // ensure no conflict with fix deform
   // a conflict is any box component that is both barostatted here (p_flag)
   // and flagged in the deform fix's dimflag
 
   if (pstat_flag)
     for (int i = 0; i < modify->nfix; i++)
       if (strcmp(modify->fix[i]->style,"deform") == 0) {
         int *dimflag = ((FixDeform *) modify->fix[i])->dimflag;
         if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) ||
             (p_flag[2] && dimflag[2]) || (p_flag[3] && dimflag[3]) ||
             (p_flag[4] && dimflag[4]) || (p_flag[5] && dimflag[5]))
           error->all(FLERR,"Cannot use fix npt and fix deform on "
                      "same component of stress tensor");
       }
 
   // set temperature and pressure ptrs
 
   int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
-    error->all(FLERR,"Temperature ID for fix nvt/nph/npt does not exist");
+    error->all(FLERR,"Temperature ID for fix nvt/npt does not exist");
   temperature = modify->compute[icompute];
 
   // which = BIAS if the temperature compute reports a velocity bias
 
   if (temperature->tempbias) which = BIAS;
   else which = NOBIAS;
 
   if (pstat_flag) {
     icompute = modify->find_compute(id_press);
     if (icompute < 0)
       error->all(FLERR,"Pressure ID for fix npt/nph does not exist");
     pressure = modify->compute[icompute];
   }
 
   // set timesteps and frequencies
   // dto is the step used by remap(); it defaults to half a timestep
   // and is overridden below when the integrator is rRESPA
 
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dthalf = 0.5 * update->dt;
   dt4 = 0.25 * update->dt;
   dt8 = 0.125 * update->dt;
   dto = dthalf;
 
   // p_freq_max = largest barostat frequency
   // off-diagonal frequencies are only considered for pstyle TRICLINIC
 
   p_freq_max = 0.0;
   if (pstat_flag) {
     p_freq_max = MAX(p_freq[0],p_freq[1]);
     p_freq_max = MAX(p_freq_max,p_freq[2]);
     if (pstyle == TRICLINIC) {
       p_freq_max = MAX(p_freq_max,p_freq[3]);
       p_freq_max = MAX(p_freq_max,p_freq[4]);
       p_freq_max = MAX(p_freq_max,p_freq[5]);
     }
     pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);
   }
 
   if (tstat_flag)
     tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
 
   // tally the number of dimensions that are barostatted
   // set initial volume and reference cell, if not already done
   // vol0 == 0.0 is the "uninitialized" marker set by the constructor
   // restart() may already have filled in vol0
 
   if (pstat_flag) {
     pdim = p_flag[0] + p_flag[1] + p_flag[2];
     if (vol0 == 0.0) {
       if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd;
       else vol0 = domain->xprd * domain->yprd;
       h0_inv[0] = domain->h_inv[0];
       h0_inv[1] = domain->h_inv[1];
       h0_inv[2] = domain->h_inv[2];
       h0_inv[3] = domain->h_inv[3];
       h0_inv[4] = domain->h_inv[4];
       h0_inv[5] = domain->h_inv[5];
     }
   }
 
   // cache unit conversion constants from the force object
 
   boltz = force->boltz;
   nktv2p = force->nktv2p;
 
   if (force->kspace) kspace_flag = 1;
   else kspace_flag = 0;
 
   // with an rRESPA integrator, remap() uses half of the level-0 step
 
   if (strstr(update->integrate_style,"respa")) {
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
     step_respa = ((Respa *) update->integrate)->step;
     dto = 0.5*step_respa[0];
   }
 
   // detect if any rigid fixes exist so rigid bodies move when box is remapped
   // rfix[] = indices to each fix rigid
   // two passes: first count the fixes with rigid_flag set, then store their indices
 
   delete [] rfix;
   nrigid = 0;
   rfix = NULL;
 
   for (int i = 0; i < modify->nfix; i++)
     if (modify->fix[i]->rigid_flag) nrigid++;
   if (nrigid) {
     rfix = new int[nrigid];
     nrigid = 0;
     for (int i = 0; i < modify->nfix; i++)
       if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i;
   }
 }
 
 /* ----------------------------------------------------------------------
    compute T,P before integrator starts
 ------------------------------------------------------------------------- */
 
 void FixNH::setup(int vflag)
 {
   // initialize some quantities that were not available earlier
 
   tdof = temperature->dof;
 
   // t_target is needed by NPH and NPT in compute_scalar()
   // If no thermostat or using fix nphug,
   // t_target must be defined by other means.
 
   if (tstat_flag && strcmp(style,"nphug") != 0) {
     compute_temp_target();
   } else if (pstat_flag) {
 
     // t0 = reference temperature for masses
     // cannot be done in init() b/c temperature cannot be called there
     // is b/c Modify::init() inits computes after fixes due to dof dependence
     // guesstimate a unit-dependent t0 if actual T = 0.0
     // if it was read in from a restart file, leave it be
 
     if (t0 == 0.0) {
       t0 = temperature->compute_scalar();
       if (t0 == 0.0) {
         if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0;
         else t0 = 300.0;
       }
     }
     t_target = t0;
   }
 
   if (pstat_flag) compute_press_target();
 
   t_current = temperature->compute_scalar();
   if (pstat_flag) {
     if (pstyle == ISO) pressure->compute_scalar();
     else pressure->compute_vector();
     couple();
     pressure->addstep(update->ntimestep+1);
   }
 
   // masses and initial forces on thermostat variables
 
   if (tstat_flag) {
     eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++)
       eta_mass[ich] = boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++) {
       eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1] -
                          boltz * t_target) / eta_mass[ich];
     }
   }
 
   // masses and initial forces on barostat variables
 
   if (pstat_flag) {
     double kt = boltz * t_target;
     double nkt = atom->natoms * kt;
 
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
 
     if (pstyle == TRICLINIC) {
       for (int i = 3; i < 6; i++)
         if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
     }
 
   // masses and initial forces on barostat thermostat variables
 
     if (mpchain) {
       etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_dotdot[ich] =
           (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] -
            boltz * t_target) / etap_mass[ich];
     }
 
   }
 }
 
 /* ----------------------------------------------------------------------
    1st half of Verlet update
 ------------------------------------------------------------------------- */
 
 void FixNH::initial_integrate(int vflag)
 {
   // barostat thermostat chain first (eta_press_dot)
 
   if (pstat_flag && mpchain) nhc_press_integrate();
 
   // then the particle thermostat chain (eta_dot)
 
   if (tstat_flag) {
     compute_temp_target();
     nhc_temp_integrate();
   }
 
   if (pstat_flag) {
 
     // pressure must be recomputed to account for the change in KE
     // t_current is up-to-date, but compute_temperature is not
     // couple() then fills the coupled elements of the current pressure
 
     if (pstyle != ISO) {
       temperature->compute_vector();
       pressure->compute_vector();
     } else {
       temperature->compute_scalar();
       pressure->compute_scalar();
     }
     couple();
     pressure->addstep(update->ntimestep+1);
 
     // advance barostat velocities and apply them to atom velocities
 
     compute_press_target();
     nh_omega_dot();
     nh_v_press();
   }
 
   nve_v();
 
   // position update is bracketed by two 1/2 step remaps of the box
 
   if (pstat_flag) remap();
 
   nve_x();
 
   if (pstat_flag) {
     remap();
 
     // redo KSpace coeffs since volume has changed
 
     if (kspace_flag) force->kspace->setup();
   }
 }
 
 /* ----------------------------------------------------------------------
    2nd half of Verlet update
 ------------------------------------------------------------------------- */
 
 void FixNH::final_integrate()
 {
   nve_v();
 
   if (pstat_flag) nh_v_press();
 
   // new T, and with a barostat new P coupled as requested,
   // followed by the barostat velocity update
 
   t_current = temperature->compute_scalar();
 
   if (pstat_flag) {
     if (pstyle != ISO) pressure->compute_vector();
     else pressure->compute_scalar();
     couple();
     pressure->addstep(update->ntimestep+1);
 
     nh_omega_dot();
   }
 
   // thermostat chains last: eta_dot, then eta_press_dot
 
   if (tstat_flag) nhc_temp_integrate();
   if (pstat_flag && mpchain) nhc_press_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::initial_integrate_respa(int vflag, int ilevel, int iloop)
 {
   // timesteps for this rRESPA level
 
   const double dt_level = step_respa[ilevel];
   dtv = dt_level;
   dtf = 0.5 * dt_level * force->ftm2v;
   dthalf = 0.5 * dt_level;
 
   const bool outermost = (ilevel == nlevels_respa-1);
 
   // outermost level - update eta_dot and omega_dot, apply to v
 
   if (outermost) {
 
     // eta_press_dot, then eta_dot
 
     if (pstat_flag && mpchain) nhc_press_integrate();
 
     if (tstat_flag) {
       compute_temp_target();
       nhc_temp_integrate();
     }
 
     if (pstat_flag) {
 
       // recompute pressure to account for change in KE
       // t_current is up-to-date, but compute_temperature is not
 
       if (pstyle != ISO) {
         temperature->compute_vector();
         pressure->compute_vector();
       } else {
         temperature->compute_scalar();
         pressure->compute_scalar();
       }
       couple();
       pressure->addstep(update->ntimestep+1);
 
       compute_press_target();
       nh_omega_dot();
       nh_v_press();
     }
   }
 
   // every level performs the NVE update of v
 
   nve_v();
 
   // innermost level - also update x
   // if barostat, perform 1/2 step remap before and after
 
   if (ilevel == 0) {
     if (pstat_flag) remap();
     nve_x();
     if (pstat_flag) remap();
   }
 
   // if barostat, redo KSpace coeffs at outermost level,
   // since volume has changed
 
   if (outermost && kspace_flag && pstat_flag) force->kspace->setup();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::final_integrate_respa(int ilevel, int iloop)
 {
   // timesteps for this rRESPA level
 
   const double dt_level = step_respa[ilevel];
   dtf = 0.5 * dt_level * force->ftm2v;
   dthalf = 0.5 * dt_level;
 
   // inner levels - NVE update of v only
 
   if (ilevel != nlevels_respa-1) {
     nve_v();
     return;
   }
 
   // outermost level - eta_dot and omega_dot are updated via final_integrate
 
   final_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::couple()
 {
   const double *ptensor = pressure->vector;
 
   if (pstyle == ISO) {
 
     // isotropic: all three diagonal entries take the scalar pressure
 
     const double pscalar = pressure->scalar;
     p_current[0] = pscalar;
     p_current[1] = pscalar;
     p_current[2] = pscalar;
 
   } else {
 
     // start from the uncoupled diagonal,
     // then overwrite coupled components with their average
 
     p_current[0] = ptensor[0];
     p_current[1] = ptensor[1];
     p_current[2] = ptensor[2];
 
     if (pcouple == XYZ) {
       const double ave = 1.0/3.0 * (ptensor[0] + ptensor[1] + ptensor[2]);
       p_current[0] = p_current[1] = p_current[2] = ave;
     } else if (pcouple == XY) {
       const double ave = 0.5 * (ptensor[0] + ptensor[1]);
       p_current[0] = p_current[1] = ave;
     } else if (pcouple == YZ) {
       const double ave = 0.5 * (ptensor[1] + ptensor[2]);
       p_current[1] = p_current[2] = ave;
     } else if (pcouple == XZ) {
       const double ave = 0.5 * (ptensor[0] + ptensor[2]);
       p_current[0] = p_current[2] = ave;
     }
   }
 
   // off-diagonal terms: switch order from xy-xz-yz to Voigt
 
   if (pstyle == TRICLINIC) {
     for (int k = 3; k < 6; k++) p_current[k] = ptensor[8-k];
   }
 }
 
 /* ----------------------------------------------------------------------
    change box size
    remap all atoms or dilate group atoms depending on allremap flag
    if rigid bodies exist, scale rigid body centers-of-mass
 ------------------------------------------------------------------------- */
 
 void FixNH::remap()
 {
   // advances the box by the step dto using the current omega_dot:
   //   1) convert affected atoms (and rigid bodies) to lamda coords
   //   2) update tilt factors and box bounds
   //   3) reset the domain and convert back to box coords
 
   int i;
   double oldlo,oldhi;
   double expfac;
 
   double **x = atom->x;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   double *h = domain->h;
 
   // omega is not used, except for book-keeping
   // (this loop declares its own index, shadowing the function-scope i)
 
   for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i];
 
   // convert pertinent atoms and rigid bodies to lamda coords
   // allremap: all local atoms, else only atoms in the dilate group
 
   if (allremap) domain->x2lamda(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->x2lamda(x[i],x[i]);
   }
 
   // rigid fixes are notified with deform(0) before the box changes
   // and with deform(1) at the end of this function
 
   if (nrigid)
     for (i = 0; i < nrigid; i++)
       modify->fix[rfix[i]]->deform(0);
 
   // reset global and local box to new size/shape
 
   // this operation corresponds to applying the
   // translate and scale operations
   // corresponding to the solution of the following ODE:
   //
   // h_dot = omega_dot * h
   //
   // where h_dot, omega_dot and h are all upper-triangular
   // 3x3 tensors. In Voigt notation, the elements of the
   // RHS product tensor are:
   // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1]
   //
   // Ordering of operations preserves time symmetry.
 
   // fractions of the remap step used by the split updates below
 
   double dto2 = dto/2.0;
   double dto4 = dto/4.0;
   double dto8 = dto/8.0;
 
   // off-diagonal components, first half
   // h[3],h[4],h[5] are written back to domain->yz,xz,xy further down
   // h[4] is updated twice per half, bracketing the h[3] and h[5] updates
 
   if (pstyle == TRICLINIC) {
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
     if (p_flag[3]) {
       expfac = exp(dto4*omega_dot[1]);
       h[3] *= expfac;
       h[3] += dto2*(omega_dot[3]*h[2]);
       h[3] *= expfac;
     }
 
     if (p_flag[5]) {
       expfac = exp(dto4*omega_dot[0]);
       h[5] *= expfac;
       h[5] += dto2*(omega_dot[5]*h[1]);
       h[5] *= expfac;
     }
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
   }
 
   // scale diagonal components
   // scale tilt factors with cell, if set
   // each pair of box bounds is scaled about fixedpoint[] by exp(dto*omega_dot)
 
   if (p_flag[0]) {
     oldlo = domain->boxlo[0];
     oldhi = domain->boxhi[0];
     expfac = exp(dto*omega_dot[0]);
     domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0];
     domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0];
   }
 
   if (p_flag[1]) {
     oldlo = domain->boxlo[1];
     oldhi = domain->boxhi[1];
     expfac = exp(dto*omega_dot[1]);
     domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1];
     domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1];
     if (scalexy) h[5] *= expfac;
   }
 
   if (p_flag[2]) {
     oldlo = domain->boxlo[2];
     oldhi = domain->boxhi[2];
     expfac = exp(dto*omega_dot[2]);
     domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2];
     domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2];
     if (scalexz) h[4] *= expfac;
     if (scaleyz) h[3] *= expfac;
   }
 
   // off-diagonal components, second half
   // same sequence of updates as the first half
 
   if (pstyle == TRICLINIC) {
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
     if (p_flag[3]) {
       expfac = exp(dto4*omega_dot[1]);
       h[3] *= expfac;
       h[3] += dto2*(omega_dot[3]*h[2]);
       h[3] *= expfac;
     }
 
     if (p_flag[5]) {
       expfac = exp(dto4*omega_dot[0]);
       h[5] *= expfac;
       h[5] += dto2*(omega_dot[5]*h[1]);
       h[5] *= expfac;
     }
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
   }
 
   // publish the updated tilt factors to the domain
 
   domain->yz = h[3];
   domain->xz = h[4];
   domain->xy = h[5];
 
   // tilt factor to cell length ratio can not exceed TILTMAX in one step
   // yz is checked against yprd, xz and xy against xprd
 
   if (domain->yz < -TILTMAX*domain->yprd ||
       domain->yz > TILTMAX*domain->yprd ||
       domain->xz < -TILTMAX*domain->xprd ||
       domain->xz > TILTMAX*domain->xprd ||
       domain->xy < -TILTMAX*domain->xprd ||
       domain->xy > TILTMAX*domain->xprd)
     error->all(FLERR,"Fix npt/nph has tilted box too far in one step - "
                "periodic cell is too far from equilibrium state");
 
   domain->set_global_box();
   domain->set_local_box();
 
   // convert pertinent atoms and rigid bodies back to box coords
 
   if (allremap) domain->lamda2x(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->lamda2x(x[i],x[i]);
   }
 
   if (nrigid)
     for (i = 0; i < nrigid; i++)
       modify->fix[rfix[i]]->deform(1);
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
 ------------------------------------------------------------------------- */
 
 void FixNH::write_restart(FILE *fp)
 {
   const int nvalues = size_restart_global();
 
   double *buf;
   memory->create(buf,nvalues,"nh:list");
 
   // the buffer is packed on every process,
   // but only the process with comm->me == 0 writes it
 
   pack_restart_data(buf);
 
   if (comm->me == 0) {
 
     // record = byte count (as int) followed by the packed doubles
 
     int nbytes = nvalues * sizeof(double);
     fwrite(&nbytes,sizeof(int),1,fp);
     fwrite(buf,sizeof(double),nvalues,fp);
   }
 
   memory->destroy(buf);
 }
 
 /* ----------------------------------------------------------------------
    calculate the number of values to be packed
 ------------------------------------------------------------------------- */
 
 int FixNH::size_restart_global()
 {
   // the two flag slots (tstat_flag, pstat_flag) are always written
 
   int count = 2;
 
   // thermostat: chain length + eta[] + eta_dot[]
 
   if (tstat_flag) count += 1 + 2*mtchain;
 
   if (!pstat_flag) return count;
 
   // barostat: omega[6] + omega_dot[6] + vol0 + t0 + mpchain +
   //   deviatoric_flag = 16, then etap[] + etap_dot[]
 
   count += 16 + 2*mpchain;
 
   // reference cell h0_inv[6] is stored only with deviatoric_flag
 
   if (deviatoric_flag) count += 6;
 
   return count;
 }
 
 /* ----------------------------------------------------------------------
    pack restart data
 ------------------------------------------------------------------------- */
 
 int FixNH::pack_restart_data(double *list)
 {
   // write cursor, the number of values packed is its final offset
 
   double *out = list;
 
   // thermostat section: flag, then chain length, eta[], eta_dot[]
 
   *out++ = tstat_flag;
   if (tstat_flag) {
     *out++ = mtchain;
     for (int k = 0; k < mtchain; k++) *out++ = eta[k];
     for (int k = 0; k < mtchain; k++) *out++ = eta_dot[k];
   }
 
   // barostat section: flag, then omega[6], omega_dot[6], vol0, t0,
   // chain length with etap[], etap_dot[], deviatoric flag with h0_inv[6]
 
   *out++ = pstat_flag;
   if (pstat_flag) {
     for (int d = 0; d < 6; d++) *out++ = omega[d];
     for (int d = 0; d < 6; d++) *out++ = omega_dot[d];
     *out++ = vol0;
     *out++ = t0;
 
     *out++ = mpchain;
     if (mpchain) {
       for (int k = 0; k < mpchain; k++) *out++ = etap[k];
       for (int k = 0; k < mpchain; k++) *out++ = etap_dot[k];
     }
 
     *out++ = deviatoric_flag;
     if (deviatoric_flag)
       for (int d = 0; d < 6; d++) *out++ = h0_inv[d];
   }
 
   return static_cast<int> (out - list);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixNH::restart(char *buf)
 {
   int n = 0;
   double *list = (double *) buf;
   int flag = static_cast<int> (list[n++]);
   if (flag) {
     int m = static_cast<int> (list[n++]);
     if (tstat_flag && m == mtchain) {
       for (int ich = 0; ich < mtchain; ich++)
         eta[ich] = list[n++];
       for (int ich = 0; ich < mtchain; ich++)
         eta_dot[ich] = list[n++];
     } else n += 2*m;
   }
   flag = static_cast<int> (list[n++]);
   if (flag) {
     omega[0] = list[n++];
     omega[1] = list[n++];
     omega[2] = list[n++];
     omega[3] = list[n++];
     omega[4] = list[n++];
     omega[5] = list[n++];
     omega_dot[0] = list[n++];
     omega_dot[1] = list[n++];
     omega_dot[2] = list[n++];
     omega_dot[3] = list[n++];
     omega_dot[4] = list[n++];
     omega_dot[5] = list[n++];
     vol0 = list[n++];
     t0 = list[n++];
     int m = static_cast<int> (list[n++]);
     if (pstat_flag && m == mpchain) {
       for (int ich = 0; ich < mpchain; ich++)
         etap[ich] = list[n++];
       for (int ich = 0; ich < mpchain; ich++)
         etap_dot[ich] = list[n++];
     } else n+=2*m;
     flag = static_cast<int> (list[n++]);
     if (flag) {
       h0_inv[0] = list[n++];
       h0_inv[1] = list[n++];
       h0_inv[2] = list[n++];
       h0_inv[3] = list[n++];
       h0_inv[4] = list[n++];
       h0_inv[5] = list[n++];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNH::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (tflag) {
       modify->delete_compute(id_temp);
       tflag = 0;
     }
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0)
       error->all(FLERR,"Could not find fix_modify temperature ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,
                  "Fix_modify temperature ID does not compute temperature");
     if (temperature->igroup != 0 && comm->me == 0)
       error->warning(FLERR,"Temperature for fix modify is not for group all");
 
     // reset id_temp of pressure to new temperature ID
 
     if (pstat_flag) {
       icompute = modify->find_compute(id_press);
       if (icompute < 0)
         error->all(FLERR,"Pressure ID for fix modify does not exist");
       modify->compute[icompute]->reset_extra_compute_fix(id_temp);
     }
 
     return 2;
 
   } else if (strcmp(arg[0],"press") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command");
     if (pflag) {
       modify->delete_compute(id_press);
       pflag = 0;
     }
     delete [] id_press;
     int n = strlen(arg[1]) + 1;
     id_press = new char[n];
     strcpy(id_press,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID");
     pressure = modify->compute[icompute];
 
     if (pressure->pressflag == 0)
       error->all(FLERR,"Fix_modify pressure ID does not compute pressure");
     return 2;
   }
 
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixNH::compute_scalar()
 {
   const double kt = boltz * t_target;
   double total = 0.0;
 
   // thermostat chain energy is equivalent to Eq. (2) in
   // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117
   // Sum(0.5*p_eta_k^2/Q_k,k=1,M) + L*k*T*eta_1 + Sum(k*T*eta_k,k=2,M),
   // where L = tdof
   //       M = mtchain
   //       p_eta_k = Q_k*eta_dot[k-1]
   //       Q_1 = L*k*T/t_freq^2
   //       Q_k = k*T/t_freq^2, k > 1
   // the first link uses ke_target, all later links use kt
 
   if (tstat_flag) {
     total += ke_target * eta[0] + 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0];
     for (int k = 1; k < mtchain; k++)
       total += kt * eta[k] + 0.5*eta_mass[k]*eta_dot[k]*eta_dot[k];
   }
 
   // without a barostat there is nothing more to add
 
   if (!pstat_flag) return total;
 
   // current box volume (area in 2d)
 
   double volume = domain->xprd * domain->yprd;
   if (dimension == 3) volume *= domain->zprd;
 
   // barostat energy is equivalent to Eq. (8) in
   // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117
   // Sum(0.5*p_omega^2/W + P*V),
   // where N = natoms
   //       p_omega = W*omega_dot
   //       W = N*k*T/p_freq^2
   //       sum is over barostatted dimensions
 
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
     total += 0.5*omega_dot[d]*omega_dot[d]*omega_mass[d] +
       p_hydro*(volume-vol0) / (pdim*nktv2p);
   }
 
   // off-diagonal components contribute their kinetic term only
 
   if (pstyle == TRICLINIC) {
     for (int d = 3; d < 6; d++) {
       if (!p_flag[d]) continue;
       total += 0.5*omega_dot[d]*omega_dot[d]*omega_mass[d];
     }
   }
 
   // extra contributions from thermostat chain for barostat
   // every link, including the first, uses kt
 
   if (mpchain) {
     for (int k = 0; k < mpchain; k++)
       total += kt * etap[k] + 0.5*etap_mass[k]*etap_dot[k]*etap_dot[k];
   }
 
   // extra contribution from strain energy
 
   if (deviatoric_flag) total += compute_strain_energy();
 
   return total;
 }
 
 /* ----------------------------------------------------------------------
    return a single element of the following vectors, in this order:
       eta[tchain], eta_dot[tchain], omega[ndof], omega_dot[ndof]
       etap[pchain], etap_dot[pchain], PE_eta[tchain], KE_eta_dot[tchain]
       PE_omega[ndof], KE_omega_dot[ndof], PE_etap[pchain], KE_etap_dot[pchain]
       PE_strain[1]
   if no thermostat exists, related quantities are omitted from the list
   if no barostat exists, related quantities are omitted from the list
   ndof = 1,3,6 degrees of freedom for pstyle = ISO,ANISO,TRI
 ------------------------------------------------------------------------- */
 
 double FixNH::compute_vector(int n)
 {
   // n is consumed section by section: each section of length ilen either
   // returns its n-th element or subtracts ilen from n and falls through
   // to the next section; sections whose flag is off are skipped entirely
 
   int ilen;
 
   // particle thermostat chain: positions eta[], then velocities eta_dot[]
 
   if (tstat_flag) {
     ilen = mtchain;
     if (n < ilen) return eta[n];
     n -= ilen;
     ilen = mtchain;
     if (n < ilen) return eta_dot[n];
     n -= ilen;
   }
 
   // barostat: omega[], omega_dot[] (1, 3 or 6 entries depending on pstyle),
   // then the barostat thermostat chain etap[], etap_dot[] if mpchain > 0
 
   if (pstat_flag) {
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen) return omega[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) return omega[n];
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) return omega[n];
       n -= ilen;
     }
 
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     }
 
     if (mpchain) {
       ilen = mpchain;
       if (n < ilen) return etap[n];
       n -= ilen;
       ilen = mpchain;
       if (n < ilen) return etap_dot[n];
       n -= ilen;
     }
   }
 
   // quantities shared by the energy sections below
 
   double volume;
   double kt = boltz * t_target;
   double lkt_press = kt;
   int ich;
   if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd;
   else volume = domain->xprd * domain->yprd;
 
   // PE_eta[] then KE_eta_dot[] of the particle thermostat chain
   // first chain element uses ke_target, the others use kt
 
   if (tstat_flag) {
     ilen = mtchain;
     if (n < ilen) {
       ich = n;
       if (ich == 0)
         return ke_target * eta[0];
       else
         return kt * eta[ich];
     }
     n -= ilen;
     ilen = mtchain;
     if (n < ilen) {
       ich = n;
       if (ich == 0)
         return 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0];
       else
         return 0.5*eta_mass[ich]*eta_dot[ich]*eta_dot[ich];
     }
     n -= ilen;
   }
 
   // PE_omega[], KE_omega_dot[], PE_etap[], KE_etap_dot[], PE_strain
   // in the nested if/else below, each else binds to the innermost if,
   // so every n < ilen case returns and "n -= ilen" runs only otherwise
 
   if (pstat_flag) {
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen)
         return p_hydro*(volume-vol0) / nktv2p;
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen)
         if (p_flag[n])
           return p_hydro*(volume-vol0) / (pdim*nktv2p);
         else
           return 0.0;
       n -= ilen;
     } else {
       // off-diagonal components (n > 2) report zero PE_omega
       ilen = 6;
       if (n < ilen)
         if (n > 2) return 0.0;
         else if (p_flag[n])
           return p_hydro*(volume-vol0) / (pdim*nktv2p);
         else
           return 0.0;
       n -= ilen;
     }
 
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen)
         return pdim*0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen)
         if (p_flag[n])
           return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
         else return 0.0;
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen)
         if (p_flag[n])
           return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
         else return 0.0;
       n -= ilen;
     }
 
     if (mpchain) {
       ilen = mpchain;
       if (n < ilen) {
         ich = n;
         if (ich == 0) return lkt_press * etap[0];
         else return kt * etap[ich];
       }
       n -= ilen;
       ilen = mpchain;
       if (n < ilen) {
         ich = n;
         if (ich == 0)
           return 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0];
         else
           return 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich];
       }
       n -= ilen;
     }
 
     if (deviatoric_flag) {
       ilen = 1;
       if (n < ilen)
         return compute_strain_energy();
       n -= ilen;
     }
   }
 
   // n is beyond every enabled section
 
   return 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::reset_target(double t_new)
 {
   // collapse the temperature ramp onto a single value:
   // both ramp endpoints and the current target become t_new
 
   t_stop = t_new;
   t_start = t_new;
   t_target = t_new;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::reset_dt()
 {
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dthalf = 0.5 * update->dt;
   dt4 = 0.25 * update->dt;
   dt8 = 0.125 * update->dt;
   dto = dthalf;
 
   // If using respa, then remap is performed in innermost level
 
   if (strstr(update->integrate_style,"respa"))
     dto = 0.5*step_respa[0];
 
   if (pstat_flag)
     pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);
 
   if (tstat_flag)
     tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
 }
 
 /* ----------------------------------------------------------------------
    extract thermostat properties
 ------------------------------------------------------------------------- */
 
 void *FixNH::extract(const char *str, int &dim)
 {
   // dim is always reported as 0; only "t_target" is recognized,
   // any other name yields NULL
 
   dim = 0;
   if (strcmp(str,"t_target") != 0) return NULL;
   return &t_target;
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of chain thermostat variables
 ------------------------------------------------------------------------- */
 
 void FixNH::nhc_temp_integrate()
 {
   int ich;
   double expfac;
   // kinetic energy expressed the same way as ke_target (tdof*boltz*T)
   double kecurrent = tdof * boltz * t_current;
 
   // Update masses, to preserve initial freq, if flag set
   // NOTE(review): the loop below declares its own ich, shadowing the
   // function-scope ich declared above
 
   if (eta_mass_flag) {
     eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++)
       eta_mass[ich] = boltz * t_target / (t_freq*t_freq);
   }
 
   // force on first chain element; zero mass disables it
 
   if (eta_mass[0] > 0.0)
     eta_dotdot[0] = (kecurrent - ke_target)/eta_mass[0];
   else eta_dotdot[0] = 0.0;
 
   // the half-step is split into nc_tchain sub-steps of weight ncfac
 
   double ncfac = 1.0/nc_tchain;
   for (int iloop = 0; iloop < nc_tchain; iloop++) {
 
     // update chain velocities from the far end of the chain inward
     // NOTE(review): for ich = mtchain-1 this reads eta_dot[mtchain];
     // presumably eta_dot is allocated with one extra trailing element
     // that is kept at zero - confirm in the constructor
 
     for (ich = mtchain-1; ich > 0; ich--) {
       expfac = exp(-ncfac*dt8*eta_dot[ich+1]);
       eta_dot[ich] *= expfac;
       eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4;
       eta_dot[ich] *= tdrag_factor;
       eta_dot[ich] *= expfac;
     }
 
     expfac = exp(-ncfac*dt8*eta_dot[1]);
     eta_dot[0] *= expfac;
     eta_dot[0] += eta_dotdot[0] * ncfac*dt4;
     eta_dot[0] *= tdrag_factor;
     eta_dot[0] *= expfac;
 
     // scale particle velocities by factor_eta via nh_v_temp()
 
     factor_eta = exp(-ncfac*dthalf*eta_dot[0]);
     nh_v_temp();
 
     // rescale temperature due to velocity scaling
     // should not be necessary to explicitly recompute the temperature
 
     t_current *= factor_eta*factor_eta;
     kecurrent = tdof * boltz * t_current;
 
     if (eta_mass[0] > 0.0)
       eta_dotdot[0] = (kecurrent - ke_target)/eta_mass[0];
     else eta_dotdot[0] = 0.0;
 
     // advance chain positions
 
     for (ich = 0; ich < mtchain; ich++)
       eta[ich] += ncfac*dthalf*eta_dot[ich];
 
     // second velocity update, now from the first element outward
     // expfac still holds the value computed from eta_dot[1] above
 
     eta_dot[0] *= expfac;
     eta_dot[0] += eta_dotdot[0] * ncfac*dt4;
     eta_dot[0] *= expfac;
 
     for (ich = 1; ich < mtchain; ich++) {
       expfac = exp(-ncfac*dt8*eta_dot[ich+1]);
       eta_dot[ich] *= expfac;
       eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1]
                          - boltz * t_target)/eta_mass[ich];
       eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4;
       eta_dot[ich] *= expfac;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of chain thermostat variables for barostat
    scale barostat velocities
 ------------------------------------------------------------------------- */
 
 void FixNH::nhc_press_integrate()
 {
   int ich,i;
   double expfac,factor_etap,kecurrent;
   double kt = boltz * t_target;
   double lkt_press = kt;
 
   // Update masses, to preserve initial freq, if flag set
   // NOTE(review): the loops in the two mass-update blocks declare their
   // own i / ich, shadowing the function-scope variables declared above
 
   if (omega_mass_flag) {
     double nkt = atom->natoms * kt;
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
 
     if (pstyle == TRICLINIC) {
       for (int i = 3; i < 6; i++)
         if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
     }
   }
 
   // chain masses all use p_freq_max; chain forces for ich >= 1 are
   // recomputed from the new masses
 
   if (etap_mass_flag) {
     if (mpchain) {
       etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_dotdot[ich] =
           (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] -
            boltz * t_target) / etap_mass[ich];
     }
   }
 
   // sum of omega_mass*omega_dot^2 over barostatted components
   // (off-diagonal components only for TRICLINIC)
 
   kecurrent = 0.0;
   for (i = 0; i < 3; i++)
     if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
 
   if (pstyle == TRICLINIC) {
     for (i = 3; i < 6; i++)
       if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
   }
 
   // NOTE(review): etap_mass[0] is used unconditionally here, so this
   // presumably is only called when mpchain > 0 - confirm at call sites
 
   etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
 
   // the half-step is split into nc_pchain sub-steps of weight ncfac
 
   double ncfac = 1.0/nc_pchain;
   for (int iloop = 0; iloop < nc_pchain; iloop++) {
 
     // update chain velocities from the far end of the chain inward
     // NOTE(review): for ich = mpchain-1 this reads etap_dot[mpchain];
     // presumably etap_dot has one extra trailing element kept at zero -
     // confirm in the constructor
 
     for (ich = mpchain-1; ich > 0; ich--) {
       expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
       etap_dot[ich] *= expfac;
       etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
       etap_dot[ich] *= pdrag_factor;
       etap_dot[ich] *= expfac;
     }
 
     expfac = exp(-ncfac*dt8*etap_dot[1]);
     etap_dot[0] *= expfac;
     etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
     etap_dot[0] *= pdrag_factor;
     etap_dot[0] *= expfac;
 
     // advance chain positions
 
     for (ich = 0; ich < mpchain; ich++)
       etap[ich] += ncfac*dthalf*etap_dot[ich];
 
     // scale barostat velocities by the first chain element
 
     factor_etap = exp(-ncfac*dthalf*etap_dot[0]);
     for (i = 0; i < 3; i++)
       if (p_flag[i]) omega_dot[i] *= factor_etap;
 
     if (pstyle == TRICLINIC) {
       for (i = 3; i < 6; i++)
         if (p_flag[i]) omega_dot[i] *= factor_etap;
     }
 
     // recompute barostat kinetic term after the scaling
 
     kecurrent = 0.0;
     for (i = 0; i < 3; i++)
       if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
 
     if (pstyle == TRICLINIC) {
       for (i = 3; i < 6; i++)
         if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
     }
 
     etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
 
     // second velocity update, now from the first element outward
     // expfac still holds the value computed from etap_dot[1] above
 
     etap_dot[0] *= expfac;
     etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
     etap_dot[0] *= expfac;
 
     for (ich = 1; ich < mpchain; ich++) {
       expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
       etap_dot[ich] *= expfac;
       etap_dotdot[ich] =
         (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - boltz*t_target) /
         etap_mass[ich];
       etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
       etap_dot[ich] *= expfac;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step barostat scaling of velocities
 -----------------------------------------------------------------------*/
 
 void FixNH::nh_v_press()
 {
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
   double **v = atom->v;
   int *mask = atom->mask;
 
   // per-dimension scale factors, applied once before and once after
   // the triclinic coupling terms
 
   double factor[3];
   for (int k = 0; k < 3; k++)
     factor[k] = exp(-dt4*(omega_dot[k]+mtk_term2));
 
   // only the NOBIAS and BIAS modes scale velocities
 
   if (which != NOBIAS && which != BIAS) return;
   const int biased = (which == BIAS);
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     double *vi = v[i];
 
     // in BIAS mode the temperature compute strips its bias first
 
     if (biased) temperature->remove_bias(i,vi);
 
     vi[0] *= factor[0];
     vi[1] *= factor[1];
     vi[2] *= factor[2];
     if (pstyle == TRICLINIC) {
       vi[0] += -dthalf*(vi[1]*omega_dot[5] + vi[2]*omega_dot[4]);
       vi[1] += -dthalf*vi[2]*omega_dot[3];
     }
     vi[0] *= factor[0];
     vi[1] *= factor[1];
     vi[2] *= factor[2];
 
     if (biased) temperature->restore_bias(i,vi);
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of velocities
 -----------------------------------------------------------------------*/
 
 void FixNH::nve_v()
 {
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   double **v = atom->v;
   double **f = atom->f;
   int *mask = atom->mask;
   double *rmass = atom->rmass;
 
   // per-atom masses take precedence when rmass is set
 
   if (rmass) {
     for (int i = 0; i < nlocal; i++) {
       if (!(mask[i] & groupbit)) continue;
       const double dtfm = dtf / rmass[i];
       v[i][0] += dtfm*f[i][0];
       v[i][1] += dtfm*f[i][1];
       v[i][2] += dtfm*f[i][2];
     }
     return;
   }
 
   // otherwise look the mass up by atom type
 
   double *mass = atom->mass;
   int *type = atom->type;
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     const double dtfm = dtf / mass[type[i]];
     v[i][0] += dtfm*f[i][0];
     v[i][1] += dtfm*f[i][1];
     v[i][2] += dtfm*f[i][2];
   }
 }
 
 /* ----------------------------------------------------------------------
    perform full-step update of positions
 -----------------------------------------------------------------------*/
 
 void FixNH::nve_x()
 {
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   double **x = atom->x;
   double **v = atom->v;
   int *mask = atom->mask;
 
   // advance positions by a full step, atoms outside the group are skipped
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     for (int k = 0; k < 3; k++)
       x[i][k] += dtv * v[i][k];
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step thermostat scaling of velocities
 -----------------------------------------------------------------------*/
 
 void FixNH::nh_v_temp()
 {
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
   double **v = atom->v;
   int *mask = atom->mask;
 
   // only the NOBIAS and BIAS modes scale velocities
 
   if (which != NOBIAS && which != BIAS) return;
   const int biased = (which == BIAS);
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     double *vi = v[i];
 
     // in BIAS mode the scaling is applied with the bias removed
 
     if (biased) temperature->remove_bias(i,vi);
     vi[0] *= factor_eta;
     vi[1] *= factor_eta;
     vi[2] *= factor_eta;
     if (biased) temperature->restore_bias(i,vi);
   }
 }
 
 /* ----------------------------------------------------------------------
    compute sigma tensor
    needed whenever p_target or h0_inv changes
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_sigma()
 {
   // if nreset_h0 > 0, reset vol0 and h0_inv
   // every nreset_h0 timesteps
 
   if (nreset_h0 > 0) {
     int delta = update->ntimestep - update->beginstep;
     if (delta % nreset_h0 == 0) {
       if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd;
       else vol0 = domain->xprd * domain->yprd;
       h0_inv[0] = domain->h_inv[0];
       h0_inv[1] = domain->h_inv[1];
       h0_inv[2] = domain->h_inv[2];
       h0_inv[3] = domain->h_inv[3];
       h0_inv[4] = domain->h_inv[4];
       h0_inv[5] = domain->h_inv[5];
     }
   }
 
   // generate upper-triangular half of
   // sigma = vol0*h0inv*(p_target-p_hydro)*h0inv^t
   // units of sigma are are PV/L^2 e.g. atm.A
   //
   // [ 0 5 4 ]   [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ]
   // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ]
   // [ 4 3 2 ]   [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ]
 
   sigma[0] =
     vol0*(h0_inv[0]*((p_target[0]-p_hydro)*h0_inv[0] +
                      p_target[5]*h0_inv[5]+p_target[4]*h0_inv[4]) +
           h0_inv[5]*(p_target[5]*h0_inv[0] +
                      (p_target[1]-p_hydro)*h0_inv[5]+p_target[3]*h0_inv[4]) +
           h0_inv[4]*(p_target[4]*h0_inv[0]+p_target[3]*h0_inv[5] +
                      (p_target[2]-p_hydro)*h0_inv[4]));
   sigma[1] =
     vol0*(h0_inv[1]*((p_target[1]-p_hydro)*h0_inv[1] +
                      p_target[3]*h0_inv[3]) +
           h0_inv[3]*(p_target[3]*h0_inv[1] +
                      (p_target[2]-p_hydro)*h0_inv[3]));
   sigma[2] =
     vol0*(h0_inv[2]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[3] =
     vol0*(h0_inv[1]*(p_target[3]*h0_inv[2]) +
           h0_inv[3]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[4] =
     vol0*(h0_inv[0]*(p_target[4]*h0_inv[2]) +
           h0_inv[5]*(p_target[3]*h0_inv[2]) +
           h0_inv[4]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[5] =
     vol0*(h0_inv[0]*(p_target[5]*h0_inv[1]+p_target[4]*h0_inv[3]) +
           h0_inv[5]*((p_target[1]-p_hydro)*h0_inv[1]+p_target[3]*h0_inv[3]) +
           h0_inv[4]*(p_target[3]*h0_inv[1]+(p_target[2]-p_hydro)*h0_inv[3]));
 }
 
 /* ----------------------------------------------------------------------
    compute strain energy
 -----------------------------------------------------------------------*/
 
 double FixNH::compute_strain_energy()
 {
   // strain energy = 0.5*Tr(sigma*h*h^t), converted to energy units
   // by dividing by nktv2p
 
   const double *h = domain->h;
 
   // the three diagonal entries of sigma*h*h^t
 
   const double diag0 =
     sigma[0]*(h[0]*h[0]+h[5]*h[5]+h[4]*h[4]) +
     sigma[5]*(          h[1]*h[5]+h[3]*h[4]) +
     sigma[4]*(                    h[2]*h[4]);
   const double diag1 =
     sigma[5]*(          h[5]*h[1]+h[4]*h[3]) +
     sigma[1]*(          h[1]*h[1]+h[3]*h[3]) +
     sigma[3]*(                    h[2]*h[3]);
   const double diag2 =
     sigma[4]*(                    h[4]*h[2]) +
     sigma[3]*(                    h[3]*h[2]) +
     sigma[2]*(                    h[2]*h[2]);
 
   return 0.5*(diag0+diag1+diag2)/nktv2p;
 }
 
 /* ----------------------------------------------------------------------
    compute deviatoric barostat force = h*sigma*h^t
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_deviatoric()
 {
   // fills fdev[0..5] from sigma[] and the current box matrix domain->h
   // generate upper-triangular part of h*sigma*h^t
   // units of fdev are PV, e.g. atm*A^3
   // the numbers in the matrices below are the 6-component array indices
   // of each tensor entry; "-" marks entries that are zero
   // [ 0 5 4 ]   [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ]
   // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ]
   // [ 4 3 2 ]   [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ]
 
   double* h = domain->h;
 
   // diagonal components
 
   fdev[0] =
     h[0]*(sigma[0]*h[0]+sigma[5]*h[5]+sigma[4]*h[4]) +
     h[5]*(sigma[5]*h[0]+sigma[1]*h[5]+sigma[3]*h[4]) +
     h[4]*(sigma[4]*h[0]+sigma[3]*h[5]+sigma[2]*h[4]);
   fdev[1] =
     h[1]*(              sigma[1]*h[1]+sigma[3]*h[3]) +
     h[3]*(              sigma[3]*h[1]+sigma[2]*h[3]);
   fdev[2] =
     h[2]*(                            sigma[2]*h[2]);
 
   // off-diagonal components
 
   fdev[3] =
     h[1]*(                            sigma[3]*h[2]) +
     h[3]*(                            sigma[2]*h[2]);
   fdev[4] =
     h[0]*(                            sigma[4]*h[2]) +
     h[5]*(                            sigma[3]*h[2]) +
     h[4]*(                            sigma[2]*h[2]);
   fdev[5] =
     h[0]*(              sigma[5]*h[1]+sigma[4]*h[3]) +
     h[5]*(              sigma[1]*h[1]+sigma[3]*h[3]) +
     h[4]*(              sigma[3]*h[1]+sigma[2]*h[3]);
 }
 
 /* ----------------------------------------------------------------------
    compute target temperature and kinetic energy
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_temp_target()
 {
   // fraction of the run elapsed so far; left at zero on the first step
   // so the division is skipped there
 
   double frac = update->ntimestep - update->beginstep;
   if (frac != 0.0) frac /= update->endstep - update->beginstep;
 
   // linear ramp between t_start and t_stop, plus matching kinetic energy
 
   const double t_span = t_stop-t_start;
   t_target = t_start + frac * t_span;
   ke_target = tdof * boltz * t_target;
 }
 
 /* ----------------------------------------------------------------------
    compute hydrostatic target pressure
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_press_target()
 {
   // fraction of the run elapsed so far; zero on the first step
 
   double delta = update->ntimestep - update->beginstep;
   if (delta != 0.0) delta /= update->endstep - update->beginstep;
 
   // ramp each barostatted diagonal component and average them
   // into the hydrostatic target pressure
 
   p_hydro = 0.0;
   for (int i = 0; i < 3; i++)
     if (p_flag[i]) {
       p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]);
       p_hydro += p_target[i];
     }
 
   // pdim can be 0 when only off-diagonal components are barostatted;
   // leave p_hydro at 0.0 in that case instead of producing 0/0 = NaN
 
   if (pdim > 0) p_hydro /= pdim;
 
   // off-diagonal targets are ramped for all three tilt components
 
   if (pstyle == TRICLINIC)
     for (int i = 3; i < 6; i++)
       p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]);
 
   // if deviatoric, recompute sigma each time p_target changes
 
   if (deviatoric_flag) compute_sigma();
 }
 
 /* ----------------------------------------------------------------------
    update omega_dot, omega
 -----------------------------------------------------------------------*/
 
 void FixNH::nh_omega_dot()
 {
   double f_omega,volume;
 
   if (dimension == 3) volume = domain->xprd*domain->yprd*domain->zprd;
   else volume = domain->xprd*domain->yprd;
 
   if (deviatoric_flag) compute_deviatoric();
 
   // first MTK correction term, added to the barostat force below
   // pdim can be 0 when only off-diagonal components are barostatted;
   // the division is skipped then so the term stays finite
 
   mtk_term1 = 0.0;
   if (mtk_flag) {
     if (pstyle == ISO) {
       mtk_term1 = tdof * boltz * t_current;
     } else {
       double *mvv_current = temperature->vector;
       for (int i = 0; i < 3; i++)
         if (p_flag[i])
           mtk_term1 += mvv_current[i];
     }
     if (pdim > 0) mtk_term1 /= pdim * atom->natoms;
   }
 
   // half-step update of diagonal barostat velocities, then apply drag
 
   for (int i = 0; i < 3; i++)
     if (p_flag[i]) {
       f_omega = (p_current[i]-p_hydro)*volume /
         (omega_mass[i] * nktv2p) + mtk_term1 / omega_mass[i];
       if (deviatoric_flag) f_omega -= fdev[i]/(omega_mass[i] * nktv2p);
       omega_dot[i] += f_omega*dthalf;
       omega_dot[i] *= pdrag_factor;
     }
 
   // second MTK correction term, consumed by nh_v_press()
   // same pdim == 0 guard: a NaN here would propagate into every
   // velocity scaled by nh_v_press()
 
   mtk_term2 = 0.0;
   if (mtk_flag) {
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         mtk_term2 += omega_dot[i];
     if (pdim > 0) mtk_term2 /= pdim * atom->natoms;
   }
 
   // off-diagonal barostat velocities have no hydrostatic or MTK term
 
   if (pstyle == TRICLINIC) {
     for (int i = 3; i < 6; i++) {
       if (p_flag[i]) {
         f_omega = p_current[i]*volume/(omega_mass[i] * nktv2p);
         if (deviatoric_flag)
           f_omega -= fdev[i]/(omega_mass[i] * nktv2p);
         omega_dot[i] += f_omega*dthalf;
         omega_dot[i] *= pdrag_factor;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   if any tilt ratios exceed limits, set flip = 1 and compute new tilt values
   do not flip in x or y if non-periodic (can tilt but not flip)
     this is b/c the box length would be changed (dramatically) by flip
   if yz tilt exceeded, adjust C vector by one B vector
   if xz tilt exceeded, adjust C vector by one A vector
   if xy tilt exceeded, adjust B vector by one A vector
   check yz first since it may change xz, then xz check comes after
   if any flip occurs, create new box in domain
   image_flip() adjusts image flags due to box shape change induced by flip
   remap() puts atoms outside the new box back into the new box
   perform irregular on atoms in lamda coords to migrate atoms to new procs
   important that image_flip comes before remap, since remap may change
     image flags to new values, making eqs in doc of Domain::image_flip incorrect
 ------------------------------------------------------------------------- */
 
 void FixNH::pre_exchange()
 {
   const double xprd = domain->xprd;
   const double yprd = domain->yprd;
 
   // a flip is triggered only once the tilt exceeds 0.5 by DELTAFLIP,
   // which keeps an oscillating tilt from flipping straight back
 
   const double xtiltmax = (0.5+DELTAFLIP)*xprd;
   const double ytiltmax = (0.5+DELTAFLIP)*yprd;
 
   int flipxy = 0;
   int flipxz = 0;
   int flipyz = 0;
 
   // yz is handled first because adjusting it also shifts xz
 
   if (domain->yperiodic) {
     if (domain->yz < -ytiltmax) {
       flipyz = 1;
       domain->yz += yprd;
       domain->xz += domain->xy;
     } else if (domain->yz >= ytiltmax) {
       flipyz = -1;
       domain->yz -= yprd;
       domain->xz -= domain->xy;
     }
   }
 
   if (domain->xperiodic) {
     if (domain->xz < -xtiltmax) {
       flipxz = 1;
       domain->xz += xprd;
     } else if (domain->xz >= xtiltmax) {
       flipxz = -1;
       domain->xz -= xprd;
     }
 
     if (domain->xy < -xtiltmax) {
       flipxy = 1;
       domain->xy += xprd;
     } else if (domain->xy >= xtiltmax) {
       flipxy = -1;
       domain->xy -= xprd;
     }
   }
 
   // nothing more to do unless at least one tilt was flipped
 
   if (!flipxy && !flipxz && !flipyz) return;
 
   // rebuild the box, fix up image flags, then remap local atoms;
   // image_flip must precede remap
 
   domain->set_global_box();
   domain->set_local_box();
 
   domain->image_flip(flipxy,flipxz,flipyz);
 
   double **x = atom->x;
   tagint *image = atom->image;
   const int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) domain->remap(x[i],image[i]);
 
   // migrate atoms to their new owning procs while in lamda coords
 
   domain->x2lamda(atom->nlocal);
   irregular->migrate_atoms();
   domain->lamda2x(atom->nlocal);
 }
diff --git a/src/fix_nh.h b/src/fix_nh.h
index 80a71b884..e32d9c266 100644
--- a/src/fix_nh.h
+++ b/src/fix_nh.h
@@ -1,259 +1,259 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_FIX_NH_H
 #define LMP_FIX_NH_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Base class for the nvt/npt/nph fixes named in the error messages below:
 // holds the thermostat chain (eta*), the barostat (omega*) and its own
 // thermostat chain (etap*), plus the velocity/position update hooks that
 // child classes may override.
 
 class FixNH : public Fix {
  public:
   FixNH(class LAMMPS *, int, char **);
   virtual ~FixNH();
   int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void initial_integrate(int);
   virtual void final_integrate();
   void initial_integrate_respa(int, int, int);
   void final_integrate_respa(int, int);
   void pre_exchange();             // box flips when tilt limits are exceeded
   double compute_scalar();
   virtual double compute_vector(int);
   void write_restart(FILE *);
   virtual int pack_restart_data(double *); // pack restart data
   virtual void restart(char *);
   int modify_param(int, char **);
   void reset_target(double);       // set t_start, t_stop, t_target at once
   void reset_dt();                 // recompute dt-derived factors
   virtual void *extract(const char*,int &);  // exposes "t_target"
 
  protected:
   int dimension,which;
   double dtv,dtf,dthalf,dt4,dt8,dto;  // dt, dt/2*ftm2v, dt/2, dt/4, dt/8,
                                       // and remap step (see reset_dt)
   double boltz,nktv2p,tdof;
   double vol0;                      // reference volume
   double t0;                        // reference temperature
                                     // used for barostat mass
   double t_start,t_stop;
   double t_current,t_target,ke_target;
   double t_freq;
 
   int tstat_flag;                   // 1 if control T
   int pstat_flag;                   // 1 if control P
 
   int pstyle,pcouple,allremap;
   int p_flag[6];                   // 1 if control P on this dim, 0 if not
   double p_start[6],p_stop[6];
   double p_freq[6],p_target[6];
   double omega[6],omega_dot[6];
   double omega_mass[6];
   double p_current[6];
   double drag,tdrag_factor;        // drag factor on particle thermostat
   double pdrag_factor;             // drag factor on barostat
   int kspace_flag;                 // 1 if KSpace invoked, 0 if not
   int nrigid;                      // number of rigid fixes
   int dilate_group_bit;            // mask for dilation group
   int *rfix;                       // indices of rigid fixes
   char *id_dilate;                 // group name to dilate
   class Irregular *irregular;      // for migrating atoms after box flips
 
   int nlevels_respa;
   double *step_respa;
 
   char *id_temp,*id_press;
   class Compute *temperature,*pressure;
   int tflag,pflag;
 
   double *eta,*eta_dot;            // chain thermostat for particles
   double *eta_dotdot;
   double *eta_mass;
   int mtchain;                     // length of chain
   int mtchain_default_flag;        // 1 = mtchain is default
 
   double *etap;                    // chain thermostat for barostat
   double *etap_dot;
   double *etap_dotdot;
   double *etap_mass;
   int mpchain;                     // length of chain
 
   int mtk_flag;                    // 0 if using Hoover barostat
   int pdim;                        // number of barostatted dims
   double p_freq_max;               // maximum barostat frequency
 
   double p_hydro;                  // hydrostatic target pressure
 
   int nc_tchain,nc_pchain;         // sub-steps per chain half-step update
   double factor_eta;               // velocity scale factor used by nh_v_temp
   double sigma[6];                 // scaled target stress
   double fdev[6];                  // deviatoric force on barostat
   int deviatoric_flag;             // 0 if target stress tensor is hydrostatic
   double h0_inv[6];                // h_inv of reference (zero strain) box
   int nreset_h0;                   // interval for resetting h0
 
   double mtk_term1,mtk_term2;      // Martyna-Tobias-Klein corrections
 
   int eta_mass_flag;               // 1 if eta_mass updated, 0 if not.
   int omega_mass_flag;             // 1 if omega_mass updated, 0 if not.
   int etap_mass_flag;              // 1 if etap_mass updated, 0 if not.
 
   int scaleyz;                     // 1 if yz scaled with lz
   int scalexz;                     // 1 if xz scaled with lz
   int scalexy;                     // 1 if xy scaled with ly
   int flipflag;                    // 1 if box flips are invoked as needed
 
   int pre_exchange_flag;           // set if pre_exchange needed for box flips
 
   double fixedpoint[3];            // location of dilation fixed-point
 
   void couple();
   void remap();
   void nhc_temp_integrate();       // half-step update of particle chain
   void nhc_press_integrate();      // half-step update of barostat chain
 
   virtual void nve_x();            // may be overwritten by child classes
   virtual void nve_v();
   virtual void nh_v_press();
   virtual void nh_v_temp();
   virtual void compute_temp_target();
   virtual int size_restart_global();
 
   void compute_sigma();            // needed when p_target or h0_inv changes
   void compute_deviatoric();       // fdev = h*sigma*h^t
   double compute_strain_energy();  // 0.5*Tr(sigma*h*h^t) in energy units
   void compute_press_target();     // hydrostatic target pressure
   void nh_omega_dot();             // update omega_dot
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Target temperature for fix nvt/npt/nph cannot be 0.0
 
 Self-explanatory.
 
 E: Invalid fix nvt/npt/nph command for a 2d simulation
 
 Cannot control z dimension in a 2d model.
 
 E: Fix nvt/npt/nph dilate group ID does not exist
 
 Self-explanatory.
 
 E: Invalid fix nvt/npt/nph command pressure settings
 
 If multiple dimensions are coupled, those dimensions must be
 specified.
 
 E: Cannot use fix nvt/npt/nph on a non-periodic dimension
 
 When specifying a diagonal pressure component, the dimension must be
 periodic.
 
 E: Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension
 
 When specifying an off-diagonal pressure component, the 2nd of the two
 dimensions must be periodic.  E.g. if the xy component is specified,
 then the y dimension must be periodic.
 
 E: Cannot use fix nvt/npt/nph with yz scaling when z is non-periodic dimension
 
 The 2nd dimension in the barostatted tilt factor must be periodic.
 
 E: Cannot use fix nvt/npt/nph with xz scaling when z is non-periodic dimension
 
 The 2nd dimension in the barostatted tilt factor must be periodic.
 
 E: Cannot use fix nvt/npt/nph with xy scaling when y is non-periodic dimension
 
 The 2nd dimension in the barostatted tilt factor must be periodic.
 
 E: Cannot use fix nvt/npt/nph with both yz dynamics and yz scaling
 
 Self-explanatory.
 
 E: Cannot use fix nvt/npt/nph with both xz dynamics and xz scaling
 
 Self-explanatory.
 
 E: Cannot use fix nvt/npt/nph with both xy dynamics and xy scaling
 
 Self-explanatory.
 
 E: Can not specify Pxy/Pxz/Pyz in fix nvt/npt/nph with non-triclinic box
 
 Only triclinic boxes can be used with off-diagonal pressure components.
 See the region prism command for details.
 
 E: Invalid fix nvt/npt/nph pressure settings
 
 Settings for coupled dimensions must be the same.
 
 E: Fix nvt/npt/nph damping parameters must be > 0.0
 
 Self-explanatory.
 
 E: Cannot use fix npt and fix deform on same component of stress tensor
 
 This would be changing the same box dimension twice.
 
-E: Temperature ID for fix nvt/nph/npt does not exist
+E: Temperature ID for fix nvt/npt does not exist
 
 Self-explanatory.
 
 E: Pressure ID for fix npt/nph does not exist
 
 Self-explanatory.
 
 E: Fix npt/nph has tilted box too far in one step - periodic cell is too far from equilibrium state
 
 Self-explanatory.  The change in the box tilt is too extreme
 on a short timescale.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Temperature for fix modify is not for group all
 
 The temperature compute is being used with a pressure calculation
 which does operate on group all, so this may be inconsistent.
 
 E: Pressure ID for fix modify does not exist
 
 Self-explanatory.
 
 E: Could not find fix_modify pressure ID
 
 The compute ID for computing pressure does not exist.
 
 E: Fix_modify pressure ID does not compute pressure
 
 The compute ID assigned to the fix must compute pressure.
 
 */
diff --git a/src/fix_wall.h b/src/fix_wall.h
index 2eced3a00..c5b610e38 100644
--- a/src/fix_wall.h
+++ b/src/fix_wall.h
@@ -1,106 +1,106 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_FIX_WALL_H
 #define LMP_FIX_WALL_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixWall : public Fix {
  public:
   int nwall;
   int wallwhich[6];
   double coord0[6];
   int xflag;           // 1 if any wall position is a variable
   int xstyle[6];
   int xindex[6];
   char *xstr[6];
 
   FixWall(class LAMMPS *, int, char **);
   virtual ~FixWall();
   int setmask();
   virtual void init();
   void setup(int);
   void min_setup(int);
   void pre_force(int);
   void post_force(int);
   void post_force_respa(int, int, int);
   void min_post_force(int);
   double compute_scalar();
   double compute_vector(int);
 
   virtual void precompute(int) = 0;
   virtual void wall_particle(int, int, double) = 0;
 
  protected:
   double epsilon[6],sigma[6],cutoff[6];
   double ewall[7],ewall_all[7];
   double xscale,yscale,zscale;
   int estyle[6],sstyle[6],wstyle[6];
   int eindex[6],sindex[6];
   char *estr[6],*sstr[6];
   int varflag;                // 1 if any wall position,epsilon,sigma is a var
   int eflag;                  // per-wall flag for energy summation
   int nlevels_respa;
   double dt;
   int fldflag;
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Wall defined twice in fix wall command
 
 Self-explanatory.
 
 E: Fix wall cutoff <= 0.0
 
 Self-explanatory.
 
 E: Cannot use fix wall zlo/zhi for a 2d simulation
 
 Self-explanatory.
 
 E: Cannot use fix wall in periodic dimension
 
 Self-explanatory.
 
 E: Use of fix wall with undefined lattice
 
 Must use lattice command with fix wall command if units option is set
 to lattice.
 
 E: Variable name for fix wall does not exist
 
 Self-explanatory.
 
 E: Variable for fix wall is invalid style
 
 Only equal-style variables can be used.
 
 E: Variable evaluation in fix wall gave bad value
 
-UNDOCUMENTED
+The value returned by the variable for epsilon or sigma was < 0.0.
 
 */
diff --git a/src/group.h b/src/group.h
index a5b4a954e..31a27932d 100644
--- a/src/group.h
+++ b/src/group.h
@@ -1,131 +1,131 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_GROUP_H
 #define LMP_GROUP_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Group : protected Pointers {
  public:
   int ngroup;                  // # of defined groups
   char **names;                // name of each group
   int *bitmask;                // one-bit mask for each group
   int *inversemask;            // inverse mask for each group
 
   Group(class LAMMPS *);
   ~Group();
   void assign(int, char **);         // assign atoms to a group
   void create(char *, int *);        // add flagged atoms to a group
   int find(const char *);            // lookup name in list of groups
   void write_restart(FILE *);
   void read_restart(FILE *);
 
   bigint count(int);                       // count atoms in group
   bigint count(int,int);                   // count atoms in group & region
   double mass(int);                        // total mass of atoms in group
   double mass(int,int);
   double charge(int);                      // total charge of atoms in group
   double charge(int,int);
   void bounds(int, double *);              // bounds of atoms in group
   void bounds(int, double *, int);
   void xcm(int, double, double *);         // center-of-mass coords of group
   void xcm(int, double, double *, int);
   void vcm(int, double, double *);         // center-of-mass velocity of group
   void vcm(int, double, double *, int);
   void fcm(int, double *);                 // total force on group
   void fcm(int, double *, int);
   double ke(int);                          // kinetic energy of group
   double ke(int, int);
   double gyration(int, double, double *);  // radius-of-gyration of group
   double gyration(int, double, double *, int);
   void angmom(int, double *, double *);    // angular momentum of group
   void angmom(int, double *, double *, int);
   void torque(int, double *, double *);    // torque on group
   void torque(int, double *, double *, int);
   void inertia(int, double *, double [3][3]);     // inertia tensor
   void inertia(int, double *, double [3][3], int);
   void omega(double *, double [3][3], double *);  // angular velocity
 
  private:
   int me;
 
   int find_unused();
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Group command before simulation box is defined
 
 The group command cannot be used before a read_data, read_restart, or
 create_box command.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Could not find group delete group ID
 
 Self-explanatory.
 
 E: Cannot delete group all
 
 Self-explanatory.
 
 E: Cannot delete group currently used by a fix
 
 Self-explanatory.
 
 E: Cannot delete group currently used by a compute
 
 Self-explanatory.
 
 E: Cannot delete group currently used by a dump
 
 Self-explanatory.
 
 E: Cannot delete group currently used by atom_modify first
 
 Self-explanatory.
 
 E: Too many groups
 
 The maximum number of atom groups (including the "all" group) is
 given by MAX_GROUP in group.cpp and is 32.
 
 E: Group region ID does not exist
 
 A region ID used in the group command does not exist.
 
 E: Variable name for group does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Variable for group is invalid style
 
-UNDOCUMENTED
+Only atom-style variables can be used.
 
 E: Group ID does not exist
 
 A group ID used in the group command does not exist.
 
 */
diff --git a/src/input.h b/src/input.h
index d218b902c..48bd5114c 100644
--- a/src/input.h
+++ b/src/input.h
@@ -1,335 +1,328 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_INPUT_H
 #define LMP_INPUT_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Input : protected Pointers {
  public:
   int narg;                    // # of command args
   char **arg;                  // parsed args for command
   class Variable *variable;    // defined variables
 
   Input(class LAMMPS *, int, char **);
   ~Input();
   void file();                   // process all input
   void file(const char *);       // process an input script
   char *one(const char *);       // process a single command
   void substitute(char *&, char *&, int &, int &, int);  
                                  // substitute for variables in a string
 
  private:
   int me;                      // proc ID
   char *command;               // ptr to current command
   int maxarg;                  // max # of args in arg
   char *line,*copy,*work;      // input line & copy and work string
   int maxline,maxcopy,maxwork; // max lengths of char strings
   int echo_screen;             // 0 = no, 1 = yes
   int echo_log;                // 0 = no, 1 = yes
   int nfile,maxfile;           // current # and max # of open input files
   int label_active;            // 0 = no label, 1 = looking for label
   char *labelstr;              // label string being looked for
   int jump_skip;               // 1 if skipping next jump, 0 otherwise
 
   FILE **infiles;              // list of open input files
 
   void parse();                          // parse an input text line
   char *nextword(char *, char **);       // find next word in string with quotes
   void reallocate(char *&, int &, int);  // reallocate a char string
   int execute_command();                 // execute a single command
 
   void clear();                // input script commands
   void echo();
   void ifthenelse();
   void include();
   void jump();
   void label();
   void log();
   void next_command();
   void partition();
   void print();
   void quit();
   void shell();
   void variable_command();
 
   void angle_coeff();          // LAMMPS commands
   void angle_style();
   void atom_modify();
   void atom_style();
   void bond_coeff();
   void bond_style();
   void boundary();
   void box();
   void communicate();
   void compute();
   void compute_modify();
   void dielectric();
   void dihedral_coeff();
   void dihedral_style();
   void dimension();
   void dump();
   void dump_modify();
   void fix();
   void fix_modify();
   void group_command();
   void improper_coeff();
   void improper_style();
   void kspace_modify();
   void kspace_style();
   void lattice();
   void mass();
   void min_modify();
   void min_style();
   void neigh_modify();
   void neighbor_command();
   void newton();
   void package();
   void pair_coeff();
   void pair_modify();
   void pair_style();
   void pair_write();
   void processors();
   void region();
   void reset_timestep();
   void restart();
   void run_style();
   void special_bonds();
   void suffix();
   void thermo();
   void thermo_modify();
   void thermo_style();
   void timestep();
   void uncompute();
   void undump();
   void unfix();
   void units();
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Label wasn't found in input script
 
 Self-explanatory.
 
 E: Unknown command: %s
 
 The command is not known to LAMMPS.  Check the input script.
 
 E: Another input script is already being processed
 
 Cannot attempt to open a 2nd input script, when the original file is
 still being processed.
 
 E: Cannot open input script %s
 
 Self-explanatory.
 
 E: Unbalanced quotes in input line
 
 No matching end double quote was found following a leading double
 quote.
 
 E: Input line quote not followed by whitespace
 
 An end quote must be followed by whitespace.
 
 E: Invalid variable name
 
 Variable name used in an input script line is invalid.
 
 E: Substitution for illegal variable
 
 Input script line contained a variable that could not be substituted
 for.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot open logfile %s
 
 The LAMMPS log file specified in the input script cannot be opened.
 Check that the path and name are correct.
 
 E: Angle_coeff command before simulation box is defined
 
 The angle_coeff command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Angle_coeff command before angle_style is defined
 
 Coefficients cannot be set in the data file or via the angle_coeff
 command until an angle_style has been assigned.
 
 E: Angle_coeff command when no angles allowed
 
 The chosen atom style does not allow for angles to be defined.
 
 E: Angle_style command when no angles allowed
 
 The chosen atom style does not allow for angles to be defined.
 
 E: Atom_style command after simulation box is defined
 
 The atom_style command cannot be used after a read_data,
 read_restart, or create_box command.
 
 E: Bond_coeff command before simulation box is defined
 
 The bond_coeff command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Bond_coeff command before bond_style is defined
 
 Coefficients cannot be set in the data file or via the bond_coeff
 command until an bond_style has been assigned.
 
 E: Bond_coeff command when no bonds allowed
 
 The chosen atom style does not allow for bonds to be defined.
 
 E: Bond_style command when no bonds allowed
 
 The chosen atom style does not allow for bonds to be defined.
 
 E: Boundary command after simulation box is defined
 
 The boundary command cannot be used after a read_data, read_restart,
 or create_box command.
 
 E: Box command after simulation box is defined
 
-UNDOCUMENTED
+The box command cannot be used after a read_data, read_restart, or
+create_box command.
 
 E: Dihedral_coeff command before simulation box is defined
 
 The dihedral_coeff command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Dihedral_coeff command before dihedral_style is defined
 
 Coefficients cannot be set in the data file or via the dihedral_coeff
 command until an dihedral_style has been assigned.
 
 E: Dihedral_coeff command when no dihedrals allowed
 
 The chosen atom style does not allow for dihedrals to be defined.
 
 E: Dihedral_style command when no dihedrals allowed
 
 The chosen atom style does not allow for dihedrals to be defined.
 
 E: Dimension command after simulation box is defined
 
 The dimension command cannot be used after a read_data,
 read_restart, or create_box command.
 
 E: Improper_coeff command before simulation box is defined
 
 The improper_coeff command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Improper_coeff command before improper_style is defined
 
 Coefficients cannot be set in the data file or via the improper_coeff
 command until an improper_style has been assigned.
 
 E: Improper_coeff command when no impropers allowed
 
 The chosen atom style does not allow for impropers to be defined.
 
 E: Improper_style command when no impropers allowed
 
 The chosen atom style does not allow for impropers to be defined.
 
 E: KSpace style has not yet been set
 
 Cannot use kspace_modify command until a kspace style is set.
 
 E: Mass command before simulation box is defined
 
 The mass command cannot be used before a read_data, read_restart, or
 create_box command.
 
 E: Min_style command before simulation box is defined
 
 The min_style command cannot be used before a read_data, read_restart,
 or create_box command.
 
 E: Newton bond change after simulation box is defined
 
 The newton command cannot be used to change the newton bond value
 after a read_data, read_restart, or create_box command.
 
 E: Package command after simulation box is defined
 
 The package command cannot be used afer a read_data, read_restart, or
 create_box command.
 
 E: Package cuda command without USER-CUDA installed
 
 The USER-CUDA package must be installed via "make yes-user-cuda"
 before LAMMPS is built.
 
 E: Pair_coeff command before simulation box is defined
 
 The pair_coeff command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Pair_coeff command before pair_style is defined
 
 Self-explanatory.
 
 E: Pair_modify command before pair_style is defined
 
 Self-explanatory.
 
 E: Pair_write command before pair_style is defined
 
 Self-explanatory.
 
 E: Processors command after simulation box is defined
 
 The processors command cannot be used after a read_data, read_restart,
 or create_box command.
 
 E: Run_style command before simulation box is defined
 
 The run_style command cannot be used before a read_data,
 read_restart, or create_box command.
 
 E: Units command after simulation box is defined
 
 The units command cannot be used after a read_data, read_restart, or
 create_box command.
 
-U: Input line too long: %s
-
-This is a hard (very large) limit defined in the input.cpp file.
-
-U: Input line too long after variable substitution
-
-This is a hard (very large) limit defined in the input.cpp file.
-
 */
diff --git a/src/kspace.h b/src/kspace.h
index dc2d84771..c8662b53d 100644
--- a/src/kspace.h
+++ b/src/kspace.h
@@ -1,187 +1,191 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_KSPACE_H
 #define LMP_KSPACE_H
 
 #include "pointers.h"
 
 #ifdef FFT_SINGLE
 typedef float FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_FLOAT
 #else
 typedef double FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_DOUBLE
 #endif
 
 namespace LAMMPS_NS {
 
 class KSpace : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   double energy;                  // accumulated energy
   double energy_1,energy_6;
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
   double e2group;                 // accumulated group-group energy
   double f2group[3];              // accumulated group-group force
 
   int ewaldflag;                 // 1 if a Ewald solver
   int pppmflag;                  // 1 if a PPPM solver
   int msmflag;                   // 1 if a MSM solver
   int dispersionflag;            // 1 if a LJ/dispersion solver
   int tip4pflag;                 // 1 if a TIP4P solver
 
   double g_ewald,g_ewald_6;
   int nx_pppm,ny_pppm,nz_pppm;           // global FFT grid for Coulombics
   int nx_pppm_6,ny_pppm_6,nz_pppm_6;     // global FFT grid for dispersion
   int nx_msm_max,ny_msm_max,nz_msm_max;
 
   int group_group_enable;         // 1 if style supports group/group calculation
 
   unsigned int datamask;
   unsigned int datamask_ext;
 
   int compute_flag;               // 0 if skip compute()
   int fftbench;                   // 0 if skip FFT timing
 
   KSpace(class LAMMPS *, int, char **);
   virtual ~KSpace();
   void modify_params(int, char **);
   void *extract(const char *);
   void compute_dummy(int, int);
 
   // general child-class methods
 
   virtual void init() = 0;
   virtual void setup() = 0;
   virtual void setup_grid() {};
   virtual void compute(int, int) = 0;
   virtual void compute_group_group(int, int, int) {};
 
   virtual void pack_forward(int, FFT_SCALAR *, int, int *) {};
   virtual void unpack_forward(int, FFT_SCALAR *, int, int *) {};
   virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
   virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
 
   virtual int timing(int, double &, double &) {return 0;}
   virtual int timing_1d(int, double &) {return 0;}
   virtual int timing_3d(int, double &) {return 0;}
   virtual double memory_usage() {return 0.0;}
 
 /* ----------------------------------------------------------------------
    compute gamma for MSM and pair styles
    see Eq 4 from Parallel Computing 35 (2009) 164177
 ------------------------------------------------------------------------- */
 
   double gamma(const double &rho) const {
     if (rho <= 1.0) {
       const int split_order = order/2;
       const double rho2 = rho*rho;
       double g = gcons[split_order][0];
       double rho_n = rho2;
       for (int n=1; n<=split_order; n++) {
         g += gcons[split_order][n]*rho_n;
         rho_n *= rho2;
       }
       return g;
     } else
       return (1.0/rho);
   }
 
 /* ----------------------------------------------------------------------
    compute the derivative of gamma for MSM and pair styles
    see Eq 4 from Parallel Computing 35 (2009) 164-177
 ------------------------------------------------------------------------- */
 
   double dgamma(const double &rho) const {
     if (rho <= 1.0) {
       const int split_order = order/2;
       const double rho2 = rho*rho;
       double dg = dgcons[split_order][0]*rho;
       double rho_n = rho*rho2;
       for (int n=1; n<split_order; n++) {
         dg += dgcons[split_order][n]*rho_n;
         rho_n *= rho2;
       }
       return dg;
     } else
       return (-1.0/rho/rho);
   }
 
  protected:
   int gridflag,gridflag_6;
   int gewaldflag,gewaldflag_6;
   int order,order_6;
   int minorder,overlap_allowed;
   int differentiation_flag;
   int slabflag;
   int adjust_cutoff_flag;
   int suffix_flag;                  // suffix compatibility flag
   double scale;
   double slab_volfactor;
   double **gcons,**dgcons;          // accumulated per-atom energy/virial
 
   double accuracy;                  // accuracy of KSpace solver (force units)
   double accuracy_absolute;         // user-specifed accuracy in force units
   double accuracy_relative;         // user-specified dimensionless accuracy
                                     // accurary = acc_rel * two_charge_force
   double two_charge_force;          // force in user units of two point
                                     // charges separated by 1 Angstrom
 
   int evflag,evflag_atom;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   void pair_check();
   void ev_setup(int, int);
   double estimate_table_accuracy(double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: KSpace solver requires a pair style
 
-UNDOCUMENTED
+No pair style is defined.
 
 E: KSpace style is incompatible with Pair style
 
-UNDOCUMENTED
+The Ewald and PPPM solvers work with pair styles with a "coul/long" in
+their name.  The MSM solver works with pair styles with a "coul/msm"
+in their name.  The dispersion solvers work with pair styles with a
+"lj/long" in their name.  The TIP4P solvers work with pair styles with
+a "tip4p" in their name.
 
 W: For better accuracy use 'pair_modify table 0'
 
 The user-specified force accuracy cannot be achieved unless the table
 feature is disabled by using 'pair_modify table 0'.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Bad kspace_modify slab parameter
 
 Kspace_modify value for the slab/volume keyword must be >= 2.0.
 
 W: Kspace_modify slab param < 2.0 may cause unphysical behavior
 
 The kspace_modify slab parameter should be larger to insure periodic
 grids padded with empty space do not overlap.
 
 */
diff --git a/src/lammps.h b/src/lammps.h
index 18cc21f5d..0feb015b3 100644
--- a/src/lammps.h
+++ b/src/lammps.h
@@ -1,157 +1,153 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_LAMMPS_H
 #define LMP_LAMMPS_H
 
 #include "stdio.h"
 
 namespace LAMMPS_NS {
 
 class LAMMPS {
  public:
                                  // ptrs to fundamental LAMMPS classes
   class Memory *memory;          // memory allocation functions
   class Error *error;            // error handling
   class Universe *universe;      // universe of processors
   class Input *input;            // input script processing
                                  // ptrs to top-level LAMMPS-specific classes
   class Atom *atom;              // atom-based quantities
   class Update *update;          // integrators/minimizers
   class Neighbor *neighbor;      // neighbor lists
   class Comm *comm;              // inter-processor communication
   class Domain *domain;          // simulation box
   class Force *force;            // inter-particle forces
   class Modify *modify;          // fixes and computes
   class Group *group;            // groups of atoms
   class Output *output;          // thermo/dump/restart
   class Timer *timer;            // CPU timing info
 
   MPI_Comm world;                // MPI communicator
   FILE *infile;                  // infile
   FILE *screen;                  // screen output
   FILE *logfile;                 // logfile
 
   char *suffix;                  // suffix to add to input script style names
   int suffix_enable;             // 1 if suffix enabled, 0 if disabled
   class Cuda *cuda;              // CUDA accelerator class
 
   LAMMPS(int, char **, MPI_Comm);
   ~LAMMPS();
   void create();
   void post_create();
   void init();
   void destroy();
 
   void print_styles();
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Invalid command-line argument
 
 One or more command-line arguments is invalid.  Check the syntax of
 the command you are using to launch LAMMPS.
 
 E: Cannot use -reorder after -partition
 
 Self-explanatory.  See doc page discussion of command-line switches.
 
 E: Processor partitions are inconsistent
 
 The total number of processors in all partitions must match the number
 of processors LAMMPS is running on.
 
 E: Must use -in switch with multiple partitions
 
 A multi-partition simulation cannot read the input script from stdin.
 The -in command-line option must be used to specify a file.
 
 E: Can only use -pscreen with multiple partitions
 
 Self-explanatory.  See doc page discussion of command-line switches.
 
 E: Can only use -plog with multiple partitions
 
 Self-explanatory.  See doc page discussion of command-line switches.
 
 E: Cannot open universe screen file
 
 For a multi-partition run, the master screen file cannot be opened.
 Check that the directory you are running in allows for files to be
 created.
 
 E: Cannot open log.lammps
 
 The default LAMMPS log file cannot be opened.  Check that the
 directory you are running in allows for files to be created.
 
 E: Cannot open universe log file
 
 For a multi-partition run, the master log file cannot be opened.
 Check that the directory you are running in allows for files to be
 created.
 
 E: Cannot open input script %s
 
 Self-explanatory.
 
 E: Cannot open screen file
 
 The screen file specified as a command-line argument cannot be
 opened.  Check that the directory you are running in allows for files
 to be created.
 
 E: Cannot open logfile
 
 The LAMMPS log file named in a command-line argument cannot be opened.
 Check that the path and name are correct.
 
 E: Smallint setting in lmptype.h is invalid
 
 It has to be the size of an integer.
 
 E: Tagint setting in lmptype.h is invalid
 
 Tagint must be as large or larger than smallint.
 
 E: Bigint setting in lmptype.h is invalid
 
 Size of bigint is less than size of tagint.
 
 E: MPI_LMP_TAGINT and tagint in lmptype.h are not compatible
 
 The size of the MPI datatype does not match the size of a tagint.
 
 E: MPI_LMP_BIGINT and bigint in lmptype.h are not compatible
 
 The size of the MPI datatype does not match the size of a bigint.
 
 E: Small, tag, big integers are not sized correctly
 
 See description of these 3 data types in src/lmptype.h.
 
 E: Cannot use -cuda on without USER-CUDA installed
 
 The USER-CUDA package must be installed via "make yes-user-cuda"
 before LAMMPS is built.
 
-U: 64-bit atom IDs are not yet supported
-
-See description of this data type in src/lmptype.h.
-
 */
diff --git a/src/library.h b/src/library.h
index f0666b354..f78c68a3c 100644
--- a/src/library.h
+++ b/src/library.h
@@ -1,58 +1,61 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /*
    C or Fortran style library interface to LAMMPS
    new LAMMPS-specific functions can be added
 */
 
 #include "mpi.h"
 
 /* ifdefs allow this file to be included in a C program */
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 void lammps_open(int, char **, MPI_Comm, void **);
 void lammps_open_no_mpi(int, char **, void **);
 void lammps_close(void *);
 void lammps_file(void *, char *);
 char *lammps_command(void *, char *);
 void lammps_free(void *);
 
 void *lammps_extract_global(void *, char *);
 void *lammps_extract_atom(void *, char *);
 void *lammps_extract_compute(void *, char *, int, int);
 void *lammps_extract_fix(void *, char *, int, int, int, int);
 void *lammps_extract_variable(void *, char *, char *);
 
 int lammps_get_natoms(void *);
 void lammps_gather_atoms(void *, char *, int, int, void *);
 void lammps_scatter_atoms(void *, char *, int, int, void *);
 
 #ifdef __cplusplus
 }
 #endif
 
 /* ERROR/WARNING messages:
 
 W: Library error in lammps_gather_atoms
 
-UNDOCUMENTED
+This library function cannot be used if atom IDs are not defined
+or are not consecutively numbered.
 
 W: Library error in lammps_scatter_atoms
 
-UNDOCUMENTED
+This library function cannot be used if atom IDs are not defined or
+are not consecutively numbered, or if no atom map is defined.  See the
+atom_modify command for details about atom maps.
 
 */
diff --git a/src/neigh_bond.h b/src/neigh_bond.h
index eddd57bc5..ee752038c 100644
--- a/src/neigh_bond.h
+++ b/src/neigh_bond.h
@@ -1,55 +1,61 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ERROR/WARNING messages:
 
 E: Bond atoms %d %d missing on proc %d at step %ld
 
 The 2nd atom needed to compute a particular bond is missing on this
 processor.  Typically this is because the pairwise cutoff is set too
 short or the bond has blown apart and an atom is too far away.
 
 E: Bond extent > half of periodic box length
 
-UNDOCUMENTED
+This error was detected by the neigh_modify check yes setting.  It is
+an error because the bond atoms are so far apart it is ambiguous how
+the bond should be defined.
 
 E: Angle atoms %d %d %d missing on proc %d at step %ld
 
 One or more of 3 atoms needed to compute a particular angle are
 missing on this processor.  Typically this is because the pairwise
 cutoff is set too short or the angle has blown apart and an atom is
 too far away.
 
 E: Angle extent > half of periodic box length
 
-UNDOCUMENTED
+This error was detected by the neigh_modify check yes setting.  It is
+an error because the angle atoms are so far apart it is ambiguous how
+the angle should be defined.
 
 E: Dihedral atoms %d %d %d %d missing on proc %d at step %ld
 
 One or more of 4 atoms needed to compute a particular dihedral are
 missing on this processor.  Typically this is because the pairwise
 cutoff is set too short or the dihedral has blown apart and an atom is
 too far away.
 
 E: Dihedral/improper extent > half of periodic box length
 
-UNDOCUMENTED
+This error was detected by the neigh_modify check yes setting.  It is
+an error because the dihedral atoms are so far apart it is ambiguous
+how the dihedral should be defined.
 
 E: Improper atoms %d %d %d %d missing on proc %d at step %ld
 
 One or more of 4 atoms needed to compute a particular improper are
 missing on this processor.  Typically this is because the pairwise
 cutoff is set too short or the improper has blown apart and an atom is
 too far away.
 
 */
diff --git a/src/neighbor.h b/src/neighbor.h
index d53d80666..2c58eb18d 100644
--- a/src/neighbor.h
+++ b/src/neighbor.h
@@ -1,399 +1,405 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_NEIGHBOR_H
 #define LMP_NEIGHBOR_H
 
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Neighbor : protected Pointers {
   friend class Cuda;
 
  public:
   int style;                       // 0,1,2 = nsq, bin, multi
   int every;                       // build every this many steps
   int delay;                       // delay build for this many steps
   int dist_check;                  // 0 = always build, 1 = only if 1/2 dist
   int ago;                         // how many steps ago neighboring occurred
   int pgsize;                      // size of neighbor page
   int oneatom;                     // max # of neighbors for one atom
   int includegroup;                // only build pairwise lists for this group
   int build_once;                  // 1 if only build lists once per run
   int cudable;                     // GPU <-> CPU communication flag for CUDA
 
   double skin;                     // skin distance
   double cutneighmin;              // min neighbor cutoff for all type pairs
   double cutneighmax;              // max neighbor cutoff for all type pairs
   double *cuttype;                 // for each type, max neigh cut w/ others
 
   bigint ncalls;                   // # of times build has been called
   bigint ndanger;                  // # of dangerous builds
   bigint lastcall;                 // timestep of last neighbor::build() call
 
   int nrequest;                    // requests for pairwise neighbor lists
   class NeighRequest **requests;   // from Pair, Fix, Compute, Command classes
   int maxrequest;
 
   int old_style;                   // previous run info to avoid
   int old_nrequest;                // re-creation of pairwise neighbor lists
   int old_triclinic;
   class NeighRequest **old_requests;
 
   int nlist;                       // pairwise neighbor lists
   class NeighList **lists;
 
   int nbondlist;                   // list of bonds to compute
   int **bondlist;
   int nanglelist;                  // list of angles to compute
   int **anglelist;
   int ndihedrallist;               // list of dihedrals to compute
   int **dihedrallist;
   int nimproperlist;               // list of impropers to compute
   int **improperlist;
 
   Neighbor(class LAMMPS *);
   virtual ~Neighbor();
   virtual void init();
   int request(void *);              // another class requests a neighbor list
   void print_lists_of_lists();      // debug print out
   int decide();                     // decide whether to build or not
   virtual int check_distance();     // check max distance moved since last build
   void setup_bins();                // setup bins based on box and cutoff
   virtual void build(int topoflag=1);  // create all neighbor lists (pair,bond)
   virtual void build_topology();    // create all topology neighbor lists
   void build_one(int);              // create a single neighbor list
   void set(int, char **);           // set neighbor style and skin distance
   void modify_params(int, char**);  // modify parameters that control builds
   bigint memory_usage();
   int exclude_setting();
 
  protected:
   int me,nprocs;
 
   int maxatom;                     // size of atom-based NeighList arrays
   int maxbond,maxangle,maxdihedral,maximproper;   // size of bond lists
   int maxwt;                       // max weighting factor applied + 1
 
   int must_check;                  // 1 if must check other classes to reneigh
   int restart_check;               // 1 if restart enabled, 0 if no
   int fix_check;                   // # of fixes that induce reneigh
   int *fixchecklist;               // which fixes to check
 
   double **cutneighsq;             // neighbor cutneigh sq for each type pair
   double **cutneighghostsq;        // neighbor cutnsq for each ghost type pair
   double cutneighmaxsq;            // cutneighmax squared
   double *cuttypesq;               // cuttype squared
 
   double triggersq;                // trigger = build when atom moves this dist
   int cluster_check;               // 1 if check bond/angle/etc satisfies minimg
 
   double **xhold;                      // atom coords at last neighbor build
   int maxhold;                         // size of xhold array
   int boxcheck;                        // 1 if need to store box size
   double boxlo_hold[3],boxhi_hold[3];  // box size at last neighbor build
   double corners_hold[8][3];           // box corners at last neighbor build
 
   int nbinx,nbiny,nbinz;           // # of global bins
   int *bins;                       // ptr to next atom in each bin
   int maxbin;                      // size of bins array
 
   int *binhead;                    // ptr to 1st atom in each bin
   int maxhead;                     // size of binhead array
 
   int mbins;                       // # of local bins and offset
   int mbinx,mbiny,mbinz;
   int mbinxlo,mbinylo,mbinzlo;
 
   int binsizeflag;                 // user-chosen bin size
   double binsize_user;
 
   double binsizex,binsizey,binsizez;  // actual bin sizes and inverse sizes
   double bininvx,bininvy,bininvz;
 
   int sx,sy,sz,smax;               // bin stencil extents
 
   int dimension;                   // 2/3 for 2d/3d
   int triclinic;                   // 0 if domain is orthog, 1 if triclinic
   int newton_pair;                 // 0 if newton off, 1 if on for pairwise
 
   double *bboxlo,*bboxhi;          // ptrs to full domain bounding box
   double (*corners)[3];            // ptr to 8 corners of triclinic box
 
   double inner[2],middle[2];       // rRESPA cutoffs for extra lists
   double cut_inner_sq;                   // outer cutoff for inner neighbor list
   double cut_middle_sq;            // outer cutoff for middle neighbor list
   double cut_middle_inside_sq;     // inner cutoff for middle neighbor list
 
   int special_flag[4];             // flags for 1-2, 1-3, 1-4 neighbors
 
   int anyghostlist;                // 1 if any non-occasional list
                                    // stores neighbors of ghosts
 
   int exclude;                     // 0 if no type/group exclusions, 1 if yes
 
   int nex_type;                    // # of entries in type exclusion list
   int maxex_type;                  // max # in type list
   int *ex1_type,*ex2_type;         // pairs of types to exclude
   int **ex_type;                   // 2d array of excluded type pairs
 
   int nex_group;                   // # of entries in group exclusion list
   int maxex_group;                 // max # in group list
   int *ex1_group,*ex2_group;       // pairs of group #'s to exclude
   int *ex1_bit,*ex2_bit;           // pairs of group bits to exclude
 
   int nex_mol;                     // # of entries in molecule exclusion list
   int maxex_mol;                   // max # in molecule list
   int *ex_mol_group;               // molecule group #'s to exclude
   int *ex_mol_bit;                 // molecule group bits to exclude
 
   int nblist,nglist,nslist;    // # of pairwise neigh lists of various kinds
   int *blist;                  // lists to build every reneighboring
   int *glist;                  // lists to grow atom arrays every reneigh
   int *slist;                  // lists to grow stencil arrays every reneigh
 
   void bin_atoms();                     // bin all atoms
   double bin_distance(int, int, int);   // distance between binx
   int coord2bin(double *);              // mapping atom coord to a bin
   int coord2bin(double *, int &, int &, int&); // ditto
 
   int exclusion(int, int, int,
                 int, int *, int *) const;  // test for pair exclusion
 
   virtual void choose_build(int, class NeighRequest *);
   void choose_stencil(int, class NeighRequest *);
 
   // pairwise build functions
 
   typedef void (Neighbor::*PairPtr)(class NeighList *);
   PairPtr *pair_build;
 
   void half_nsq_no_newton(class NeighList *);
   void half_nsq_no_newton_ghost(class NeighList *);
   void half_nsq_newton(class NeighList *);
 
   void half_bin_no_newton(class NeighList *);
   void half_bin_no_newton_ghost(class NeighList *);
   void half_bin_newton(class NeighList *);
   void half_bin_newton_tri(class NeighList *);
 
   void half_multi_no_newton(class NeighList *);
   void half_multi_newton(class NeighList *);
   void half_multi_newton_tri(class NeighList *);
 
   void full_nsq(class NeighList *);
   void full_nsq_ghost(class NeighList *);
   void full_bin(class NeighList *);
   void full_bin_ghost(class NeighList *);
   void full_multi(class NeighList *);
 
   void half_from_full_no_newton(class NeighList *);
   void half_from_full_newton(class NeighList *);
   void skip_from(class NeighList *);
   void skip_from_granular(class NeighList *);
   void skip_from_respa(class NeighList *);
   void copy_from(class NeighList *);
 
   void granular_nsq_no_newton(class NeighList *);
   void granular_nsq_newton(class NeighList *);
   void granular_bin_no_newton(class NeighList *);
   void granular_bin_newton(class NeighList *);
   void granular_bin_newton_tri(class NeighList *);
 
   void respa_nsq_no_newton(class NeighList *);
   void respa_nsq_newton(class NeighList *);
   void respa_bin_no_newton(class NeighList *);
   void respa_bin_newton(class NeighList *);
   void respa_bin_newton_tri(class NeighList *);
 
   // include prototypes for multi-threaded neighbor lists
   // builds or their corresponding dummy versions
 
 #define LMP_INSIDE_NEIGHBOR_H
 #include "accelerator_omp.h"
 #undef LMP_INSIDE_NEIGHBOR_H
 
   // pairwise stencil creation functions
 
   typedef void (Neighbor::*StencilPtr)(class NeighList *, int, int, int);
   StencilPtr *stencil_create;
 
   void stencil_half_bin_2d_no_newton(class NeighList *, int, int, int);
   void stencil_half_ghost_bin_2d_no_newton(class NeighList *, int, int, int);
   void stencil_half_bin_3d_no_newton(class NeighList *, int, int, int);
   void stencil_half_ghost_bin_3d_no_newton(class NeighList *, int, int, int);
   void stencil_half_bin_2d_newton(class NeighList *, int, int, int);
   void stencil_half_bin_3d_newton(class NeighList *, int, int, int);
   void stencil_half_bin_2d_newton_tri(class NeighList *, int, int, int);
   void stencil_half_bin_3d_newton_tri(class NeighList *, int, int, int);
 
   void stencil_half_multi_2d_no_newton(class NeighList *, int, int, int);
   void stencil_half_multi_3d_no_newton(class NeighList *, int, int, int);
   void stencil_half_multi_2d_newton(class NeighList *, int, int, int);
   void stencil_half_multi_3d_newton(class NeighList *, int, int, int);
   void stencil_half_multi_2d_newton_tri(class NeighList *, int, int, int);
   void stencil_half_multi_3d_newton_tri(class NeighList *, int, int, int);
 
   void stencil_full_bin_2d(class NeighList *, int, int, int);
   void stencil_full_ghost_bin_2d(class NeighList *, int, int, int);
   void stencil_full_bin_3d(class NeighList *, int, int, int);
   void stencil_full_ghost_bin_3d(class NeighList *, int, int, int);
   void stencil_full_multi_2d(class NeighList *, int, int, int);
   void stencil_full_multi_3d(class NeighList *, int, int, int);
 
   // topology build functions
 
   typedef void (Neighbor::*BondPtr)();   // ptrs to topology build functions
 
   BondPtr bond_build;                 // ptr to bond list functions
   void bond_all();                    // bond list with all bonds
   void bond_partial();                // exclude certain bonds
   void bond_check();
 
   BondPtr angle_build;                // ptr to angle list functions
   void angle_all();                   // angle list with all angles
   void angle_partial();               // exclude certain angles
   void angle_check();
 
   BondPtr dihedral_build;             // ptr to dihedral list functions
   void dihedral_all();                // dihedral list with all dihedrals
   void dihedral_partial();            // exclude certain dihedrals
   void dihedral_check(int, int **);
 
   BondPtr improper_build;             // ptr to improper list functions
   void improper_all();                // improper list with all impropers
   void improper_partial();            // exclude certain impropers
 
   // find_special: determine if atom j is in special list of atom i
   // if it is not, return 0
   // if it is and special flag is 0 (both coeffs are 0.0), return -1
   // if it is and special flag is 1 (both coeffs are 1.0), return 0
   // if it is and special flag is 2 (otherwise), return 1,2,3
   //   for which level of neighbor it is (and which coeff it maps to)
 
   inline int find_special(const int *list, const int *nspecial,
                           const int tag) const {
     const int n1 = nspecial[0];
     const int n2 = nspecial[1];
     const int n3 = nspecial[2];
 
     for (int i = 0; i < n3; i++) {
       if (list[i] == tag) {
         if (i < n1) {
           if (special_flag[1] == 0) return -1;
           else if (special_flag[1] == 1) return 0;
           else return 1;
         } else if (i < n2) {
           if (special_flag[2] == 0) return -1;
           else if (special_flag[2] == 1) return 0;
           else return 2;
         } else {
           if (special_flag[3] == 0) return -1;
           else if (special_flag[3] == 1) return 0;
           else return 3;
         }
       }
     }
     return 0;
   };
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Neighbor delay must be 0 or multiple of every setting
 
 The delay and every parameters set via the neigh_modify command are
 inconsistent.  If the delay setting is non-zero, then it must be a
 multiple of the every setting.
 
 E: Neighbor page size must be >= 10x the one atom setting
 
 This is required to prevent wasting too much memory.
 
 E: Invalid atom type in neighbor exclusion list
 
 Atom types must range from 1 to Ntypes inclusive.
 
 W: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies
 
-UNDOCUMENTED
+This is because excluding specific pair interactions also excludes
+them from long-range interactions which may not be the desired effect.
+The special_bonds command handles this consistently by ensuring
+excluded (or weighted) 1-2, 1-3, 1-4 interactions are treated
+consistently by both the short-range pair style and the long-range
+solver.  This is not done for exclusions of charged atom pairs via the
+neigh_modify exclude command.
 
 E: Neighbor include group not allowed with ghost neighbors
 
 This is a current restriction within LAMMPS.
 
 E: Neighbor multi not yet enabled for ghost neighbors
 
 This is a current restriction within LAMMPS.
 
 E: Neighbor multi not yet enabled for granular
 
 Self-explanatory.
 
 E: Neighbor multi not yet enabled for rRESPA
 
 Self-explanatory.
 
 E: Too many local+ghost atoms for neighbor list
 
 The number of nlocal + nghost atoms on a processor
 is limited by the size of a 32-bit integer with 2 bits
 removed for masking 1-2, 1-3, 1-4 neighbors.
 
 W: Building an occasional neighobr list when atoms may have moved too far
 
 This can cause LAMMPS to crash when the neighbor list is built.
 The solution is to check for building the regular neighbor lists
 more frequently.
 
 E: Domain too large for neighbor bins
 
 The domain has become extremely large so that neighbor bins cannot be
 used.  Most likely, one or more atoms have been blown out of the
 simulation box to a great distance.
 
 E: Cannot use neighbor bins - box size << cutoff
 
 Too many neighbor bins will be created.  This typically happens when
 the simulation box is very small in some dimension, compared to the
 neighbor cutoff.  Use the "nsq" style instead of "bin" style.
 
 E: Too many neighbor bins
 
 This is likely due to an immense simulation box that has blown up
 to a large size.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Invalid group ID in neigh_modify command
 
 A group ID used in the neigh_modify command does not exist.
 
 E: Neigh_modify include group != atom_modify first group
 
 Self-explanatory.
 
 E: Neigh_modify exclude molecule requires atom attribute molecule
 
 Self-explanatory.
 
 */
diff --git a/src/pair.h b/src/pair.h
index 8d85a0e94..3200f9f75 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,290 +1,290 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_PAIR_H
 #define LMP_PAIR_H
 
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Pair : protected Pointers {
   friend class AngleSDK;
   friend class AngleSDKOMP;
   friend class BondQuartic;
   friend class BondQuarticOMP;
   friend class DihedralCharmm;
   friend class DihedralCharmmOMP;
   friend class FixGPU;
   friend class FixOMP;
   friend class ThrOMP;
 
  public:
   double eng_vdwl,eng_coul;      // accumulated energies
   double virial[6];              // accumulated virial
   double *eatom,**vatom;         // accumulated per-atom energy/virial
 
   double cutforce;               // max cutoff for all atom pairs
   double **cutsq;                // cutoff sq for each atom pair
   int **setflag;                 // 0/1 = whether each i,j has been set
 
   int comm_forward;              // size of forward communication (0 if none)
   int comm_reverse;              // size of reverse communication (0 if none)
   int comm_reverse_off;          // size of reverse comm even if newton off
 
   int single_enable;             // 1 if single() routine exists
   int restartinfo;               // 1 if pair style writes restart info
   int respa_enable;              // 1 if inner/middle/outer rRESPA routines
   int one_coeff;                 // 1 if allows only one coeff * * call
   int no_virial_fdotr_compute;   // 1 if does not invoke virial_fdotr_compute()
   int ghostneigh;                // 1 if pair style needs neighbors of ghosts
   double **cutghost;             // cutoff for each ghost pair
 
   int ewaldflag;                 // 1 if compatible with Ewald solver
   int pppmflag;                  // 1 if compatible with PPPM solver
   int msmflag;                   // 1 if compatible with MSM solver
   int dispersionflag;            // 1 if compatible with LJ/dispersion solver
   int tip4pflag;                 // 1 if compatible with TIP4P solver
 
   int tail_flag;                 // pair_modify flag for LJ tail correction
   double etail,ptail;            // energy/pressure tail corrections
   double etail_ij,ptail_ij;
 
   int evflag;                    // energy,virial settings
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
 
   int ncoultablebits;            // size of Coulomb table, accessed by KSpace
   double tabinnersq;
   double *rtable,*drtable,*ftable,*dftable,*ctable,*dctable;
   double *etable,*detable,*ptable,*dptable,*vtable,*dvtable;
   int ncoulshiftbits,ncoulmask;
 
   int nextra;                    // # of extra quantities pair style calculates
   double *pvector;               // vector of extra pair quantities
 
   int single_extra;              // number of extra single values calculated
   double *svector;               // vector of extra single quantities
 
   class NeighList *list;         // standard neighbor list used by most pairs
   class NeighList *listhalf;     // half list used by some pairs
   class NeighList *listfull;     // full list used by some pairs
   class NeighList *listgranhistory;  // granular history list used by some pairs
   class NeighList *listinner;    // rRESPA lists used by some pairs
   class NeighList *listmiddle;
   class NeighList *listouter;
 
   unsigned int datamask;
   unsigned int datamask_ext;
 
   int compute_flag;              // 0 if skip compute()
 
   Pair(class LAMMPS *);
   virtual ~Pair();
 
   // top-level Pair methods
 
   void init();
   void reinit();
   double mix_energy(double, double, double, double);
   double mix_distance(double, double);
   void write_file(int, char **);
   void init_bitmap(double, double, int, int &, int &, int &, int &);
   virtual void modify_params(int, char **);
   void compute_dummy(int, int);
 
   // need to be public, so can be called by pair_style reaxc
 
   void v_tally(int, double *, double *);
   void ev_tally(int, int, int, int, double, double, double,
                 double, double, double);
   void ev_tally3(int, int, int, double, double,
                  double *, double *, double *, double *);
   void v_tally3(int, int, int, double *, double *, double *, double *);
   void v_tally4(int, int, int, int, double *, double *, double *,
                 double *, double *, double *);
   void ev_tally_xyz(int, int, int, int, double, double,
                     double, double, double, double, double, double);
 
   // general child-class methods
 
   virtual void compute(int, int) = 0;
   virtual void compute_inner() {}
   virtual void compute_middle() {}
   virtual void compute_outer(int, int) {}
 
   virtual double single(int, int, int, int,
                         double, double, double, 
 			double& fforce) {
     fforce = 0.0;
     return 0.0;
   }
 
   virtual void settings(int, char **) = 0;
   virtual void coeff(int, char **) = 0;
 
   virtual void init_style();
   virtual void init_list(int, class NeighList *);
   virtual double init_one(int, int) {return 0.0;}
 
   virtual void init_tables(double, double *);
   virtual void free_tables();
 
   virtual void write_restart(FILE *) {}
   virtual void read_restart(FILE *) {}
   virtual void write_restart_settings(FILE *) {}
   virtual void read_restart_settings(FILE *) {}
 
   virtual int pack_comm(int, int *, double *, int, int *) {return 0;}
   virtual void unpack_comm(int, int, double *) {}
   virtual int pack_reverse_comm(int, int, double *) {return 0;}
   virtual void unpack_reverse_comm(int, int *, double *) {}
   virtual double memory_usage();
 
   // specific child-class methods for certain Pair styles
 
   virtual void *extract(const char *, int &) {return NULL;}
   virtual void swap_eam(double *, double **) {}
   virtual void reset_dt() {}
   virtual void min_xf_pointers(int, double **, double **) {}
   virtual void min_xf_get(int) {}
   virtual void min_x_set(int) {}
 
   virtual unsigned int data_mask() {return datamask;}
   virtual unsigned int data_mask_ext() {return datamask_ext;}
 
  protected:
   enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // mixing options
 
   int allocated;               // 0/1 = whether arrays are allocated
   int suffix_flag;             // suffix compatibility flag
 
                                        // pair_modify settings
   int offset_flag,mix_flag;            // flags for offset and mixing
   double tabinner;                     // inner cutoff for Coulomb table
 
   // custom data type for accessing Coulomb tables
 
   typedef union {int i; float f;} union_int_float_t;
 
   double THIRD;
 
   int vflag_fdotr;
   int maxeatom,maxvatom;
 
   virtual void ev_setup(int, int);
   void ev_unset();
   void ev_tally_full(int, double, double, double, double, double, double);
   void ev_tally_xyz_full(int, double, double,
                          double, double, double, double, double, double);
   void ev_tally4(int, int, int, int, double,
                  double *, double *, double *, double *, double *, double *);
   void ev_tally_tip4p(int, int *, double *, double, double);
   void v_tally2(int, int, double, double *);
   void v_tally_tensor(int, int, int, int,
                       double, double, double, double, double, double);
   void virial_fdotr_compute();
 
   inline int sbmask(int j) {
     return j >> SBBITS & 3;
   }
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Too many total bits for bitmapped lookup table
 
 Table size specified via pair_modify command is too large.  Note that
 a value of N generates a 2^N size table.
 
 E: Cannot have both pair_modify shift and tail set to yes
 
 These 2 options are contradictory.
 
 E: Cannot use pair tail corrections with 2d simulations
 
 The correction factors are only currently defined for 3d systems.
 
 W: Using pair tail corrections with nonperiodic system
 
 This is probably a bogus thing to do, since tail corrections are
 computed by integrating the density of a periodic system out to
 infinity.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: Pair style requres a KSpace style
 
-UNDOCUMENTED
+This pair style needs a long-range solver, but no kspace style is defined.
 
 E: Pair style does not support pair_write
 
 The pair style does not have a single() function, so it can
 not be invoked by pair write.
 
 E: Invalid atom types in pair_write command
 
 Atom types must range from 1 to Ntypes inclusive.
 
 E: Invalid style in pair_write command
 
 Self-explanatory.  Check the input script.
 
 E: Invalid cutoffs in pair_write command
 
 Inner cutoff must be larger than 0.0 and less than outer cutoff.
 
 E: Cannot open pair_write file
 
 The specified output file for pair energies and forces cannot be
 opened.  Check that the path and name are correct.
 
 E: Bitmapped lookup tables require int/float be same size
 
 Cannot use pair tables on this machine, because of word sizes.  Use
 the pair_modify command with table 0 instead.
 
 W: Table inner cutoff >= outer cutoff
 
 You specified an inner cutoff for a Coulombic table that is longer
 than the global cutoff.  Probably not what you wanted.
 
 E: Too many exponent bits for lookup table
 
 Table size specified via pair_modify command does not work with your
 machine's floating point representation.
 
 E: Too many mantissa bits for lookup table
 
 Table size specified via pair_modify command does not work with your
 machine's floating point representation.
 
 E: Too few bits for lookup table
 
 Table size specified via pair_modify command does not work with your
 machine's floating point representation.
 
 */
diff --git a/src/pair_coul_dsf.h b/src/pair_coul_dsf.h
index 6fb544937..69825c16b 100644
--- a/src/pair_coul_dsf.h
+++ b/src/pair_coul_dsf.h
@@ -1,70 +1,72 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(coul/dsf,PairCoulDSF)
 
 #else
 
 #ifndef LMP_PAIR_COUL_DSF_H
 #define LMP_PAIR_COUL_DSF_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairCoulDSF : public Pair {
  public:
   PairCoulDSF(class LAMMPS *);
   ~PairCoulDSF();
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   double single(int, int, int, int, double, double, double, double &);
   void *extract(const char *, int &);
 
  protected:
   double cut_coul,cut_coulsq;
   double alpha;
   double f_shift,e_shift;
   
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script or data file.
 
 E: Pair style coul/dsf requires atom attribute q
 
 The atom style defined does not have this attribute.
 
 */
diff --git a/src/pair_lj_cut_coul_dsf.h b/src/pair_lj_cut_coul_dsf.h
index 9e59b5400..9a0fa49d4 100644
--- a/src/pair_lj_cut_coul_dsf.h
+++ b/src/pair_lj_cut_coul_dsf.h
@@ -1,75 +1,77 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(lj/cut/coul/dsf,PairLJCutCoulDSF)
 
 #else
 
 #ifndef LMP_PAIR_LJ_CUT_COUL_DSF_H
 #define LMP_PAIR_LJ_CUT_COUL_DSF_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairLJCutCoulDSF : public Pair {
  public:
   PairLJCutCoulDSF(class LAMMPS *);
   ~PairLJCutCoulDSF();
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
   void write_restart_settings(FILE *);
   void read_restart_settings(FILE *);
   double single(int, int, int, int, double, double, double, double &);
   void *extract(const char *, int &);
 
  protected:
   double cut_lj_global;
   double **cut_lj,**cut_ljsq;
   double **epsilon,**sigma;
   double **lj1,**lj2,**lj3,**lj4,**offset;
   
   double cut_coul,cut_coulsq;
   double alpha;
   double f_shift,e_shift;
   
   void allocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
-UNDOCUMENTED
+Self-explanatory.  Check the input script or data file.
 
 E: Pair style lj/cut/coul/dsf requires atom attribute q
 
-UNDOCUMENTED
+The atom style defined does not have this attribute.
 
 */
diff --git a/src/read_data.h b/src/read_data.h
index 868193b6c..4788cdacf 100644
--- a/src/read_data.h
+++ b/src/read_data.h
@@ -1,422 +1,423 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMMAND_CLASS
 
 CommandStyle(read_data,ReadData)
 
 #else
 
 #ifndef LMP_READ_DATA_H
 #define LMP_READ_DATA_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class ReadData : protected Pointers {
  public:
   ReadData(class LAMMPS *);
   ~ReadData();
   void command(int, char **);
 
  private:
   int me;
   char *line,*keyword,*buffer;
   FILE *fp;
   int narg,maxarg,compressed;
   char **arg;
 
   int nfix;           // # of extra fixes that process/store info in data file
   int *fix_index;
   char **fix_header;
   char **fix_section;
 
   bigint nellipsoids;
   class AtomVecEllipsoid *avec_ellipsoid;
   bigint nlines;
   class AtomVecLine *avec_line;
   bigint ntris;
   class AtomVecTri *avec_tri;
   bigint nbodies;
   class AtomVecBody *avec_body;
 
   void open(char *);
   void scan(int &, int &, int &, int &);
   int reallocate(int **, int, int);
   void header(int);
   void parse_keyword(int, int);
   void skip_lines(int);
   void parse_coeffs(char *, const char *, int);
 
   void atoms();
   void velocities();
   void bonus(bigint, class AtomVec *, const char *);
   void bodies();
 
   void bonds();
   void angles();
   void dihedrals();
   void impropers();
 
   void mass();
   void paircoeffs();
   void bondcoeffs();
   void anglecoeffs(int);
   void dihedralcoeffs(int);
   void impropercoeffs(int);
 
   void fix(int, char *, bigint);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot read_data after simulation box is defined
 
 The read_data command cannot be used after a read_data,
 read_restart, or create_box command.
 
 E: Cannot run 2d simulation with nonperiodic Z dimension
 
 Use the boundary command to make the z dimension periodic in order to
 run a 2d simulation.
 
 E: Fix ID for read_data does not exist
 
 Self-explanatory.
 
 E: Must read Atoms before Velocities
 
 The Atoms section of a data file must come before a Velocities
 section.
 
 E: Invalid data file section: Ellipsoids
 
 Atom style does not allow ellipsoids.
 
 E: Must read Atoms before Ellipsoids
 
 The Atoms section of a data file must come before a Ellipsoids
 section.
 
 E: Invalid data file section: Lines
 
 Atom style does not allow lines.
 
 E: Must read Atoms before Lines
 
 The Atoms section of a data file must come before a Lines section.
 
 E: Invalid data file section: Triangles
 
 Atom style does not allow triangles.
 
 E: Must read Atoms before Triangles
 
 The Atoms section of a data file must come before a Triangles section.
 
 E: Invalid data file section: Bodies
 
-UNDOCUMENTED
+Atom style does not allow bodies.
 
 E: Must read Atoms before Bodies
 
-UNDOCUMENTED
+The Atoms section of a data file must come before a Bodies section.
 
 E: Invalid data file section: Bonds
 
 Atom style does not allow bonds.
 
 E: Must read Atoms before Bonds
 
 The Atoms section of a data file must come before a Bonds section.
 
 E: Invalid data file section: Angles
 
 Atom style does not allow angles.
 
 E: Must read Atoms before Angles
 
 The Atoms section of a data file must come before an Angles section.
 
 E: Invalid data file section: Dihedrals
 
 Atom style does not allow dihedrals.
 
 E: Must read Atoms before Dihedrals
 
 The Atoms section of a data file must come before a Dihedrals section.
 
 E: Invalid data file section: Impropers
 
 Atom style does not allow impropers.
 
 E: Must read Atoms before Impropers
 
 The Atoms section of a data file must come before an Impropers
 section.
 
 E: Must define pair_style before Pair Coeffs
 
 Must use a pair_style command before reading a data file that defines
 Pair Coeffs.
 
 E: Invalid data file section: Bond Coeffs
 
 Atom style does not allow bonds.
 
 E: Must define bond_style before Bond Coeffs
 
 Must use a bond_style command before reading a data file that
 defines Bond Coeffs.
 
 E: Invalid data file section: Angle Coeffs
 
 Atom style does not allow angles.
 
 E: Must define angle_style before Angle Coeffs
 
 Must use an angle_style command before reading a data file that
 defines Angle Coeffs.
 
 E: Invalid data file section: Dihedral Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before Dihedral Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines Dihedral Coeffs.
 
 E: Invalid data file section: Improper Coeffs
 
 Atom style does not allow impropers.
 
 E: Must define improper_style before Improper Coeffs
 
 Must use an improper_style command before reading a data file that
 defines Improper Coeffs.
 
 E: Invalid data file section: BondBond Coeffs
 
 Atom style does not allow angles.
 
 E: Must define angle_style before BondBond Coeffs
 
 Must use an angle_style command before reading a data file that
 defines Angle Coeffs.
 
 E: Invalid data file section: BondAngle Coeffs
 
 Atom style does not allow angles.
 
 E: Must define angle_style before BondAngle Coeffs
 
 Must use an angle_style command before reading a data file that
 defines Angle Coeffs.
 
 E: Invalid data file section: MiddleBondTorsion Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before MiddleBondTorsion Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines MiddleBondTorsion Coeffs.
 
 E: Invalid data file section: EndBondTorsion Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before EndBondTorsion Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines EndBondTorsion Coeffs.
 
 E: Invalid data file section: AngleTorsion Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before AngleTorsion Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines AngleTorsion Coeffs.
 
 E: Invalid data file section: AngleAngleTorsion Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before AngleAngleTorsion Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines AngleAngleTorsion Coeffs.
 
 E: Invalid data file section: BondBond13 Coeffs
 
 Atom style does not allow dihedrals.
 
 E: Must define dihedral_style before BondBond13 Coeffs
 
 Must use a dihedral_style command before reading a data file that
 defines BondBond13 Coeffs.
 
 E: Invalid data file section: AngleAngle Coeffs
 
 Atom style does not allow impropers.
 
 E: Must define improper_style before AngleAngle Coeffs
 
 Must use an improper_style command before reading a data file that
 defines AngleAngle Coeffs.
 
 E: Unknown identifier in data file: %s
 
 A section of the data file cannot be read by LAMMPS.
 
 E: No atoms in data file
 
 The header of the data file indicated that atoms would be included,
 but they were not present.
 
 E: Unexpected end of data file
 
 LAMMPS hit the end of the data file while attempting to read a
 section.  Something is wrong with the format of the data file.
 
 E: No ellipsoids allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No lines allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No triangles allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No bodies allowed with this atom style
 
-UNDOCUMENTED
+Self-explanatory.  Check data file.
 
 E: System in data file is too big
 
 See the setting for bigint in the src/lmptype.h file.
 
 E: No bonds allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No angles allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No dihedrals allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: No impropers allowed with this atom style
 
 Self-explanatory.  Check data file.
 
 E: Bonds defined but no bond types
 
 The data file header lists bonds but no bond types.
 
 E: Angles defined but no angle types
 
 The data file header lists angles but no angle types.
 
 E: Dihedrals defined but no dihedral types
 
 The data file header lists dihedrals but no dihedral types.
 
 E: Impropers defined but no improper types
 
 The data file header lists improper but no improper types.
 
 E: Did not assign all atoms correctly
 
 Atoms read in from a data file were not assigned correctly to
 processors.  This is likely due to some atom coordinates being
 outside a non-periodic simulation box.
 
 E: Invalid atom ID in Atoms section of data file
 
 Atom IDs must be positive integers.
 
 E: Too many lines in one body in data file - boost MAXBODY
 
-UNDOCUMENTED
+MAXBODY is a setting at the top of the src/read_data.cpp file.
+Set it larger and re-compile the code.
 
 E: Bonds assigned incorrectly
 
 Bonds read in from the data file were not assigned correctly to atoms.
 This means there is something invalid about the topology definitions.
 
 E: Angles assigned incorrectly
 
 Angles read in from the data file were not assigned correctly to
 atoms.  This means there is something invalid about the topology
 definitions.
 
 E: Dihedrals assigned incorrectly
 
 Dihedrals read in from the data file were not assigned correctly to
 atoms.  This means there is something invalid about the topology
 definitions.
 
 E: Impropers assigned incorrectly
 
 Impropers read in from the data file were not assigned correctly to
 atoms.  This means there is something invalid about the topology
 definitions.
 
 E: Molecular data file has too many atoms
 
 These kids of data files are currently limited to a number
 of atoms that fits in a 32-bit integer.
 
 E: Needed topology not in data file
 
 The header of the data file indicated that bonds or angles or
 dihedrals or impropers would be included, but they were not present.
 
 E: Needed bonus data not in data file
 
 Some atom styles require bonus data.  See the read_data doc page for
 details.
 
 E: Cannot open gzipped file
 
 LAMMPS is attempting to open a gzipped version of the specified file
 but was unsuccessful.  Check that the path and name are correct.
 
 E: Cannot open file %s
 
 The specified file cannot be opened.  Check that the path and name are
 correct.
 
 */
diff --git a/src/read_dump.h b/src/read_dump.h
index aa5ad9692..66d78b93d 100644
--- a/src/read_dump.h
+++ b/src/read_dump.h
@@ -1,151 +1,152 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 
    Contributed by Timothy Sirk
 ------------------------------------------------------------------------- */
 
 #ifdef COMMAND_CLASS
 
 CommandStyle(read_dump,ReadDump)
 
 #else
 
 #ifndef LMP_READ_DUMP_H
 #define LMP_READ_DUMP_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class ReadDump : protected Pointers {
  public:
   ReadDump(class LAMMPS *);
   ~ReadDump();
   void command(int, char **);
 
   void store_files(int, char **);
   void setup_reader(int, char **);
   bigint seek(bigint, int);
   void header(int);
   bigint next(bigint, bigint, int, int);
   void atoms();
   int fields_and_keywords(int, char **);
 
 private:
   int me,nprocs;
   FILE *fp;
 
   int dimension;
   int triclinic;
 
   int nfile;               // # of dump files to process
   char **files;            // list of file names
   int currentfile;         // currently open file
 
   int boxflag;             // overwrite simulation with dump file box params
   int replaceflag,addflag; // flags for processing dump snapshot atoms
   int trimflag,purgeflag;
   int scaledflag;          // user setting for coordinate scaling
   int scaled;              // actual setting for coordinate scaling
   char *readerstyle;       // style of dump files to read
 
   int nfield;              // # of fields to extract from dump file
   int *fieldtype;          // type of each field = X,VY,IZ,etc
   char **fieldlabel;       // user specified label for field
   double **fields;         // per-atom field values
 
   double box[3][3];         // dump file box parameters
   double xlo,xhi,ylo,yhi,zlo,zhi,xy,xz,yz;  // dump snapshot box params
   double xprd,yprd,zprd;
 
   bigint nsnapatoms;        // # of atoms in dump file shapshot
 
   int npurge,nreplace,ntrim,nadd;     // stats on processed atoms
   int addproc;                        // proc that should add next atom
   int yindex,zindex;                  // field index for Y,Z coords
 
   int *uflag;               // set to 1 if snapshot atom matches owned atom
   int *ucflag,*ucflag_all;  // set to 1 if snapshot chunk atom was processed
 
   class Reader *reader;           // class that reads dump file
 
   void process_atoms(int);
   void delete_atoms();
 
   double xfield(int, int);
   double yfield(int, int);
   double zfield(int, int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Read_dump command before simulation box is defined
 
-UNDOCUMENTED
+The read_dump command cannot be used before a read_data, read_restart,
+or create_box command.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Dump file does not contain requested snapshot
 
 Self-explanatory.
 
 E: Invalid dump reader style
 
 Self-explanatory.
 
 E: No box information in dump. You have to use 'box no'
 
 Self-explanatory.
 
 E: Read_dump triclinic status does not match simulation
 
 Both the dump snapshot and the current LAMMPS simulation must
 be using either an orthogonal or triclinic box.
 
 E: Read_dump field not found in dump file
 
 Self-explanatory.
 
 E: Read_dump x,y,z fields do not have consistent scaling
 
 Self-explanatory.
 
 E: All read_dump x,y,z fields must be specified for scaled, triclinic coords
 
 For triclinic boxes and scaled coordinates you must specify all 3 of
 the x,y,z fields, else LAMMPS cannot reconstruct the unscaled
 coordinates.
 
 E: Too many total atoms
 
 See the setting for bigint in the src/lmptype.h file.
 
 E: Duplicate fields in read_dump command
 
 Self-explanatory.
 
 E: If read_dump purges it cannot replace or trim
 
 These operations are not compatible.  See the read_dump doc
 page for details.
 
 */
diff --git a/src/region_cylinder.h b/src/region_cylinder.h
index 8936fe1de..67dcfc3a9 100644
--- a/src/region_cylinder.h
+++ b/src/region_cylinder.h
@@ -1,81 +1,81 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef REGION_CLASS
 
 RegionStyle(cylinder,RegCylinder)
 
 #else
 
 #ifndef LMP_REGION_CYLINDER_H
 #define LMP_REGION_CYLINDER_H
 
 #include "region.h"
 
 namespace LAMMPS_NS {
 
 class RegCylinder : public Region {
   friend class FixPour;
 
  public:
   RegCylinder(class LAMMPS *, int, char **);
   ~RegCylinder();
   void init();
   int inside(double, double, double);
   int surface_interior(double *, double);
   int surface_exterior(double *, double);
   void shape_update();
 
  private:
   char axis;
   double c1,c2;
   double radius;
   double lo,hi;
   int rstyle,rvar;
   char *rstr;
 
   void variable_check();
 
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot use region INF or EDGE when box does not exist
 
 Regions that extend to the box boundaries can only be used after the
 create_box command has been used.
 
 E: Variable evaluation in region gave bad value
 
-UNDOCUMENTED
+Variable returned a radius < 0.0.
 
 E: Variable name for region cylinder does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Variable for region cylinder is invalid style
 
-UNDOCUMENTED
+Only equal-style variables are allowed.
 
 */
diff --git a/src/region_sphere.h b/src/region_sphere.h
index 27ebfb851..a9a6accaf 100644
--- a/src/region_sphere.h
+++ b/src/region_sphere.h
@@ -1,71 +1,71 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef REGION_CLASS
 
 RegionStyle(sphere,RegSphere)
 
 #else
 
 #ifndef LMP_REGION_SPHERE_H
 #define LMP_REGION_SPHERE_H
 
 #include "region.h"
 
 namespace LAMMPS_NS {
 
 class RegSphere : public Region {
  public:
   RegSphere(class LAMMPS *, int, char **);
   ~RegSphere();
   void init();
   int inside(double, double, double);
   int surface_interior(double *, double);
   int surface_exterior(double *, double);
   void shape_update();
 
  private:
   double xc,yc,zc;
   double radius;
   int rstyle,rvar;
   char *rstr;
 
   void variable_check();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Variable evaluation in region gave bad value
 
-UNDOCUMENTED
+Variable returned a radius < 0.0.
 
 E: Variable name for region sphere does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Variable for region sphere is invalid style
 
-UNDOCUMENTED
+Only equal-style variables are allowed.
 
 */
diff --git a/src/variable.h b/src/variable.h
index 1d7ff8395..10ae12827 100644
--- a/src/variable.h
+++ b/src/variable.h
@@ -1,387 +1,390 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_VARIABLE_H
 #define LMP_VARIABLE_H
 
 #include "stdlib.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Variable : protected Pointers {
  public:
   Variable(class LAMMPS *);
   ~Variable();
   void set(int, char **);
   void set(char *, int, char **);
   int next(int, char **);
   int find(char *);
   int equalstyle(int);
   int atomstyle(int);
   char *retrieve(char *);
   double compute_equal(int);
   double compute_equal(char *);
   void compute_atom(int, int, double *, int, int);
   int int_between_brackets(char *&);
   double evaluate_boolean(char *);
 
   unsigned int data_mask(int ivar);
   unsigned int data_mask(char *str);
 
  private:
   int nvar;                // # of defined variables
   int maxvar;              // max # of variables arrays can hold
   char **names;            // name of each variable
   int *style;              // style of each variable
   int *num;                // # of values for each variable
   int *which;              // next available value for each variable
   int *pad;                // 1 = pad loop/uloop variables with 0s, 0 = no pad
   class VarReader **reader;   // variable that reads lines from file
   char ***data;            // str value of each variable's values
 
   int *eval_in_progress;   // flag if evaluation of variable is in progress
 
   class RanMars *randomequal;   // random number generator for equal-style vars
   class RanMars *randomatom;    // random number generator for atom-style vars
 
   int precedence[17];      // precedence level of math operators
                            // set length to include up to OR in enum
   int me;
 
   struct Tree {            // parse tree for atom-style variables
     double value;
     double *array;
     int *iarray;
     int type;
     int nstride;
     int ivalue1,ivalue2;
     Tree *left,*middle,*right;
   };
 
   void remove(int);
   void grow();
   void copy(int, char **, char **);
   double evaluate(char *, Tree **);
   double collapse_tree(Tree *);
   double eval_tree(Tree *, int);
   void free_tree(Tree *);
   int find_matching_paren(char *, int, char *&);
   int math_function(char *, char *, Tree **, Tree **, int &, double *, int &);
   int group_function(char *, char *, Tree **, Tree **, int &, double *, int &);
   int region_function(char *);
   int special_function(char *, char *, Tree **, Tree **,
                        int &, double *, int &);
   void peratom2global(int, char *, double *, int, int,
                       Tree **, Tree **, int &, double *, int &);
   int is_atom_vector(char *);
   void atom_vector(char *, Tree **, Tree **, int &);
   int is_constant(char *);
   double constant(char *);
   double numeric(char *);
   int inumeric(char *);
   char *find_next_comma(char *);
   void print_tree(Tree *, int);
 };
 
 class VarReader : protected Pointers {
  public:
   VarReader(class LAMMPS *, char *);
   ~VarReader();
   int read(char *);
 
  private:
   int me;
   FILE *fp;
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: World variable count doesn't match # of partitions
 
 A world-style variable must specify a number of values equal to the
 number of processor partitions.
 
 E: Universe/uloop variable count < # of partitions
 
 A universe or uloop style variable must specify a number of values >= to the
 number of processor partitions.
 
 E: All universe/uloop variables must have same # of values
 
 Self-explanatory.
 
 E: Cannot redefine variable as a different style
 
 An equal-style variable can be re-defined but only if it was
 originally an equal-style variable.
 
 E: File variable could not read value
 
-UNDOCUMENTED
+Check the file assigned to the variable.
 
 E: Variable name must be alphanumeric or underscore characters
 
 Self-explanatory.
 
 E: Invalid variable in next command
 
 Self-explanatory.
 
 E: All variables in next command must be same style
 
 Self-explanatory.
 
 E: Invalid variable style with next command
 
 Variable styles {equal} and {world} cannot be used in a next
 command.
 
 E: Invalid syntax in variable formula
 
 Self-explanatory.
 
 E: Variable evaluation before simulation box is defined
 
 Cannot evaluate a compute or fix or atom-based value in a variable
 before the simulation has been setup.
 
 E: Invalid compute ID in variable formula
 
 The compute is not recognized.
 
 E: Compute used in variable between runs is not current
 
 Computes cannot be invoked by a variable in between runs.  Thus they
 must have been evaluated on the last timestep of the previous run in
 order for their value(s) to be accessed.  See the doc page for the
 variable command for more info.
 
 E: Variable formula compute vector is accessed out-of-range
 
 Self-explanatory.
 
 E: Variable formula compute array is accessed out-of-range
 
 Self-explanatory.
 
 E: Per-atom compute in equal-style variable formula
 
 Equal-style variables cannot use per-atom quantities.
 
 E: Mismatched compute in variable formula
 
 A compute is referenced incorrectly or a compute that produces per-atom
 values is used in an equal-style variable formula.
 
 E: Invalid fix ID in variable formula
 
 The fix is not recognized.
 
 E: Fix in variable not computed at compatible time
 
 Fixes generate their values on specific timesteps.  The variable is
 requesting the values on a non-allowed timestep.
 
 E: Variable formula fix vector is accessed out-of-range
 
 Self-explanatory.
 
 E: Variable formula fix array is accessed out-of-range
 
 Self-explanatory.
 
 E: Per-atom fix in equal-style variable formula
 
 Equal-style variables cannot use per-atom quantities.
 
 E: Mismatched fix in variable formula
 
 A fix is referenced incorrectly or a fix that produces per-atom
 values is used in an equal-style variable formula.
 
 E: Invalid variable name in variable formula
 
 Variable name is not recognized.
 
 E: Variable has circular dependency
 
-UNDOCUMENTED
+A circular dependency is when variable "a" is used by variable "b" and
+variable "b" is also used by variable "a".  Circular dependencies with
+longer chains of dependence are also not allowed.
 
 E: Invalid variable evaluation in variable formula
 
 A variable used in a formula could not be evaluated.
 
 E: Atom-style variable in equal-style variable formula
 
 Atom-style variables generate one value per atom which is not allowed
 in an equal-style variable.
 
 E: Mismatched variable in variable formula
 
 A variable is referenced incorrectly or an atom-style variable that
 produces per-atom values is used in an equal-style variable
 formula.
 
 E: Invalid math/group/special function in variable formula
 
 Self-explanatory.
 
 E: Invalid thermo keyword in variable formula
 
 The keyword is not recognized.
 
 E: Divide by 0 in variable formula
 
 Self-explanatory.
 
 E: Modulo 0 in variable formula
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Power by 0 in variable formula
 
 Self-explanatory.
 
 E: Sqrt of negative value in variable formula
 
 Self-explanatory.
 
 E: Log of zero/negative value in variable formula
 
 Self-explanatory.
 
 E: Arcsin of invalid value in variable formula
 
 Argument of arcsin() must be between -1 and 1.
 
 E: Arccos of invalid value in variable formula
 
 Argument of arccos() must be between -1 and 1.
 
 E: Invalid math function in variable formula
 
 Self-explanatory.
 
 E: Non digit character between brackets in variable
 
 Self-explantory.
 
 E: Mismatched brackets in variable
 
 Self-explanatory.
 
 E: Empty brackets in variable
 
 There is no variable syntax that uses empty brackets.  Check
 the variable doc page.
 
 E: Index between variable brackets must be positive
 
 Self-explanatory.
 
 E: Cannot use ramp in variable formula between runs
 
 This is because the ramp() function is time dependent.
 
 E: Cannot use vdisplace in variable formula between runs
 
 This is a function of elapsed time.
 
 E: Cannot use swiggle in variable formula between runs
 
 This is a function of elapsed time.
 
 E: Cannot use cwiggle in variable formula between runs
 
 This is a function of elapsed time.
 
 E: Group ID in variable formula does not exist
 
 Self-explanatory.
 
 E: Invalid group function in variable formula
 
 Group function is not recognized.
 
 E: Region ID in variable formula does not exist
 
 Self-explanatory.
 
 E: Invalid special function in variable formula
 
 Self-explanatory.
 
 E: Gmask function in equal-style variable formula
 
 Gmask is per-atom operation.
 
 E: Rmask function in equal-style variable formula
 
 Rmask is per-atom operation.
 
 E: Grmask function in equal-style variable formula
 
 Grmask is per-atom operation.
 
 E: Variable ID in variable formula does not exist
 
-UNDOCUMENTED
+Self-explanatory.
 
 E: Invalid variable in special function next
 
-UNDOCUMENTED
+Only file-style variables can be used with the next() function.
 
 E: Indexed per-atom vector in variable formula without atom map
 
 Accessing a value from an atom vector requires the ability to lookup
 an atom index, which is provided by an atom map.  An atom map does not
 exist (by default) for non-molecular problems.  Using the atom_modify
 map command will force an atom map to be created.
 
 E: Invalid atom vector in variable formula
 
 The atom vector is not recognized.
 
 E: Atom vector in equal-style variable formula
 
 Atom vectors generate one value per atom which is not allowed
 in an equal-style variable.
 
 E: Expected floating point parameter in variable definition
 
 The quantity being read is a non-numeric value.
 
 E: Expected integer parameter in variable definition
 
 The quantity being read is a floating point or non-numeric value.
 
 E: Invalid Boolean syntax in if command
 
 Self-explanatory.
 
 E: Cannot open file variable file %s
 
-UNDOCUMENTED
+The specified file cannot be opened.  Check that the path and name are
+correct.
 
 */
diff --git a/src/version.h b/src/version.h
index 6fbb01b35..4d1a9801b 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "4 Mar 2013"
+#define LAMMPS_VERSION "22 Feb 2013"