diff --git a/src/ASPHERE/pair_resquared.cpp b/src/ASPHERE/pair_resquared.cpp
index 5a5c8c881..515153b74 100644
--- a/src/ASPHERE/pair_resquared.cpp
+++ b/src/ASPHERE/pair_resquared.cpp
@@ -1,992 +1,992 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_resquared.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "citeme.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairRESquared::PairRESquared(LAMMPS *lmp) : Pair(lmp),
-                                            b_alpha(45.0/56.0),
-                                            cr60(pow(60.0,1.0/3.0))
+                                            cr60(pow(60.0,1.0/3.0)),
+                                            b_alpha(45.0/56.0)
 {
   single_enable = 0;
 
   cr60 = pow(60.0,1.0/3.0);
   b_alpha = 45.0/56.0;
   solv_f_a = 3.0/(16.0*atan(1.0)*-36.0);
   solv_f_r = 3.0/(16.0*atan(1.0)*2025.0);
   citeme->add(CiteMe::BROWN_2009);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairRESquared::~PairRESquared()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(form);
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(shape1);
     memory->destroy(shape2);
     memory->destroy(well);
     memory->destroy(cut);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
     delete [] lshape;
     delete [] setwell;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairRESquared::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
   double fforce[3],ttor[3],rtor[3],r12[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
   RE2Vars wi,wj;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **tor = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // not a LJ sphere
 
     if (lshape[itype] != 0.0) precompute_i(i,wi);
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       // r12 = center to center vector
 
       r12[0] = x[j][0]-x[i][0];
       r12[1] = x[j][1]-x[i][1];
       r12[2] = x[j][2]-x[i][2];
       rsq = MathExtra::dot3(r12,r12);
       jtype = type[j];
 
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
         fforce[0] = fforce[1] = fforce[2] = 0.0;
 
         switch (form[itype][jtype]) {
 
          case SPHERE_SPHERE:
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj *= -r2inv;
           if (eflag) one_eng =
               r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
               offset[itype][jtype];
           fforce[0] = r12[0]*forcelj;
           fforce[1] = r12[1]*forcelj;
           fforce[2] = r12[2]*forcelj;
           break;
 
          case SPHERE_ELLIPSE:
           precompute_i(j,wj);
           if (newton_pair || j < nlocal) {
             one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,true);
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
             tor[j][2] += rtor[2]*factor_lj;
           } else
             one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
           break;
 
          case ELLIPSE_SPHERE:
           one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
           break;
 
          default:
           precompute_i(j,wj);
           one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
           if (newton_pair || j < nlocal) {
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
             tor[j][2] += rtor[2]*factor_lj;
           }
          break;
         }
 
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
         f[i][0] += fforce[0];
         f[i][1] += fforce[1];
         f[i][2] += fforce[2];
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= fforce[0];
           f[j][1] -= fforce[1];
           f[j][2] -= fforce[2];
         }
 
         if (eflag) evdwl = factor_lj*one_eng;
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,0.0,fforce[0],fforce[1],fforce[2],
                                  -r12[0],-r12[1],-r12[2]);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairRESquared::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(form,n+1,n+1,"pair:form");
   memory->create(epsilon,n+1,n+1,"pair:epsilon");
   memory->create(sigma,n+1,n+1,"pair:sigma");
   memory->create(shape1,n+1,3,"pair:shape1");
   memory->create(shape2,n+1,3,"pair:shape2");
   memory->create(well,n+1,3,"pair:well");
   memory->create(cut,n+1,n+1,"pair:cut");
   memory->create(lj1,n+1,n+1,"pair:lj1");
   memory->create(lj2,n+1,n+1,"pair:lj2");
   memory->create(lj3,n+1,n+1,"pair:lj3");
   memory->create(lj4,n+1,n+1,"pair:lj4");
   memory->create(offset,n+1,n+1,"pair:offset");
 
   lshape = new double[n+1];
   setwell = new int[n+1];
   for (int i = 1; i <= n; i++) setwell[i] = 0;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairRESquared::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairRESquared::coeff(int narg, char **arg)
 {
   if (narg < 10 || narg > 11)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eia_one = force->numeric(FLERR,arg[4]);
   double eib_one = force->numeric(FLERR,arg[5]);
   double eic_one = force->numeric(FLERR,arg[6]);
   double eja_one = force->numeric(FLERR,arg[7]);
   double ejb_one = force->numeric(FLERR,arg[8]);
   double ejc_one = force->numeric(FLERR,arg[9]);
 
   double cut_one = cut_global;
   if (narg == 11) cut_one = force->numeric(FLERR,arg[10]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       if (eia_one != 0.0 || eib_one != 0.0 || eic_one != 0.0) {
         well[i][0] = eia_one;
         well[i][1] = eib_one;
         well[i][2] = eic_one;
         if (eia_one == 1.0 && eib_one == 1.0 && eic_one == 1.0) setwell[i] = 2;
         else setwell[i] = 1;
       }
       if (eja_one != 0.0 || ejb_one != 0.0 || ejc_one != 0.0) {
         well[j][0] = eja_one;
         well[j][1] = ejb_one;
         well[j][2] = ejc_one;
         if (eja_one == 1.0 && ejb_one == 1.0 && ejc_one == 1.0) setwell[j] = 2;
         else setwell[j] = 1;
       }
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairRESquared::init_style()
 {
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec) error->all(FLERR,"Pair resquared requires atom style ellipsoid");
 
   neighbor->request(this);
 
   // per-type shape precalculations
   // require that atom shapes are identical within each type
 
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
       error->all(FLERR,"Pair resquared requires atoms with same type have same shape");
     if (setwell[i]) {
       shape2[i][0] = shape1[i][0]*shape1[i][0];
       shape2[i][1] = shape1[i][1]*shape1[i][1];
       shape2[i][2] = shape1[i][2]*shape1[i][2];
       lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairRESquared::init_one(int i, int j)
 {
   if (setwell[i] == 0 || setwell[j] == 0)
     error->all(FLERR,"Pair resquared epsilon a,b,c coeffs are not all set");
 
   int ishape = 0;
   if (shape1[i][0] != 0.0 && shape1[i][1] != 0.0 && shape1[i][2] != 0.0)
     ishape = 1;
   int jshape = 0;
   if (shape1[j][0] != 0.0 && shape1[j][1] != 0.0 && shape1[j][2] != 0.0)
     jshape = 1;
 
   if (ishape == 0 && jshape == 0) {
     form[i][j] = SPHERE_SPHERE;
     form[j][i] = SPHERE_SPHERE;
   } else if (ishape == 0) {
     form[i][j] = SPHERE_ELLIPSE;
     form[j][i] = ELLIPSE_SPHERE;
   } else if (jshape == 0) {
     form[i][j] = ELLIPSE_SPHERE;
     form[j][i] = SPHERE_ELLIPSE;
   } else {
     form[i][j] = ELLIPSE_ELLIPSE;
     form[j][i] = ELLIPSE_ELLIPSE;
   }
 
   // allow mixing only for LJ spheres
 
   if (setflag[i][j] == 0) {
     if (setflag[j][i] == 0) {
       if (ishape == 0 && jshape == 0) {
         epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                    sigma[i][i],sigma[j][j]);
         sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
         cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
       } else
         error->all(FLERR,
                    "Pair resquared epsilon and sigma coeffs are not all set");
     }
     epsilon[i][j] = epsilon[j][i];
     sigma[i][j] = sigma[j][i];
     cut[i][j] = cut[j][i];
   }
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairRESquared::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++) {
     fwrite(&setwell[i],sizeof(int),1,fp);
     if (setwell[i]) fwrite(&well[i][0],sizeof(double),3,fp);
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&epsilon[i][j],sizeof(double),1,fp);
         fwrite(&sigma[i][j],sizeof(double),1,fp);
         fwrite(&cut[i][j],sizeof(double),1,fp);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairRESquared::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++) {
     if (me == 0) fread(&setwell[i],sizeof(int),1,fp);
     MPI_Bcast(&setwell[i],1,MPI_INT,0,world);
     if (setwell[i]) {
       if (me == 0) fread(&well[i][0],sizeof(double),3,fp);
       MPI_Bcast(&well[i][0],3,MPI_DOUBLE,0,world);
     }
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&epsilon[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairRESquared::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_global,sizeof(double),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairRESquared::read_restart_settings(FILE *fp)
 {
   int me = comm->me;
   if (me == 0) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    Precompute per-particle temporaries for RE-squared calculation
 ------------------------------------------------------------------------- */
 
 void PairRESquared::precompute_i(const int i,RE2Vars &ws)
 {
   double aTs[3][3];       // A1'*S1^2
   int *ellipsoid = atom->ellipsoid;
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   MathExtra::quat_to_mat_trans(bonus[ellipsoid[i]].quat,ws.A);
   MathExtra::transpose_diag3(ws.A,well[atom->type[i]],ws.aTe);
   MathExtra::transpose_diag3(ws.A,shape2[atom->type[i]],aTs);
   MathExtra::diag_times3(shape2[atom->type[i]],ws.A,ws.sa);
   MathExtra::times3(aTs,ws.A,ws.gamma);
   MathExtra::rotation_generator_x(ws.A,ws.lA[0]);
   MathExtra::rotation_generator_y(ws.A,ws.lA[1]);
   MathExtra::rotation_generator_z(ws.A,ws.lA[2]);
   for (int i=0; i<3; i++) {
     MathExtra::times3(aTs,ws.lA[i],ws.lAtwo[i]);
     MathExtra::transpose_times3(ws.lA[i],ws.sa,ws.lAsa[i]);
     MathExtra::plus3(ws.lAsa[i],ws.lAtwo[i],ws.lAsa[i]);
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute the derivative of the determinant of m, using m and the
    derivative of m (m2)
 ------------------------------------------------------------------------- */
 
 double PairRESquared::det_prime(const double m[3][3], const double m2[3][3])
 {
   double ans;
   ans = m2[0][0]*m[1][1]*m[2][2] - m2[0][0]*m[1][2]*m[2][1] -
         m[1][0]*m2[0][1]*m[2][2] + m[1][0]*m2[0][2]*m[2][1] +
         m[2][0]*m2[0][1]*m[1][2] - m[2][0]*m2[0][2]*m[1][1] +
         m[0][0]*m2[1][1]*m[2][2] - m[0][0]*m2[1][2]*m[2][1] -
         m2[1][0]*m[0][1]*m[2][2] + m2[1][0]*m[0][2]*m[2][1] +
         m[2][0]*m[0][1]*m2[1][2] - m[2][0]*m[0][2]*m2[1][1] +
         m[0][0]*m[1][1]*m2[2][2] - m[0][0]*m[1][2]*m2[2][1] -
         m[1][0]*m[0][1]*m2[2][2] + m[1][0]*m[0][2]*m2[2][1] +
         m2[2][0]*m[0][1]*m[1][2] - m2[2][0]*m[0][2]*m[1][1];
   return ans;
 }
 
 /* ----------------------------------------------------------------------
    Compute the energy, force, torque for a pair (INTEGRATED-INTEGRATED)
 ------------------------------------------------------------------------- */
 
 double PairRESquared::resquared_analytic(const int i, const int j,
                                          const RE2Vars &wi, const RE2Vars &wj,
                                          const double *r, const double rsq,
                                          double *fforce, double *ttor,
                                          double *rtor)
 {
   int *type = atom->type;
 
   // pair computations for energy, force, torque
 
   double z1[3],z2[3];        // A1*rhat  # don't need to store
   double v1[3],v2[3];        // inv(S1^2)*z1 # don't need to store
   double sigma1,sigma2;      // 1/sqrt(z1'*v1)
   double sigma1p2,sigma2p2;  // sigma1^2
   double rnorm;              // L2 norm of r
   double rhat[3];            // r/rnorm
   double s[3];               // inv(gamma1+gamma2)*rhat
   double sigma12;            // 1/sqrt(0.5*s'*rhat)
   double H12[3][3];          // gamma1/sigma1+gamma2/sigma2
   double dH;                 // det(H12)
   double lambda;             // dS1/sigma1p2+dS2/sigma2p2
   double nu;                 // sqrt(dH/(sigma1+sigma2))
   double w[3];               // inv(A1'*E1*A1+A2'*E2*A2)*rhat
   double h12;                // rnorm-sigma12;
   double eta;                // lambda/nu
   double chi;                // 2*rhat'*w
   double sprod;              // dS1*dS2
   double sigh;               // sigma/h12
   double tprod;              // eta*chi*sigh
   double Ua,Ur;              // attractive/repulsive parts of potential
 
   // pair computations for force, torque
 
   double sec;                          // sigma*eta*chi
   double sigma1p3, sigma2p3;           // sigma1^3
   double vsigma1[3], vsigma2[3];       // sigma1^3*v1;
   double sigma12p3;                    // sigma12^3
   double gsigma1[3][3], gsigma2[3][3]; // -gamma1/sigma1^2
   double tsig1sig2;                    // eta/(2*(sigma1+sigma2))
   double tdH;                          // eta/(2*dH)
   double teta1,teta2;                  // 2*eta/lambda*dS1/sigma1p3
   double fourw[3];                     // 4*w;
   double spr[3];                       // 0.5*sigma12^3*s
   double hsec;                         // h12+[3,b_alpha]*sec
   double dspu;                         // 1/h12 - 1/hsec + temp
   double pbsu;                         // 3*sigma/hsec
   double dspr;                         // 7/h12-1/hsec+temp
   double pbsr;                         // b_alpha*sigma/hsec;
   double u[3];                         // (-rhat(i)*rhat+eye(:,i))/rnorm
   double u1[3],u2[3];                  // A1*u
   double dsigma1,dsigma2;              // u1'*vsigma1 (force) p'*vsigma1 (tor)
   double dH12[3][3];                   // dsigma1*gsigma1 + dsigma2*gsigma2
   double ddH;                          // derivative of det(H12)
   double deta,dchi,dh12;               // derivatives of eta,chi,h12
   double dUr,dUa;                      // derivatives of Ua,Ur
 
   // pair computations for torque
 
   double fwae[3];        // -fourw'*aTe
   double p[3];           // lA*rhat
 
   rnorm = sqrt(rsq);
   rhat[0] = r[0]/rnorm;
   rhat[1] = r[1]/rnorm;
   rhat[2] = r[2]/rnorm;
 
   // energy
 
   double temp[3][3];
   MathExtra::plus3(wi.gamma,wj.gamma,temp);
   int ierror = MathExtra::mldivide3(temp,rhat,s);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   sigma12 = 1.0/sqrt(0.5*MathExtra::dot3(s,rhat));
   MathExtra::matvec(wi.A,rhat,z1);
   MathExtra::matvec(wj.A,rhat,z2);
   v1[0] = z1[0]/shape2[type[i]][0];
   v1[1] = z1[1]/shape2[type[i]][1];
   v1[2] = z1[2]/shape2[type[i]][2];
   v2[0] = z2[0]/shape2[type[j]][0];
   v2[1] = z2[1]/shape2[type[j]][1];
   v2[2] = z2[2]/shape2[type[j]][2];
   sigma1 = 1.0/sqrt(MathExtra::dot3(z1,v1));
   sigma2 = 1.0/sqrt(MathExtra::dot3(z2,v2));
   H12[0][0] = wi.gamma[0][0]/sigma1+wj.gamma[0][0]/sigma2;
   H12[0][1] = wi.gamma[0][1]/sigma1+wj.gamma[0][1]/sigma2;
   H12[0][2] = wi.gamma[0][2]/sigma1+wj.gamma[0][2]/sigma2;
   H12[1][0] = wi.gamma[1][0]/sigma1+wj.gamma[1][0]/sigma2;
   H12[1][1] = wi.gamma[1][1]/sigma1+wj.gamma[1][1]/sigma2;
   H12[1][2] = wi.gamma[1][2]/sigma1+wj.gamma[1][2]/sigma2;
   H12[2][0] = wi.gamma[2][0]/sigma1+wj.gamma[2][0]/sigma2;
   H12[2][1] = wi.gamma[2][1]/sigma1+wj.gamma[2][1]/sigma2;
   H12[2][2] = wi.gamma[2][2]/sigma1+wj.gamma[2][2]/sigma2;
   dH=MathExtra::det3(H12);
   sigma1p2 = sigma1*sigma1;
   sigma2p2 = sigma2*sigma2;
   lambda = lshape[type[i]]/sigma1p2 + lshape[type[j]]/sigma2p2;
   nu = sqrt(dH/(sigma1+sigma2));
   MathExtra::times3(wi.aTe,wi.A,temp);
   double temp2[3][3];
   MathExtra::times3(wj.aTe,wj.A,temp2);
   MathExtra::plus3(temp,temp2,temp);
   ierror = MathExtra::mldivide3(temp,rhat,w);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   h12 = rnorm-sigma12;
   eta = lambda/nu;
   chi = 2.0*MathExtra::dot3(rhat,w);
   sprod = lshape[type[i]] * lshape[type[j]];
   sigh = sigma[type[i]][type[j]]/h12;
   tprod = eta*chi*sigh;
 
   double stemp = h12/2.0;
   Ua = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*(shape1[type[j]][0]+stemp)*
        (shape1[type[j]][1]+stemp)*(shape1[type[j]][2]+stemp);
   Ua = (1.0+3.0*tprod)*sprod/Ua;
   Ua = epsilon[type[i]][type[j]]*Ua/-36.0;
 
   stemp = h12/cr60;
   Ur = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*(shape1[type[j]][0]+stemp)*
        (shape1[type[j]][1]+stemp)*(shape1[type[j]][2]+stemp);
   Ur = (1.0+b_alpha*tprod)*sprod/Ur;
   Ur = epsilon[type[i]][type[j]]*Ur*pow(sigh,6.0)/2025.0;
 
   // force
 
   sec = sigma[type[i]][type[j]]*eta*chi;
   sigma12p3 = pow(sigma12,3.0);
   sigma1p3 = sigma1p2*sigma1;
   sigma2p3 = sigma2p2*sigma2;
   vsigma1[0] = -sigma1p3*v1[0];
   vsigma1[1] = -sigma1p3*v1[1];
   vsigma1[2] = -sigma1p3*v1[2];
   vsigma2[0] = -sigma2p3*v2[0];
   vsigma2[1] = -sigma2p3*v2[1];
   vsigma2[2] = -sigma2p3*v2[2];
   gsigma1[0][0] = -wi.gamma[0][0]/sigma1p2;
   gsigma1[0][1] = -wi.gamma[0][1]/sigma1p2;
   gsigma1[0][2] = -wi.gamma[0][2]/sigma1p2;
   gsigma1[1][0] = -wi.gamma[1][0]/sigma1p2;
   gsigma1[1][1] = -wi.gamma[1][1]/sigma1p2;
   gsigma1[1][2] = -wi.gamma[1][2]/sigma1p2;
   gsigma1[2][0] = -wi.gamma[2][0]/sigma1p2;
   gsigma1[2][1] = -wi.gamma[2][1]/sigma1p2;
   gsigma1[2][2] = -wi.gamma[2][2]/sigma1p2;
   gsigma2[0][0] = -wj.gamma[0][0]/sigma2p2;
   gsigma2[0][1] = -wj.gamma[0][1]/sigma2p2;
   gsigma2[0][2] = -wj.gamma[0][2]/sigma2p2;
   gsigma2[1][0] = -wj.gamma[1][0]/sigma2p2;
   gsigma2[1][1] = -wj.gamma[1][1]/sigma2p2;
   gsigma2[1][2] = -wj.gamma[1][2]/sigma2p2;
   gsigma2[2][0] = -wj.gamma[2][0]/sigma2p2;
   gsigma2[2][1] = -wj.gamma[2][1]/sigma2p2;
   gsigma2[2][2] = -wj.gamma[2][2]/sigma2p2;
   tsig1sig2 = eta/(2.0*(sigma1+sigma2));
   tdH = eta/(2.0*dH);
   teta1 = 2.0*eta/lambda;
   teta2 = teta1*lshape[type[j]]/sigma2p3;
   teta1 = teta1*lshape[type[i]]/sigma1p3;
   fourw[0] = 4.0*w[0];
   fourw[1] = 4.0*w[1];
   fourw[2] = 4.0*w[2];
   spr[0] = 0.5*sigma12p3*s[0];
   spr[1] = 0.5*sigma12p3*s[1];
   spr[2] = 0.5*sigma12p3*s[2];
 
   stemp = 1.0/(shape1[type[i]][0]*2.0+h12)+
           1.0/(shape1[type[i]][1]*2.0+h12)+
           1.0/(shape1[type[i]][2]*2.0+h12)+
           1.0/(shape1[type[j]][0]*2.0+h12)+
           1.0/(shape1[type[j]][1]*2.0+h12)+
           1.0/(shape1[type[j]][2]*2.0+h12);
   hsec = h12+3.0*sec;
   dspu = 1.0/h12-1.0/hsec+stemp;
   pbsu = 3.0*sigma[type[i]][type[j]]/hsec;
 
   stemp = 1.0/(shape1[type[i]][0]*cr60+h12)+
           1.0/(shape1[type[i]][1]*cr60+h12)+
           1.0/(shape1[type[i]][2]*cr60+h12)+
           1.0/(shape1[type[j]][0]*cr60+h12)+
           1.0/(shape1[type[j]][1]*cr60+h12)+
           1.0/(shape1[type[j]][2]*cr60+h12);
   hsec = h12+b_alpha*sec;
   dspr = 7.0/h12-1.0/hsec+stemp;
   pbsr = b_alpha*sigma[type[i]][type[j]]/hsec;
 
   for (int i=0; i<3; i++) {
     u[0] = -rhat[i]*rhat[0];
     u[1] = -rhat[i]*rhat[1];
     u[2] = -rhat[i]*rhat[2];
     u[i] += 1.0;
     u[0] /= rnorm;
     u[1] /= rnorm;
     u[2] /= rnorm;
     MathExtra::matvec(wi.A,u,u1);
     MathExtra::matvec(wj.A,u,u2);
     dsigma1=MathExtra::dot3(u1,vsigma1);
     dsigma2=MathExtra::dot3(u2,vsigma2);
     dH12[0][0] = dsigma1*gsigma1[0][0]+dsigma2*gsigma2[0][0];
     dH12[0][1] = dsigma1*gsigma1[0][1]+dsigma2*gsigma2[0][1];
     dH12[0][2] = dsigma1*gsigma1[0][2]+dsigma2*gsigma2[0][2];
     dH12[1][0] = dsigma1*gsigma1[1][0]+dsigma2*gsigma2[1][0];
     dH12[1][1] = dsigma1*gsigma1[1][1]+dsigma2*gsigma2[1][1];
     dH12[1][2] = dsigma1*gsigma1[1][2]+dsigma2*gsigma2[1][2];
     dH12[2][0] = dsigma1*gsigma1[2][0]+dsigma2*gsigma2[2][0];
     dH12[2][1] = dsigma1*gsigma1[2][1]+dsigma2*gsigma2[2][1];
     dH12[2][2] = dsigma1*gsigma1[2][2]+dsigma2*gsigma2[2][2];
     ddH = det_prime(H12,dH12);
     deta = (dsigma1+dsigma2)*tsig1sig2;
     deta -= ddH*tdH;
     deta -= dsigma1*teta1+dsigma2*teta2;
     dchi = MathExtra::dot3(u,fourw);
     dh12 = rhat[i]+MathExtra::dot3(u,spr);
     dUa = pbsu*(eta*dchi+deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi+deta*chi)-dh12*dspr;
     fforce[i]=dUr*Ur+dUa*Ua;
   }
 
   // torque on i
 
   MathExtra::vecmat(fourw,wi.aTe,fwae);
 
   for (int i=0; i<3; i++) {
     MathExtra::matvec(wi.lA[i],rhat,p);
     dsigma1 = MathExtra::dot3(p,vsigma1);
     dH12[0][0] = wi.lAsa[i][0][0]/sigma1+dsigma1*gsigma1[0][0];
     dH12[0][1] = wi.lAsa[i][0][1]/sigma1+dsigma1*gsigma1[0][1];
     dH12[0][2] = wi.lAsa[i][0][2]/sigma1+dsigma1*gsigma1[0][2];
     dH12[1][0] = wi.lAsa[i][1][0]/sigma1+dsigma1*gsigma1[1][0];
     dH12[1][1] = wi.lAsa[i][1][1]/sigma1+dsigma1*gsigma1[1][1];
     dH12[1][2] = wi.lAsa[i][1][2]/sigma1+dsigma1*gsigma1[1][2];
     dH12[2][0] = wi.lAsa[i][2][0]/sigma1+dsigma1*gsigma1[2][0];
     dH12[2][1] = wi.lAsa[i][2][1]/sigma1+dsigma1*gsigma1[2][1];
     dH12[2][2] = wi.lAsa[i][2][2]/sigma1+dsigma1*gsigma1[2][2];
     ddH = det_prime(H12,dH12);
     deta = tsig1sig2*dsigma1-tdH*ddH;
     deta -= teta1*dsigma1;
     double tempv[3];
     MathExtra::matvec(wi.lA[i],w,tempv);
     dchi = -MathExtra::dot3(fwae,tempv);
     MathExtra::matvec(wi.lAtwo[i],spr,tempv);
     dh12 = -MathExtra::dot3(s,tempv);
 
     dUa = pbsu*(eta*dchi + deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi + deta*chi)-dh12*dspr;
     ttor[i] = -(dUa*Ua+dUr*Ur);
   }
 
   // torque on j
 
   if (!(force->newton_pair || j < atom->nlocal))
     return Ua+Ur;
 
   MathExtra::vecmat(fourw,wj.aTe,fwae);
 
   for (int i=0; i<3; i++) {
     MathExtra::matvec(wj.lA[i],rhat,p);
     dsigma2 = MathExtra::dot3(p,vsigma2);
     dH12[0][0] = wj.lAsa[i][0][0]/sigma2+dsigma2*gsigma2[0][0];
     dH12[0][1] = wj.lAsa[i][0][1]/sigma2+dsigma2*gsigma2[0][1];
     dH12[0][2] = wj.lAsa[i][0][2]/sigma2+dsigma2*gsigma2[0][2];
     dH12[1][0] = wj.lAsa[i][1][0]/sigma2+dsigma2*gsigma2[1][0];
     dH12[1][1] = wj.lAsa[i][1][1]/sigma2+dsigma2*gsigma2[1][1];
     dH12[1][2] = wj.lAsa[i][1][2]/sigma2+dsigma2*gsigma2[1][2];
     dH12[2][0] = wj.lAsa[i][2][0]/sigma2+dsigma2*gsigma2[2][0];
     dH12[2][1] = wj.lAsa[i][2][1]/sigma2+dsigma2*gsigma2[2][1];
     dH12[2][2] = wj.lAsa[i][2][2]/sigma2+dsigma2*gsigma2[2][2];
     ddH = det_prime(H12,dH12);
     deta = tsig1sig2*dsigma2-tdH*ddH;
     deta -= teta2*dsigma2;
     double tempv[3];
     MathExtra::matvec(wj.lA[i],w,tempv);
     dchi = -MathExtra::dot3(fwae,tempv);
     MathExtra::matvec(wj.lAtwo[i],spr,tempv);
     dh12 = -MathExtra::dot3(s,tempv);
 
     dUa = pbsu*(eta*dchi + deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi + deta*chi)-dh12*dspr;
     rtor[i] = -(dUa*Ua+dUr*Ur);
   }
 
   return Ua+Ur;
 }
 
 /* ----------------------------------------------------------------------
    Compute the energy, force, torque for a pair (INTEGRATED-LJ)
 ------------------------------------------------------------------------- */
 
 double PairRESquared::resquared_lj(const int i, const int j,
                                    const RE2Vars &wi, const double *r,
                                    const double rsq, double *fforce,
                                    double *ttor, bool calc_torque)
 {
   int *type = atom->type;
 
   // pair computations for energy, force, torque
 
   double rnorm;              // L2 norm of r
   double rhat[3];            // r/rnorm
   double s[3];               // inv(gamma1)*rhat
   double sigma12;            // 1/sqrt(0.5*s'*rhat)
   double w[3];               // inv(A1'*E1*A1+I)*rhat
   double h12;                // rnorm-sigma12;
   double chi;                // 2*rhat'*w
   double sigh;               // sigma/h12
   double tprod;              // chi*sigh
   double Ua,Ur;              // attractive/repulsive parts of potential
 
   // pair computations for force, torque
 
   double sec;                          // sigma*chi
   double sigma12p3;                    // sigma12^3
   double fourw[3];                     // 4*w;
   double spr[3];                       // 0.5*sigma12^3*s
   double hsec;                         // h12+[3,b_alpha]*sec
   double dspu;                         // 1/h12 - 1/hsec + temp
   double pbsu;                         // 3*sigma/hsec
   double dspr;                         // 7/h12-1/hsec+temp
   double pbsr;                         // b_alpha*sigma/hsec;
   double u[3];                         // (-rhat(i)*rhat+eye(:,i))/rnorm
   double dchi,dh12;                    // derivatives of chi,h12
   double dUr,dUa;                      // derivatives of Ua,Ur
   double h12p3;                        // h12^3
 
   // pair computations for torque
 
   double fwae[3];        // -fourw'*aTe
   double p[3];           // lA*rhat
 
   // distance of closest approach correction
 
   double aTs[3][3];       // A1'*S1^2
   double gamma[3][3];     // A1'*S1^2*A
   double lAtwo[3][3][3];  // A1'*S1^2*wi.lA
   double scorrect[3];
   double half_sigma=sigma[type[i]][type[j]] / 2.0;
   scorrect[0] = shape1[type[i]][0]+half_sigma;
   scorrect[1] = shape1[type[i]][1]+half_sigma;
   scorrect[2] = shape1[type[i]][2]+half_sigma;
   scorrect[0] = scorrect[0] * scorrect[0] / 2.0;
   scorrect[1] = scorrect[1] * scorrect[1] / 2.0;
   scorrect[2] = scorrect[2] * scorrect[2] / 2.0;
   MathExtra::transpose_diag3(wi.A,scorrect,aTs);
   MathExtra::times3(aTs,wi.A,gamma);
   for (int ii=0; ii<3; ii++)
     MathExtra::times3(aTs,wi.lA[ii],lAtwo[ii]);
 
   rnorm=sqrt(rsq);
   rhat[0] = r[0]/rnorm;
   rhat[1] = r[1]/rnorm;
   rhat[2] = r[2]/rnorm;
 
   // energy
 
   int ierror = MathExtra::mldivide3(gamma,rhat,s);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   sigma12 = 1.0/sqrt(0.5*MathExtra::dot3(s,rhat));
   double temp[3][3];
   MathExtra::times3(wi.aTe,wi.A,temp);
   temp[0][0] += 1.0;
   temp[1][1] += 1.0;
   temp[2][2] += 1.0;
   ierror = MathExtra::mldivide3(temp,rhat,w);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   h12 = rnorm-sigma12;
   chi = 2.0*MathExtra::dot3(rhat,w);
   sigh = sigma[type[i]][type[j]]/h12;
   tprod = chi*sigh;
 
   h12p3 = pow(h12,3.0);
   double sigmap3 = pow(sigma[type[i]][type[j]],3.0);
   double stemp = h12/2.0;
   Ua = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*h12p3/8.0;
   Ua = (1.0+3.0*tprod)*lshape[type[i]]/Ua;
   Ua = epsilon[type[i]][type[j]]*Ua*sigmap3*solv_f_a;
 
   stemp = h12/cr60;
   Ur = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*h12p3/60.0;
   Ur = (1.0+b_alpha*tprod)*lshape[type[i]]/Ur;
   Ur = epsilon[type[i]][type[j]]*Ur*sigmap3*pow(sigh,6.0)*solv_f_r;
 
   // force
 
   sec = sigma[type[i]][type[j]]*chi;
   sigma12p3 = pow(sigma12,3.0);
   fourw[0] = 4.0*w[0];
   fourw[1] = 4.0*w[1];
   fourw[2] = 4.0*w[2];
   spr[0] = 0.5*sigma12p3*s[0];
   spr[1] = 0.5*sigma12p3*s[1];
   spr[2] = 0.5*sigma12p3*s[2];
 
   stemp = 1.0/(shape1[type[i]][0]*2.0+h12)+
           1.0/(shape1[type[i]][1]*2.0+h12)+
           1.0/(shape1[type[i]][2]*2.0+h12)+
           3.0/h12;
   hsec = h12+3.0*sec;
   dspu = 1.0/h12-1.0/hsec+stemp;
   pbsu = 3.0*sigma[type[i]][type[j]]/hsec;
 
   stemp = 1.0/(shape1[type[i]][0]*cr60+h12)+
           1.0/(shape1[type[i]][1]*cr60+h12)+
           1.0/(shape1[type[i]][2]*cr60+h12)+
           3.0/h12;
   hsec = h12+b_alpha*sec;
   dspr = 7.0/h12-1.0/hsec+stemp;
   pbsr = b_alpha*sigma[type[i]][type[j]]/hsec;
 
   for (int i=0; i<3; i++) {
     u[0] = -rhat[i]*rhat[0];
     u[1] = -rhat[i]*rhat[1];
     u[2] = -rhat[i]*rhat[2];
     u[i] += 1.0;
     u[0] /= rnorm;
     u[1] /= rnorm;
     u[2] /= rnorm;
     dchi = MathExtra::dot3(u,fourw);
     dh12 = rhat[i]+MathExtra::dot3(u,spr);
     dUa = pbsu*dchi-dh12*dspu;
     dUr = pbsr*dchi-dh12*dspr;
     fforce[i]=dUr*Ur+dUa*Ua;
   }
 
   // torque on i
 
   if (calc_torque) {
     MathExtra::vecmat(fourw,wi.aTe,fwae);
 
     for (int i=0; i<3; i++) {
       MathExtra::matvec(wi.lA[i],rhat,p);
       double tempv[3];
       MathExtra::matvec(wi.lA[i],w,tempv);
       dchi = -MathExtra::dot3(fwae,tempv);
       MathExtra::matvec(lAtwo[i],spr,tempv);
       dh12 = -MathExtra::dot3(s,tempv);
 
       dUa = pbsu*dchi-dh12*dspu;
       dUr = pbsr*dchi-dh12*dspr;
       ttor[i] = -(dUa*Ua+dUr*Ur);
     }
   }
 
   return Ua+Ur;
 }
diff --git a/src/KSPACE/pair_lj_charmm_coul_long.cpp b/src/KSPACE/pair_lj_charmm_coul_long.cpp
index 29729342d..96f9cfd89 100644
--- a/src/KSPACE/pair_lj_charmm_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmm_coul_long.cpp
@@ -1,1039 +1,1039 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_charmm_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   respa_enable = 1;
   ewaldflag = pppmflag = 1;
   ftable = NULL;
   implicit = 0;
   mix_flag = ARITHMETIC;
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLong::~PairLJCharmmCoulLong()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(eps14);
     memory->destroy(sigma14);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(lj14_1);
     memory->destroy(lj14_2);
     memory->destroy(lj14_3);
     memory->destroy(lj14_4);
   }
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq) {
           r6inv = r2inv*r2inv*r2inv;
           jtype = type[j];
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
         } else forcelj = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_inner()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         r2inv = 1.0/rsq;
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         r6inv = r2inv*r2inv*r2inv;
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         r2inv = 1.0/rsq;
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         r6inv = r2inv*r2inv*r2inv;
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         if (rsq > cut_lj_innersq) {
           switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
             (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
           switch2 = 12.0*rsq * (cut_ljsq-rsq) *
             (rsq-cut_lj_innersq) / denom_lj;
           philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
           forcelj = forcelj*switch1 + philj*switch2;
         }
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         if (rsq < cut_in_on_sq) {
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - 1.0);
             if (rsq > cut_in_off_sq) {
               if (rsq < cut_in_on_sq) {
                 rsw = (r - cut_in_off)/cut_in_diff;
                 forcecoul += prefactor*rsw*rsw*(3.0 - 2.0*rsw);
                 if (factor_coul < 1.0)
                   forcecoul -=
                     (1.0-factor_coul)*prefactor*rsw*rsw*(3.0 - 2.0*rsw);
               } else {
                 forcecoul += prefactor;
                 if (factor_coul < 1.0)
                   forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else {
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq && rsq > cut_in_off_sq) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             forcelj *= rsw*rsw*(3.0 - 2.0*rsw);
           }
         } else forcelj = 0.0;
 
         fpair = (forcecoul + forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               ecoul = prefactor*erfc;
               if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 ecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             r6inv = r2inv*r2inv*r2inv;
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (vflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
               if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = vtable[itable] + fraction*dvtable[itable];
               forcecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else forcecoul = 0.0;
 
           if (rsq <= cut_in_off_sq) {
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0*rsq * (cut_ljsq-rsq) *
                 (rsq-cut_lj_innersq) / denom_lj;
               philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           } else if (rsq <= cut_in_on_sq) {
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0*rsq * (cut_ljsq-rsq) *
                 (rsq-cut_lj_innersq) / denom_lj;
               philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           }
 
           fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(epsilon,n+1,n+1,"pair:epsilon");
   memory->create(sigma,n+1,n+1,"pair:sigma");
   memory->create(eps14,n+1,n+1,"pair:eps14");
   memory->create(sigma14,n+1,n+1,"pair:sigma14");
   memory->create(lj1,n+1,n+1,"pair:lj1");
   memory->create(lj2,n+1,n+1,"pair:lj2");
   memory->create(lj3,n+1,n+1,"pair:lj3");
   memory->create(lj4,n+1,n+1,"pair:lj4");
   memory->create(lj14_1,n+1,n+1,"pair:lj14_1");
   memory->create(lj14_2,n+1,n+1,"pair:lj14_2");
   memory->create(lj14_3,n+1,n+1,"pair:lj14_3");
   memory->create(lj14_4,n+1,n+1,"pair:lj14_4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    unlike other pair styles,
      there are no individual pair settings that these override
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::settings(int narg, char **arg)
 {
   if (narg != 2 && narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_inner = force->numeric(FLERR,arg[0]);
   cut_lj = force->numeric(FLERR,arg[1]);
   if (narg == 2) cut_coul = cut_lj;
   else cut_coul = force->numeric(FLERR,arg[2]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::coeff(int narg, char **arg)
 {
   if (narg != 4 && narg != 6) error->all(FLERR,"Illegal pair_coeff command");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eps14_one = epsilon_one;
   double sigma14_one = sigma_one;
   if (narg == 6) {
     eps14_one = force->numeric(FLERR,arg[4]);
     sigma14_one = force->numeric(FLERR,arg[5]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       eps14[i][j] = eps14_one;
       sigma14[i][j] = sigma14_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/charmm/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
     if (respa == 0) irequest = neighbor->request(this);
     else if (respa == 1) {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
       irequest = neighbor->request(this);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
   } else irequest = neighbor->request(this);
 
   // require cut_lj_inner < cut_lj
 
   if (cut_lj_inner >= cut_lj)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
 
   // set & error check interior rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0) {
     cut_respa = ((Respa *) update->integrate)->cutoff;
     if (MIN(cut_lj,cut_coul) < cut_respa[3])
       error->all(FLERR,"Pair cutoff < Respa interior cutoff");
     if (cut_lj_inner < cut_respa[1])
       error->all(FLERR,"Pair inner cutoff < Respa interior cutoff");
   } else cut_respa = NULL;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::init_list(int id, NeighList *ptr)
 {
   if (id == 0) list = ptr;
   else if (id == 1) listinner = ptr;
   else if (id == 2) listmiddle = ptr;
   else if (id == 3) listouter = ptr;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     eps14[i][j] = mix_energy(eps14[i][i],eps14[j][j],
                                sigma14[i][i],sigma14[j][j]);
     sigma14[i][j] = mix_distance(sigma14[i][i],sigma14[j][j]);
   }
 
   double cut = MAX(cut_lj,cut_coul);
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj14_1[i][j] = 48.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_2[i][j] = 24.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
   lj14_3[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_4[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
 
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   lj14_1[j][i] = lj14_1[i][j];
   lj14_2[j][i] = lj14_2[i][j];
   lj14_3[j][i] = lj14_3[i][j];
   lj14_4[j][i] = lj14_4[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&epsilon[i][j],sizeof(double),1,fp);
         fwrite(&sigma[i][j],sizeof(double),1,fp);
         fwrite(&eps14[i][j],sizeof(double),1,fp);
         fwrite(&sigma14[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&epsilon[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&eps14[i][j],sizeof(double),1,fp);
           fread(&sigma14[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&eps14[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma14[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_lj_inner,sizeof(double),1,fp);
   fwrite(&cut_lj,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     fread(&cut_lj_inner,sizeof(double),1,fp);
     fread(&cut_lj,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
   MPI_Bcast(&cut_lj_inner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_data(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     fprintf(fp,"%d %g %g %g %g\n",
             i,epsilon[i][i],sigma[i][i],eps14[i][i],sigma14[i][i]);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
-      fprintf(fp,"%d %d %g %g %g %g %g\n",i,j,
+      fprintf(fp,"%d %d %g %g %g %g\n",i,j,
               epsilon[i][j],sigma[i][j],eps14[i][j],sigma14[i][j]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype,
                                     double rsq,
                                     double factor_coul, double factor_lj,
                                     double &fforce)
 {
   double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor;
   double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       union_int_float_t rsq_lookup;
       rsq_lookup.f = rsq;
       itable = rsq_lookup.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
   if (rsq < cut_ljsq) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       switch2 = 12.0*rsq * (cut_ljsq-rsq) *
         (rsq-cut_lj_innersq) / denom_lj;
       philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
       forcelj = forcelj*switch1 + philj*switch2;
     }
   } else forcelj = 0.0;
   fforce = (forcecoul + factor_lj*forcelj) * r2inv;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
 
   if (rsq < cut_ljsq) {
     philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       philj *= switch1;
     }
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCharmmCoulLong::extract(const char *str, int &dim)
 {
   dim = 2;
   if (strcmp(str,"lj14_1") == 0) return (void *) lj14_1;
   if (strcmp(str,"lj14_2") == 0) return (void *) lj14_2;
   if (strcmp(str,"lj14_3") == 0) return (void *) lj14_3;
   if (strcmp(str,"lj14_4") == 0) return (void *) lj14_4;
 
   dim = 0;
   if (strcmp(str,"implicit") == 0) return (void *) &implicit;
   if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul;
 
   return NULL;
 }
diff --git a/src/KSPACE/pppm_stagger.h b/src/KSPACE/pppm_stagger.h
index b1ed7779d..2b2b5ae1c 100644
--- a/src/KSPACE/pppm_stagger.h
+++ b/src/KSPACE/pppm_stagger.h
@@ -1,109 +1,110 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef KSPACE_CLASS
 
 KSpaceStyle(pppm/stagger,PPPMStagger)
 
 #else
 
 #ifndef LMP_PPPM_STAGGER_H
 #define LMP_PPPM_STAGGER_H
 
 #include "pppm.h"
 
 namespace LAMMPS_NS {
 
 class PPPMStagger : public PPPM {
  public:
   PPPMStagger(class LAMMPS *, int, char **);
   virtual ~PPPMStagger();
   virtual void init();
   virtual void compute(int, int);
   virtual int timing_1d(int, double &);
   virtual int timing_3d(int, double &);
 
  protected:
   int nstagger;
   double stagger;
   double **gf_b2;
 
   virtual double compute_qopt();
   double compute_qopt_ad();
   virtual void compute_gf_denom();
   virtual void compute_gf_ik();
   virtual void compute_gf_ad();
   
   virtual void particle_map();
   virtual void make_rho();
   virtual void fieldforce_ik();
   virtual void fieldforce_ad();
   virtual void fieldforce_peratom();
 
 
   inline double gf_denom2(const double &x, const double &y,
                          const double &z) const {
     double sx,sy,sz;
     double x2 = x*x;
     double y2 = y*y;
     double z2 = z*z;
     double xl = x;
     double yl = y;
     double zl = z;
+    sx = sy = sz = 0.0;
     for (int l = 0; l < order; l++) {
       sx += gf_b2[order][l]*xl;
       sy += gf_b2[order][l]*yl;
       sz += gf_b2[order][l]*zl;
       xl *= x2;
       yl *= y2;
       zl *= z2;
     }
     double s = sx*sy*sz;
     return s*s;
   };
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot (yet) use kspace_style pppm/stagger with triclinic systems
 
 This feature is not yet supported.
 
 E: Out of range atoms - cannot compute PPPM
 
 One or more atoms are attempting to map their charge to a PPPM grid
 point that is not owned by a processor.  This is likely for one of two
 reasons, both of them bad.  First, it may mean that an atom near the
 boundary of a processor's sub-domain has moved more than 1/2 the
 "neighbor skin distance"_neighbor.html without neighbor lists being
 rebuilt and atoms being migrated to new processors.  This also means
 you may be missing pairwise interactions that need to be computed.
 The solution is to change the re-neighboring criteria via the
 "neigh_modify"_neigh_modify command.  The safest settings are "delay 0
 every 1 check yes".  Second, it may mean that an atom has moved far
 outside a processor's sub-domain or even the entire simulation box.
 This indicates bad physics, e.g. due to highly overlapping atoms, too
 large a timestep, etc.
 
 */
diff --git a/src/MAKE/Makefile.serial-clang b/src/MAKE/Makefile.serial-clang
index 3d462ae72..255cbfba4 100644
--- a/src/MAKE/Makefile.serial-clang
+++ b/src/MAKE/Makefile.serial-clang
@@ -1,108 +1,108 @@
 # serial-clang = RedHat Linux box, clang, no MPI, no FFTs
 
 SHELL = /bin/sh
 
 # ---------------------------------------------------------------------
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
 
 CC =		clang++
-CCFLAGS =	-O3  -fno-exceptions -Wall -Wno-sometimes-uninitialized
+CCFLAGS =	-O3  -fno-exceptions -Wall -Wno-sometimes-uninitialized -Wno-unused-variable
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		clang++
 LINKFLAGS =	-O3  -fno-exceptions
 LIB =          
 SIZE =		size
 
 ARCHIVE =	ar
 ARFLAGS =	-rc
 SHLIBFLAGS =	-shared
 
 # ---------------------------------------------------------------------
 # LAMMPS-specific settings
 # specify settings for LAMMPS features you will use
 # if you change any -D setting, do full re-compile after "make clean"
 
 # LAMMPS ifdef settings, OPTIONAL
 # see possible settings in doc/Section_start.html#2_2 (step 4)
 
 LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_MEMALIGN=32
 
 # MPI library, REQUIRED
 # see discussion in doc/Section_start.html#2_2 (step 5)
 # can point to dummy MPI library in src/STUBS as in Makefile.serial
 # INC = path for mpi.h, MPI compiler settings
 # PATH = path for MPI library
 # LIB = name of MPI library
 
 MPI_INC =       -I../STUBS
 MPI_PATH =      -L../STUBS
 MPI_LIB =	-lmpi_stubs
 
 # FFT library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 6)
 # can be left blank to use provided KISS FFT library
 # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
 # PATH = path for FFT library
 # LIB = name of FFT library
 
 FFT_INC =
 FFT_PATH = 
 FFT_LIB =       -lfftw3f
 
 # JPEG library, OPTIONAL
 # see discussion in doc/Section_start.html#2_2 (step 7)
 # only needed if -DLAMMPS_JPEG listed with LMP_INC
 # INC = path for jpeglib.h
 # PATH = path for JPEG library
 # LIB = name of JPEG library
 
 JPG_INC =       
 JPG_PATH = 	
 JPG_LIB =	
 
 # ---------------------------------------------------------------------
 # build rules and dependencies
 # no need to edit this section
 
 include	Makefile.package.settings
 include	Makefile.package
 
 EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
 
 # Path to src files
 
 vpath %.cpp ..
 vpath %.h ..
 
 # Link target
 
 $(EXE):	$(OBJ)
 	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
 	$(SIZE) $(EXE)
 
 # Library targets
 
 lib:	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
 
 shlib:	$(OBJ)
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
         $(OBJ) $(EXTRA_LIB) $(LIB)
 
 # Compilation rules
 
 %.o:%.cpp
 	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
 
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
 
 # Individual dependencies
 
 DEPENDS = $(OBJ:.o=.d)
 sinclude $(DEPENDS)
diff --git a/src/MANYBODY/pair_bop.h b/src/MANYBODY/pair_bop.h
index 6fee56c29..fa6f4f1ce 100644
--- a/src/MANYBODY/pair_bop.h
+++ b/src/MANYBODY/pair_bop.h
@@ -1,270 +1,265 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    The this work follows the formulation from (a) D.G. Pettifor, et al., Mat.
    Sci. and Eng. A365, 2-13, (2004) and (b) D.A. Murdick, et al., Phys.
    Rev. B 73, 045206 (2006). (c) D.K. Ward, et al., Phys. Rev. B 85, 115206
    (2012)
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(bop,PairBOP)
 
 #else
 
 #ifndef LMP_PAIR_BOP_H
 #define LMP_PAIR_BOP_H
 
 #include "pair.h"
 #include "update.h"
 
 namespace LAMMPS_NS {
 
 class PairBOP : public Pair {
  public:
   PairBOP(class LAMMPS *);
   virtual ~PairBOP();
   void compute(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
   double init_one(int, int);
   double memory_usage();
 
  private:
   int me;
   int maxneigh;                    // maximum size of neighbor list on this processor
   int update_list;                 // check for changing maximum size of neighbor list
-  int maxbopn;                     // maximum size of bop neighbor list for allocation
   int maxnall;                     // maximum size of bop neighbor list for allocation
   int *map;                        // mapping from atom types to elements
-  int nelements;                   // # of unique elments
   int nr;                     // increments for the BOP potential
   int nBOt;                   // second BO increments
   int bop_types;                   // number of elments in potential
   int npairs;                      // number of element pairs
-  char **elements;                   // names of unique elements
-  int ***elem2param;
-  int nparams;
   int bop_step;
   int allocate_pi;
   int allocate_sigma;
   int allocate_neigh;
   int nb_pi,nb_sg;
 
   int *BOP_index;                 // index for neighbor list position
   int neigh_total;                // total number of neighbors stored
   int *cos_index;                 // index for neighbor cosine if not using on the fly
   int *neigh_flag;                 // index for neighbor cosine if not using on the fly
   int cos_total;                  // number of cosines stored if not using on the fly
   int neigh_ct;                  //  limit for large arrays
 
 /*Parameters variables*/
 
   int ncutoff,nfunc;
   int a_flag;
   double *pi_a,*pro_delta,*pi_delta;
   double *pi_p,*pi_c,*sigma_r0,*pi_r0,*phi_r0;
   double *sigma_rc,*pi_rc,*phi_rc,*r1,*sigma_beta0;
   double *pi_beta0,*phi0,*sigma_n,*pi_n,*phi_m;
   double *sigma_nc,*pi_nc,*phi_nc;
   double *pro,*sigma_delta,*sigma_c,*sigma_a;
   double ***sigma_g0,***sigma_g1,***sigma_g2,***sigma_g3;
   double ***sigma_g4,*sigma_f,*sigma_k,*small3;
   double small1,small2,small3g,small4,small5,small6,small7;
   double which,alpha,alpha1,beta1,gamma1,alpha2,beta2,alpha3;
   double beta3,rsmall,rbig,rcore;
   char **words;
 
   double cutmax;                //max cutoff for all elements
   int otfly;                        //Defines whether to do on the fly
                                 //calculations of angles and distances
                                 //on the fly will slow down calculations
                                 //but requires less memory on = 1, off=0
 
   int table;                        //determines the method for reading in
                                 //potential parameters a preset table
                                 //or generate the tables using a spline
 
 /* Neigh variables */
 
   double *rcut,*dr,*rdr;
   double **disij,*rij;
 
 /*Triple variables */
 
-  double *cosAng,***dcosAng,***dcAng;
+  double *cosAng,***dcAng;
 
 /*Double variables */
 
   double *betaS,*dBetaS,*betaP;
   double *dBetaP,*repul,*dRepul;
 
 /*Sigma variables */
 
   int **itypeSigBk,*nSigBk;
   double *sigB;
   double *sigB1;
 
 
 /*Pi variables */
 
   int **itypePiBk,*nPiBk;
   double *piB;
 
 /*Grids1 variables */
 
   double **pBetaS,**pBetaS1,**pBetaS2,**pBetaS3;
   double **pBetaS4,**pBetaS5,**pBetaS6;
 
 /*Grids2 variables */
 
   double **pBetaP,**pBetaP1,**pBetaP2,**pBetaP3;
   double **pBetaP4,**pBetaP5,**pBetaP6;
 
 /*Grids3 variables */
 
   double **pRepul,**pRepul1,**pRepul2,**pRepul3;
   double **pRepul4,**pRepul5,**pRepul6;
 
 /*Grids4 variables */
 
   double **FsigBO,**FsigBO1,**FsigBO2,**FsigBO3;
   double **FsigBO4,**FsigBO5,**FsigBO6;
   double dBO,rdBO;
 
 /* End of BOP variables */
 
   double **rcmin,**rcmax,**rcmaxp;
   struct B_PI{
     double dAA[3];
     double dBB[3];
     double dPiB[3];
     int temp;
     int i;
     int j;
   };
   B_PI *bt_pi;
 
   struct B_SG{
     double dAA[3];
     double dBB[3];
     double dCC[3];
     double dDD[3];
     double dEE[3];
     double dEE1[3];
     double dFF[3];
     double dAAC[3];
     double dBBC[3];
     double dCCC[3];
     double dDDC[3];
     double dEEC[3];
     double dFFC[3];
     double dGGC[3];
     double dUT[3];
     double dSigB1[3];
     double dSigB[3];
     int temp;
     int i;
     int j;
   };
   B_SG *bt_sg;
 
   void setPbetaS();
   void setPbetaP();
   void setPrepul();
   void setSign();
   void gneigh();
   void theta();
   void theta_mod();
   void sigmaBo();
   void PiBo();
   void sigmaBo_otf();
   void PiBo_otf();
   void sigmaBo_noa();
   void sigmaBo_noa_otf();
   void memory_theta_create();
   void memory_theta_destroy();
   void memory_theta_grow();
   double cutoff(double, double, int, double);
   double betaSfunc(int, double);
   double dBetaSfunc(int, double, double, double);
   double betaPfunc(int, double);
   double dBetaPfunc(int, double, double, double);
   double repulfunc(int, double);
   double dRepulfunc(int, double, double, double);
 
   void read_file(char *);
   void read_table(char *);
   void allocate();
   void create_pi(int);
   void create_sigma(int);
   void destroy_pi();
   void destroy_sigma();
   void grow_pi(int,int);
   void grow_sigma(int,int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Pair style BOP requires atom IDs
 
 This is a requirement to use the BOP potential.
 
 E: Pair style BOP requires newton pair on
 
 See the newton command.  This is a restriction to use the BOP
 potential.
 
 E: Pair style bop requires comm ghost cutoff at least 3x larger than %g
 
 Use the communicate ghost command to set this.  See the pair bop
 doc page for more details.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: Too many atom pairs for pair bop
 
 The number of atomic pairs exceeds the expected number.  Check your
 atomic structure to ensure that it is realistic.
 
 E: Too many atom triplets for pair bop
 
 The number of three atom groups for angle determinations exceeds the
 expected number.  Check your atomic structrure to ensure that it is
 realistic.
 
 E: Cannot open BOP potential file %s
 
 The specified BOP potential file cannot be opened.  Check that the
 path and name are correct.
 
 */
diff --git a/src/USER-MISC/fix_imd.cpp b/src/USER-MISC/fix_imd.cpp
index 677ea0932..05fe5b001 100644
--- a/src/USER-MISC/fix_imd.cpp
+++ b/src/USER-MISC/fix_imd.cpp
@@ -1,1478 +1,1479 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    The FixIMD class contains code from VMD and NAMD which is copyrighted
    by the Board of Trustees of the University of Illinois and is free to
    use with LAMMPS according to point 2 of the UIUC license (10% clause):
 
 " Licensee may, at its own expense, create and freely distribute
 complimentary works that interoperate with the Software, directing others to
 the TCBG server to license and obtain the Software itself. Licensee may, at
 its own expense, modify the Software to make derivative works.  Except as
 explicitly provided below, this License shall apply to any derivative work
 as it does to the original Software distributed by Illinois.  Any derivative
 work should be clearly marked and renamed to notify users that it is a
 modified version and not the original Software distributed by Illinois.
 Licensee agrees to reproduce the copyright notice and other proprietary
 markings on any derivative work and to include in the documentation of such
 work the acknowledgement:
 
  "This software includes code developed by the Theoretical and Computational
   Biophysics Group in the Beckman Institute for Advanced Science and
   Technology at the University of Illinois at Urbana-Champaign."
 
 Licensee may redistribute without restriction works with up to 1/2 of their
 non-comment source code derived from at most 1/10 of the non-comment source
 code developed by Illinois and contained in the Software, provided that the
 above directions for notice and acknowledgement are observed.  Any other
 distribution of the Software or any derivative work requires a separate
 license with Illinois.  Licensee may contact Illinois (vmd@ks.uiuc.edu) to
 negotiate an appropriate license for such distribution."
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author:  Axel Kohlmeyer (Temple U)
    IMD API, hash, and socket code written by: John E. Stone,
    Justin Gullingsrud, and James Phillips, (TCBG, Beckman Institute, UIUC)
 ------------------------------------------------------------------------- */
 
 #include "fix_imd.h"
 #include "atom.h"
 #include "comm.h"
 #include "update.h"
 #include "respa.h"
 #include "domain.h"
 #include "force.h"
 #include "error.h"
 #include "group.h"
 #include "memory.h"
 
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <winsock2.h>
 #else
 #include <arpa/inet.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <netinet/in.h>
 #include <netdb.h>
 #include <sys/file.h>
 #endif
 
 #include <errno.h>
 
 /* re-usable integer hash table code with static linkage. */
 
 /** hash table top level data structure */
 typedef struct inthash_t {
   struct inthash_node_t **bucket;        /* array of hash nodes */
   int size;                           /* size of the array */
   int entries;                        /* number of entries in table */
   int downshift;                      /* shift cound, used in hash function */
   int mask;                           /* used to select bits for hashing */
 } inthash_t;
 
 /** hash table node data structure */
 typedef struct inthash_node_t {
   int data;                           /* data in hash node */
   int key;                            /* key for hash lookup */
   struct inthash_node_t *next;        /* next node in hash chain */
 } inthash_node_t;
 
 #define HASH_FAIL  -1
 #define HASH_LIMIT  0.5
 
 /* initialize new hash table  */
 static void inthash_init(inthash_t *tptr, int buckets);
 /* lookup entry in hash table */
 static int inthash_lookup(const inthash_t *tptr, int key);
 /* generate list of keys for reverse lookups. */
 static int *inthash_keys(inthash_t *tptr);
 /* insert an entry into hash table. */
 static int inthash_insert(inthash_t *tptr, int key, int data);
 /* delete the hash table */
 static void inthash_destroy(inthash_t *tptr);
 /* adapted sort for in-place sorting of map indices. */
 static void id_sort(int *idmap, int left, int right);
 
 /************************************************************************
  * integer hash code:
  ************************************************************************/
 
 /* inthash() - Hash function returns a hash number for a given key.
  * tptr: Pointer to a hash table, key: The key to create a hash number for */
 static int inthash(const inthash_t *tptr, int key) {
   int hashvalue;
 
   hashvalue = (((key*1103515249)>>tptr->downshift) & tptr->mask);
   if (hashvalue < 0) {
     hashvalue = 0;
   }
 
   return hashvalue;
 }
 
 /*
  *  rebuild_table_int() - Create new hash table when old one fills up.
  *
  *  tptr: Pointer to a hash table
  */
 static void rebuild_table_int(inthash_t *tptr) {
   inthash_node_t **old_bucket, *old_hash, *tmp;
   int old_size, h, i;
 
   old_bucket=tptr->bucket;
   old_size=tptr->size;
 
   /* create a new table and rehash old buckets */
   inthash_init(tptr, old_size<<1);
   for (i=0; i<old_size; i++) {
     old_hash=old_bucket[i];
     while(old_hash) {
       tmp=old_hash;
       old_hash=old_hash->next;
       h=inthash(tptr, tmp->key);
       tmp->next=tptr->bucket[h];
       tptr->bucket[h]=tmp;
       tptr->entries++;
     } /* while */
   } /* for */
 
   /* free memory used by old table */
   free(old_bucket);
 
   return;
 }
 
 /*
  *  inthash_init() - Initialize a new hash table.
  *
  *  tptr: Pointer to the hash table to initialize
  *  buckets: The number of initial buckets to create
  */
 void inthash_init(inthash_t *tptr, int buckets) {
 
   /* make sure we allocate something */
   if (buckets==0)
     buckets=16;
 
   /* initialize the table */
   tptr->entries=0;
   tptr->size=2;
   tptr->mask=1;
   tptr->downshift=29;
 
   /* ensure buckets is a power of 2 */
   while (tptr->size<buckets) {
     tptr->size<<=1;
     tptr->mask=(tptr->mask<<1)+1;
     tptr->downshift--;
   } /* while */
 
   /* allocate memory for table */
   tptr->bucket=(inthash_node_t **) calloc(tptr->size, sizeof(inthash_node_t *));
 
   return;
 }
 
 /*
  *  inthash_lookup() - Lookup an entry in the hash table and return a pointer to
  *    it or HASH_FAIL if it wasn't found.
  *
  *  tptr: Pointer to the hash table
  *  key: The key to lookup
  */
 int inthash_lookup(const inthash_t *tptr, int key) {
   int h;
   inthash_node_t *node;
 
 
   /* find the entry in the hash table */
   h=inthash(tptr, key);
   for (node=tptr->bucket[h]; node!=NULL; node=node->next) {
     if (node->key == key)
       break;
   }
 
   /* return the entry if it exists, or HASH_FAIL */
   return(node ? node->data : HASH_FAIL);
 }
 
 
 /*
  *  inthash_keys() - Return a list of keys.
  *  NOTE: the returned list must be freed with free(3).
  */
 int *inthash_keys(inthash_t *tptr) {
 
   int *keys;
   inthash_node_t *node;
 
   keys = (int *)calloc(tptr->entries, sizeof(int));
 
   for (int i=0; i < tptr->size; ++i) {
     for (node=tptr->bucket[i]; node != NULL; node=node->next) {
       keys[node->data] = node->key;
     }
   }
 
   return keys;
 }
 
 /*
  *  inthash_insert() - Insert an entry into the hash table.  If the entry already
  *  exists return a pointer to it, otherwise return HASH_FAIL.
  *
  *  tptr: A pointer to the hash table
  *  key: The key to insert into the hash table
  *  data: A pointer to the data to insert into the hash table
  */
 int inthash_insert(inthash_t *tptr, int key, int data) {
   int tmp;
   inthash_node_t *node;
   int h;
 
   /* check to see if the entry exists */
   if ((tmp=inthash_lookup(tptr, key)) != HASH_FAIL)
     return(tmp);
 
   /* expand the table if needed */
   while (tptr->entries>=HASH_LIMIT*tptr->size)
     rebuild_table_int(tptr);
 
   /* insert the new entry */
   h=inthash(tptr, key);
   node=(struct inthash_node_t *) malloc(sizeof(inthash_node_t));
   node->data=data;
   node->key=key;
   node->next=tptr->bucket[h];
   tptr->bucket[h]=node;
   tptr->entries++;
 
   return HASH_FAIL;
 }
 
 /*
  * inthash_destroy() - Delete the entire table, and all remaining entries.
  *
  */
 void inthash_destroy(inthash_t *tptr) {
   inthash_node_t *node, *last;
   int i;
 
   for (i=0; i<tptr->size; i++) {
     node = tptr->bucket[i];
     while (node != NULL) {
       last = node;
       node = node->next;
       free(last);
     }
   }
 
   /* free the entire array of buckets */
   if (tptr->bucket != NULL) {
     free(tptr->bucket);
     memset(tptr, 0, sizeof(inthash_t));
   }
 }
 
 /************************************************************************
  * integer list sort code:
  ************************************************************************/
 
 /* sort for integer map. initial call  id_sort(idmap, 0, natoms - 1); */
 static void id_sort(int *idmap, int left, int right)
 {
   int pivot, l_hold, r_hold;
 
   l_hold = left;
   r_hold = right;
   pivot = idmap[left];
 
   while (left < right) {
     while ((idmap[right] >= pivot) && (left < right))
       right--;
     if (left != right) {
       idmap[left] = idmap[right];
       left++;
     }
     while ((idmap[left] <= pivot) && (left < right))
       left++;
     if (left != right) {
       idmap[right] = idmap[left];
       right--;
     }
   }
   idmap[left] = pivot;
   pivot = left;
   left = l_hold;
   right = r_hold;
 
   if (left < pivot)
     id_sort(idmap, left, pivot-1);
   if (right > pivot)
     id_sort(idmap, pivot+1, right);
 }
 
 /********** API definitions of the VMD/NAMD code ************************
  * This code was taken and adapted from VMD-1.8.7/NAMD-2.7 in Sep 2009. *
  * If there are any bugs or problems, please contact akohlmey@gmail.com *
  ************************************************************************/
 
 /***************************************************************************
  *cr
  *cr            (C) Copyright 1995-2009 The Board of Trustees of the
  *cr                        University of Illinois
  *cr                         All Rights Reserved
  *cr
  ***************************************************************************/
 
 /* part 1: Interactive MD (IMD) API */
 
 #include <limits.h>
 
 #if ( INT_MAX == 2147483647 )
 typedef int     int32;
 #else
 typedef short   int32;
 #endif
 
 typedef struct {
   int32 type;
   int32 length;
 } IMDheader;
 
 #define IMDHEADERSIZE 8
 #define IMDVERSION 2
 
 typedef enum IMDType_t {
   IMD_DISCONNECT,   /**< close IMD connection, leaving sim running */
   IMD_ENERGIES,     /**< energy data block                         */
   IMD_FCOORDS,      /**< atom coordinates                          */
   IMD_GO,           /**< start the simulation                      */
   IMD_HANDSHAKE,    /**< endianism and version check message       */
   IMD_KILL,         /**< kill the simulation job, shutdown IMD     */
   IMD_MDCOMM,       /**< MDComm style force data                   */
   IMD_PAUSE,        /**< pause the running simulation              */
   IMD_TRATE,        /**< set IMD update transmission rate          */
   IMD_IOERROR       /**< indicate an I/O error                     */
 } IMDType;          /**< IMD command message type enumerations */
 
 typedef struct {
   int32 tstep;      /**< integer timestep index                    */
   float T;          /**< Temperature in degrees Kelvin             */
   float Etot;       /**< Total energy, in Kcal/mol                 */
   float Epot;       /**< Potential energy, in Kcal/mol             */
   float Evdw;       /**< Van der Waals energy, in Kcal/mol         */
   float Eelec;      /**< Electrostatic energy, in Kcal/mol         */
   float Ebond;      /**< Bond energy, Kcal/mol                     */
   float Eangle;     /**< Angle energy, Kcal/mol                    */
   float Edihe;      /**< Dihedral energy, Kcal/mol                 */
   float Eimpr;      /**< Improper energy, Kcal/mol                 */
 } IMDEnergies;      /**< IMD simulation energy report structure    */
 
 /** Send control messages - these consist of a header with no subsequent data */
 static int imd_handshake(void *);    /**< check endianness, version compat */
 /** Receive header and data */
 static IMDType imd_recv_header(void *, int32 *);
 /** Receive MDComm-style forces, units are Kcal/mol/angstrom */
 static int imd_recv_mdcomm(void *, int32, int32 *, float *);
 /** Receive energies */
 static int imd_recv_energies(void *, IMDEnergies *);
 /** Receive atom coordinates. */
 static int imd_recv_fcoords(void *, int32, float *);
 /** Prepare IMD data packet header */
 static void imd_fill_header(IMDheader *header, IMDType type, int32 length);
 /** Write data to socket */
 static int32 imd_writen(void *s, const char *ptr, int32 n);
 
 /* part 2: abstracts platform-dependent routines/APIs for using sockets */
 
 typedef struct {
   struct sockaddr_in addr; /* address of socket provided by bind() */
   int addrlen;             /* size of the addr struct */
   int sd;                  /* socket file descriptor */
 } imdsocket;
 
 static int   imdsock_init(void);
 static void *imdsock_create(void);
 static int   imdsock_bind(void *, int);
 static int   imdsock_listen(void *);
 static void *imdsock_accept(void *);  /* return new socket */
 static int   imdsock_write(void *, const void *, int);
 static int   imdsock_read(void *, void *, int);
 static int   imdsock_selread(void *, int);
 static int   imdsock_selwrite(void *, int);
 static void  imdsock_shutdown(void *);
 static void  imdsock_destroy(void *);
 
 /***************************************************************
  * End of API definitions of the VMD/NAMD code.                *
  * The implementation follows at the end of the file.          *
  ***************************************************************/
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 /* struct for packed data communication of coordinates and forces. */
 struct commdata {
   int tag;
   float x,y,z;
 };
 
 /***************************************************************
  * create class and parse arguments in LAMMPS script. Syntax:
  * fix ID group-ID imd <imd_trate> <imd_port> [unwrap (on|off)] [fscale <imd_fscale>]
  ***************************************************************/
 FixIMD::FixIMD(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (narg < 4)
     error->all(FLERR,"Illegal fix imd command");
 
   imd_port = force->inumeric(FLERR,arg[3]);
   if (imd_port < 1024)
     error->all(FLERR,"Illegal fix imd parameter: port < 1024");
 
   /* default values for optional flags */
   unwrap_flag = 0;
   nowait_flag = 0;
   connect_msg = 1;
   imd_fscale = 1.0;
   imd_trate = 1;
 
   /* parse optional arguments */
   int argsdone = 4;
   while (argsdone+1 < narg) {
     if (0 == strcmp(arg[argsdone], "unwrap")) {
       if (0 == strcmp(arg[argsdone+1], "on")) {
         unwrap_flag = 1;
       } else {
         unwrap_flag = 0;
       }
     } else if (0 == strcmp(arg[argsdone], "nowait")) {
       if (0 == strcmp(arg[argsdone+1], "on")) {
         nowait_flag = 1;
       } else {
         nowait_flag = 0;
       }
     } else if (0 == strcmp(arg[argsdone], "fscale")) {
       imd_fscale = force->numeric(FLERR,arg[argsdone+1]);
     } else if (0 == strcmp(arg[argsdone], "trate")) {
       imd_trate = force->inumeric(FLERR,arg[argsdone+1]);
     } else {
       error->all(FLERR,"Unknown fix imd parameter");
     }
     ++argsdone; ++argsdone;
   }
 
   /* sanity check on parameters */
   if (imd_trate < 1)
     error->all(FLERR,"Illegal fix imd parameter. trate < 1.");
 
   bigint n = group->count(igroup);
   if (n > MAXSMALLINT) error->all(FLERR,"Too many atoms for fix imd");
   num_coords = static_cast<int> (n);
 
   MPI_Comm_rank(world,&me);
 
   /* initialize various imd state variables. */
   clientsock = NULL;
   localsock  = NULL;
   nlevels_respa = 0;
   imd_inactive = 0;
   imd_terminate = 0;
   imd_forces = 0;
   force_buf = NULL;
   maxbuf = 0;
   msgdata = NULL;
   msglen = 0;
   comm_buf = NULL;
   idmap = NULL;
   rev_idmap = NULL;
 
   if (me == 0) {
     /* set up incoming socket on MPI rank 0. */
     imdsock_init();
     localsock = imdsock_create();
     clientsock = NULL;
     if (imdsock_bind(localsock,imd_port)) {
       perror("bind to socket failed");
       imdsock_destroy(localsock);
       imd_terminate = 1;
     } else {
       imdsock_listen(localsock);
     }
   }
   MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world);
   if (imd_terminate)
     error->all(FLERR,"LAMMPS Terminated on error in IMD.");
 
   /* storage required to communicate a single coordinate or force. */
   size_one = sizeof(struct commdata);
 
 #if defined(LAMMPS_ASYNC_IMD)
   /* set up for i/o worker thread on MPI rank 0.*/
   if (me == 0) {
     if (screen)
       fputs("Using fix imd with asynchronous I/O.\n",screen);
     if (logfile)
       fputs("Using fix imd with asynchronous I/O.\n",logfile);
 
     /* set up mutex and condition variable for i/o thread */
     /* hold mutex before creating i/o thread to keep it waiting. */
     pthread_mutex_init(&read_mutex, NULL);
     pthread_mutex_init(&write_mutex, NULL);
     pthread_cond_init(&write_cond, NULL);
 
     pthread_mutex_lock(&write_mutex);
     buf_has_data=0;
     pthread_mutex_unlock(&write_mutex);
 
     /* set up and launch i/o thread */
     pthread_attr_init(&iot_attr);
     pthread_attr_setdetachstate(&iot_attr, PTHREAD_CREATE_JOINABLE);
     pthread_create(&iothread, &iot_attr, &fix_imd_ioworker, this);
   }
 #endif
 }
 
 /*********************************
  * Clean up on deleting the fix. *
  *********************************/
 FixIMD::~FixIMD()
 {
 
 #if defined(LAMMPS_ASYNC_IMD)
   if (me == 0) {
     pthread_mutex_lock(&write_mutex);
     buf_has_data=-1;
     pthread_cond_signal(&write_cond);
     pthread_mutex_unlock(&write_mutex);
     pthread_join(iothread, NULL);
 
     /* cleanup */
     pthread_attr_destroy(&iot_attr);
     pthread_mutex_destroy(&write_mutex);
     pthread_cond_destroy(&write_cond);
   }
 #endif
 
   inthash_t *hashtable = (inthash_t *)idmap;
   memory->destroy(comm_buf);
   memory->destroy(force_buf);
   inthash_destroy(hashtable);
   delete hashtable;
   free(rev_idmap);
   // close sockets
   imdsock_shutdown(clientsock);
   imdsock_destroy(clientsock);
   imdsock_shutdown(localsock);
   imdsock_destroy(localsock);
   clientsock=NULL;
   localsock=NULL;
   return;
 }
 
 /* ---------------------------------------------------------------------- */
 int FixIMD::setmask()
 {
   int mask = 0;
   mask |= POST_FORCE;
   mask |= POST_FORCE_RESPA;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 void FixIMD::init()
 {
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* (re-)connect to an IMD client (e.g. VMD). return 1 if
    new connection was made, 0 if not. */
 int FixIMD::reconnect()
 {
   /* set up IMD communication, but only if needed. */
   imd_inactive = 0;
   imd_terminate = 0;
 
   if (me == 0) {
     if (clientsock) return 1;
-    if (screen && connect_msg)
-      if (nowait_flag)
+    if (screen && connect_msg) {
+      if (nowait_flag) {
         fprintf(screen,"Listening for IMD connection on port %d. Transfer rate %d.\n",imd_port, imd_trate);
-      else
+      } else {
         fprintf(screen,"Waiting for IMD connection on port %d. Transfer rate %d.\n",imd_port, imd_trate);
-
+      }
+    }
     connect_msg = 0;
     clientsock = NULL;
     if (nowait_flag) {
       int retval = imdsock_selread(localsock,0);
       if (retval > 0) {
         clientsock = imdsock_accept(localsock);
       } else {
         imd_inactive = 1;
         return 0;
       }
     } else {
       int retval=0;
       do {
         retval = imdsock_selread(localsock, 60);
       } while (retval <= 0);
       clientsock = imdsock_accept(localsock);
     }
 
     if (!imd_inactive && !clientsock) {
       if (screen)
         fprintf(screen, "IMD socket accept error. Dropping connection.\n");
       imd_terminate = 1;
       return 0;
     } else {
       /* check endianness and IMD protocol version. */
       if (imd_handshake(clientsock)) {
         if (screen)
           fprintf(screen, "IMD handshake error. Dropping connection.\n");
         imdsock_destroy(clientsock);
         imd_terminate = 1;
         return 0;
       } else {
         int32 length;
         if (imdsock_selread(clientsock, 1) != 1 ||
             imd_recv_header(clientsock, &length) != IMD_GO) {
           if (screen)
             fprintf(screen, "Incompatible IMD client version? Dropping connection.\n");
           imdsock_destroy(clientsock);
           imd_terminate = 1;
           return 0;
         } else {
           return 1;
         }
       }
     }
   }
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 /* wait for IMD client (e.g. VMD) to respond, initialize communication
  * buffers and collect tag/id maps. */
 void FixIMD::setup(int)
 {
   /* nme:    number of atoms in group on this MPI task
    * nmax:   max number of atoms in group across all MPI tasks
    * nlocal: all local atoms
    */
   int i,j;
   int nmax,nme,nlocal;
   int *mask  = atom->mask;
   int *tag  = atom->tag;
   nlocal = atom->nlocal;
   nme=0;
   for (i=0; i < nlocal; ++i)
     if (mask[i] & groupbit) ++nme;
 
   MPI_Allreduce(&nme,&nmax,1,MPI_INT,MPI_MAX,world);
   memory->destroy(comm_buf);
   maxbuf = nmax*size_one;
   comm_buf = (void *) memory->smalloc(maxbuf,"imd:comm_buf");
 
   connect_msg = 1;
   reconnect();
   MPI_Bcast(&imd_inactive, 1, MPI_INT, 0, world);
   MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world);
   if (imd_terminate)
     error->all(FLERR,"LAMMPS terminated on error in setting up IMD connection.");
 
   /* initialize and build hashtable. */
   inthash_t *hashtable=new inthash_t;
   inthash_init(hashtable, num_coords);
   idmap = (void *)hashtable;
 
   MPI_Status status;
   MPI_Request request;
   int tmp, ndata;
   struct commdata *buf = static_cast<struct commdata *>(comm_buf);
 
   if (me == 0) {
     int *taglist = new int[num_coords];
     int numtag=0; /* counter to map atom tags to a 0-based consecutive index list */
 
     for (i=0; i < nlocal; ++i) {
       if (mask[i] & groupbit) {
         taglist[numtag] = tag[i];
         ++numtag;
       }
     }
 
     /* loop over procs to receive remote data */
     for (i=1; i < comm->nprocs; ++i) {
       MPI_Irecv(comm_buf, maxbuf, MPI_BYTE, i, 0, world, &request);
       MPI_Send(&tmp, 0, MPI_INT, i, 0, world);
       MPI_Wait(&request, &status);
       MPI_Get_count(&status, MPI_BYTE, &ndata);
       ndata /= size_one;
 
       for (j=0; j < ndata; ++j) {
         taglist[numtag] = buf[j].tag;
         ++numtag;
       }
     }
 
     /* sort list of tags by value to have consistently the
      * same list when running in parallel and build hash table. */
     id_sort(taglist, 0, num_coords-1);
     for (i=0; i < num_coords; ++i) {
       inthash_insert(hashtable, taglist[i], i);
     }
     delete[] taglist;
 
     /* generate reverse index-to-tag map for communicating
      * IMD forces back to the proper atoms */
     rev_idmap=inthash_keys(hashtable);
   } else {
     nme=0;
     for (i=0; i < nlocal; ++i) {
       if (mask[i] & groupbit) {
         buf[nme].tag = tag[i];
         ++nme;
       }
     }
     /* blocking receive to wait until it is our turn to send data. */
     MPI_Recv(&tmp, 0, MPI_INT, 0, 0, world, &status);
     MPI_Rsend(comm_buf, nme*size_one, MPI_BYTE, 0, 0, world);
   }
 
   return;
 }
 
 /* worker threads for asynchronous i/o */
 #if defined(LAMMPS_ASYNC_IMD)
 /* c bindings wrapper */
 void *fix_imd_ioworker(void *t)
 {
   FixIMD *imd=(FixIMD *)t;
   imd->ioworker();
   return NULL;
 }
 
 /* the real i/o worker thread */
 void FixIMD::ioworker()
 {
   while (1) {
     pthread_mutex_lock(&write_mutex);
     if (buf_has_data < 0) {
       /* master told us to go away */
       fprintf(screen,"Asynchronous I/O thread is exiting.\n");
       buf_has_data=0;
       pthread_mutex_unlock(&write_mutex);
       pthread_exit(NULL);
     } else if (buf_has_data > 0) {
       /* send coordinate data, if client is able to accept */
       if (clientsock && imdsock_selwrite(clientsock,0)) {
         imd_writen(clientsock, msgdata, msglen);
       }
       delete[] msgdata;
       buf_has_data=0;
       pthread_mutex_unlock(&write_mutex);
     } else {
       /* nothing to write out yet. wait on condition. */
       pthread_cond_wait(&write_cond, &write_mutex);
       pthread_mutex_unlock(&write_mutex);
     }
   }
 }
 #endif
 
 /* ---------------------------------------------------------------------- */
 /* Main IMD protocol handler:
  * Send coodinates, energies, and add IMD forces to atoms. */
 void FixIMD::post_force(int vflag)
 {
   /* check for reconnect */
   if (imd_inactive) {
     reconnect();
     MPI_Bcast(&imd_inactive, 1, MPI_INT, 0, world);
     MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world);
     if (imd_terminate)
       error->all(FLERR,"LAMMPS terminated on error in setting up IMD connection.");
     if (imd_inactive)
       return;     /* IMD client has detached and not yet come back. do nothing. */
   }
 
   int *tag = atom->tag;
   double **x = atom->x;
   tagint *image = atom->image;
   int nlocal = atom->nlocal;
   int *mask  = atom->mask;
   struct commdata *buf;
 
   if (me == 0) {
     /* process all pending incoming data. */
     int imd_paused=0;
     while ((imdsock_selread(clientsock, 0) > 0) || imd_paused) {
       /* if something requested to turn off IMD while paused get out */
       if (imd_inactive) break;
 
       int32 length;
       int msg = imd_recv_header(clientsock, &length);
 
       switch(msg) {
 
         case IMD_GO:
           if (screen)
             fprintf(screen, "Ignoring unexpected IMD_GO message.\n");
           break;
 
         case IMD_IOERROR:
           if (screen)
             fprintf(screen, "IMD connection lost.\n");
           /* fallthrough */
 
         case IMD_DISCONNECT: {
           /* disconnect from client. wait for new connection. */
           imd_paused = 0;
           imd_forces = 0;
           memory->destroy(force_buf);
           force_buf = NULL;
           imdsock_destroy(clientsock);
           clientsock = NULL;
           if (screen)
             fprintf(screen, "IMD client detached. LAMMPS run continues.\n");
 
           connect_msg = 1;
           reconnect();
           if (imd_terminate) imd_inactive = 1;
           break;
         }
 
         case IMD_KILL:
           /* stop the simulation job and shutdown IMD */
           if (screen)
             fprintf(screen, "IMD client requested termination of run.\n");
           imd_inactive = 1;
           imd_terminate = 1;
           imd_paused = 0;
           imdsock_destroy(clientsock);
           clientsock = NULL;
           break;
 
         case IMD_PAUSE:
           /* pause the running simulation. wait for second IMD_PAUSE to continue. */
           if (imd_paused) {
             if (screen)
               fprintf(screen, "Continuing run on IMD client request.\n");
             imd_paused = 0;
           } else {
             if (screen)
               fprintf(screen, "Pausing run on IMD client request.\n");
             imd_paused = 1;
           }
           break;
 
         case IMD_TRATE:
           /* change the IMD transmission data rate */
           if (length > 0)
             imd_trate = length;
           if (screen)
             fprintf(screen, "IMD client requested change of transfer rate. Now it is %d.\n", imd_trate);
           break;
 
         case IMD_ENERGIES: {
           IMDEnergies dummy_energies;
           imd_recv_energies(clientsock, &dummy_energies);
           break;
         }
 
         case IMD_FCOORDS: {
           float *dummy_coords = new float[3*length];
           imd_recv_fcoords(clientsock, length, dummy_coords);
           delete[] dummy_coords;
           break;
         }
 
         case IMD_MDCOMM: {
           int32 *imd_tags = new int32[length];
           float *imd_fdat = new float[3*length];
           imd_recv_mdcomm(clientsock, length, imd_tags, imd_fdat);
 
           if (imd_forces < length) { /* grow holding space for forces, if needed. */
             memory->destroy(force_buf);
             force_buf = (void *) memory->smalloc(length*size_one,
                                                  "imd:force_buf");
           }
           imd_forces = length;
           buf = static_cast<struct commdata *>(force_buf);
 
           /* compare data to hash table */
           for (int ii=0; ii < length; ++ii) {
             buf[ii].tag = rev_idmap[imd_tags[ii]];
             buf[ii].x   = imd_fdat[3*ii];
             buf[ii].y   = imd_fdat[3*ii+1];
             buf[ii].z   = imd_fdat[3*ii+2];
           }
           delete[] imd_tags;
           delete[] imd_fdat;
           break;
         }
 
         default:
           if (screen)
             fprintf(screen, "Unhandled incoming IMD message #%d. length=%d\n", msg, length);
           break;
       }
     }
   }
 
   /* update all tasks with current settings. */
   int old_imd_forces = imd_forces;
   MPI_Bcast(&imd_trate, 1, MPI_INT, 0, world);
   MPI_Bcast(&imd_inactive, 1, MPI_INT, 0, world);
   MPI_Bcast(&imd_forces, 1, MPI_INT, 0, world);
   MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world);
   if (imd_terminate)
     error->all(FLERR,"LAMMPS terminated on IMD request.");
 
   if (imd_forces > 0) {
     /* check if we need to readjust the forces comm buffer on the receiving nodes. */
     if (me != 0) {
       if (old_imd_forces < imd_forces) { /* grow holding space for forces, if needed. */
         if (force_buf != NULL)
           memory->sfree(force_buf);
         force_buf = memory->smalloc(imd_forces*size_one, "imd:force_buf");
       }
     }
     MPI_Bcast(force_buf, imd_forces*size_one, MPI_BYTE, 0, world);
   }
 
   /* Check if we need to communicate coordinates to the client.
    * Tuning imd_trate allows to keep the overhead for IMD low
    * at the expense of a more jumpy display. Rather than using
    * end_of_step() we do everything here in one go.
    *
    * If we don't communicate, only check if we have forces
    * stored away and apply them. */
   if (update->ntimestep % imd_trate) {
     if (imd_forces > 0) {
       double **f = atom->f;
       buf = static_cast<struct commdata *>(force_buf);
 
       /* XXX. this is in principle O(N**2) == not good.
        * however we assume for now that the number of atoms
        * that we manipulate via IMD will be small compared
        * to the total system size, so we don't hurt too much. */
       for (int j=0; j < imd_forces; ++j) {
         for (int i=0; i < nlocal; ++i) {
           if (mask[i] & groupbit) {
             if (buf[j].tag == tag[i]) {
               f[i][0] += imd_fscale*buf[j].x;
               f[i][1] += imd_fscale*buf[j].y;
               f[i][2] += imd_fscale*buf[j].z;
             }
           }
         }
       }
     }
     return;
   }
 
   /* check and potentially grow local communication buffers. */
   int i, k, nmax, nme=0;
   for (i=0; i < nlocal; ++i)
     if (mask[i] & groupbit) ++nme;
 
   MPI_Allreduce(&nme,&nmax,1,MPI_INT,MPI_MAX,world);
   if (nmax*size_one > maxbuf) {
     memory->destroy(comm_buf);
     maxbuf = nmax*size_one;
     comm_buf = memory->smalloc(maxbuf,"imd:comm_buf");
   }
 
   MPI_Status status;
   MPI_Request request;
   int tmp, ndata;
   buf = static_cast<struct commdata *>(comm_buf);
 
   if (me == 0) {
     /* collect data into new array. we bypass the IMD API to save
      * us one extra copy of the data. */
     msglen = 3*sizeof(float)*num_coords+IMDHEADERSIZE;
     msgdata = new char[msglen];
     imd_fill_header((IMDheader *)msgdata, IMD_FCOORDS, num_coords);
     /* array pointer, to the offset where we receive the coordinates. */
     float *recvcoord = (float *) (msgdata+IMDHEADERSIZE);
 
     /* add local data */
     if (unwrap_flag) {
       double xprd = domain->xprd;
       double yprd = domain->yprd;
       double zprd = domain->zprd;
       double xy = domain->xy;
       double xz = domain->xz;
       double yz = domain->yz;
 
       for (i=0; i<nlocal; ++i) {
         if (mask[i] & groupbit) {
           const int j = 3*inthash_lookup((inthash_t *)idmap, tag[i]);
           if (j != HASH_FAIL) {
             int ix = (image[i] & IMGMASK) - IMGMAX;
             int iy = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
             int iz = (image[i] >> IMG2BITS) - IMGMAX;
 
             if (domain->triclinic) {
               recvcoord[j]   = x[i][0] + ix * xprd + iy * xy + iz * xz;
               recvcoord[j+1] = x[i][1] + iy * yprd + iz * yz;
               recvcoord[j+2] = x[i][2] + iz * zprd;
             } else {
               recvcoord[j]   = x[i][0] + ix * xprd;
               recvcoord[j+1] = x[i][1] + iy * yprd;
               recvcoord[j+2] = x[i][2] + iz * zprd;
             }
           }
         }
       }
     } else {
       for (i=0; i<nlocal; ++i) {
         if (mask[i] & groupbit) {
           const int j = 3*inthash_lookup((inthash_t *)idmap, tag[i]);
           if (j != HASH_FAIL) {
             recvcoord[j]   = x[i][0];
             recvcoord[j+1] = x[i][1];
             recvcoord[j+2] = x[i][2];
           }
         }
       }
     }
 
     /* loop over procs to receive remote data */
     for (i=1; i < comm->nprocs; ++i) {
       MPI_Irecv(comm_buf, maxbuf, MPI_BYTE, i, 0, world, &request);
       MPI_Send(&tmp, 0, MPI_INT, i, 0, world);
       MPI_Wait(&request, &status);
       MPI_Get_count(&status, MPI_BYTE, &ndata);
       ndata /= size_one;
 
       for (k=0; k<ndata; ++k) {
         const int j = 3*inthash_lookup((inthash_t *)idmap, buf[k].tag);
         if (j != HASH_FAIL) {
           recvcoord[j]   = buf[k].x;
           recvcoord[j+1] = buf[k].y;
           recvcoord[j+2] = buf[k].z;
         }
       }
     }
 
     /* done collecting frame data now communicate with IMD client. */
 
 #if defined(LAMMPS_ASYNC_IMD)
     /* wake up i/o worker thread and release lock on i/o buffer
      * we can go back to our MD and let the i/o thread do the rest */
     buf_has_data=1;
     pthread_cond_signal(&write_cond);
     pthread_mutex_unlock(&write_mutex);
 #else
     /* send coordinate data, if client is able to accept */
     if (clientsock && imdsock_selwrite(clientsock,0)) {
       imd_writen(clientsock, msgdata, msglen);
     }
     delete[] msgdata;
 #endif
 
   } else {
     /* copy coordinate data into communication buffer */
     nme = 0;
     if (unwrap_flag) {
       double xprd = domain->xprd;
       double yprd = domain->yprd;
       double zprd = domain->zprd;
       double xy = domain->xy;
       double xz = domain->xz;
       double yz = domain->yz;
 
       for (i=0; i<nlocal; ++i) {
         if (mask[i] & groupbit) {
           int ix = (image[i] & IMGMASK) - IMGMAX;
           int iy = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
           int iz = (image[i] >> IMG2BITS) - IMGMAX;
 
           if (domain->triclinic) {
             buf[nme].tag = tag[i];
             buf[nme].x   = x[i][0] + ix * xprd + iy * xy + iz * xz;
             buf[nme].y   = x[i][1] + iy * yprd + iz * yz;
             buf[nme].z   = x[i][2] + iz * zprd;
           } else {
             buf[nme].tag = tag[i];
             buf[nme].x   = x[i][0] + ix * xprd;
             buf[nme].y   = x[i][1] + iy * yprd;
             buf[nme].z   = x[i][2] + iz * zprd;
           }
           ++nme;
         }
       }
     } else {
       for (i=0; i<nlocal; ++i) {
         if (mask[i] & groupbit) {
           buf[nme].tag = tag[i];
           buf[nme].x   = x[i][0];
           buf[nme].y   = x[i][1];
           buf[nme].z   = x[i][2];
           ++nme;
         }
       }
     }
     /* blocking receive to wait until it is our turn to send data. */
     MPI_Recv(&tmp, 0, MPI_INT, 0, 0, world, &status);
     MPI_Rsend(comm_buf, nme*size_one, MPI_BYTE, 0, 0, world);
   }
 
   return;
 }
 
 /* ---------------------------------------------------------------------- */
 void FixIMD::post_force_respa(int vflag, int ilevel, int iloop)
 {
   /* only process IMD on the outmost RESPA level. */
   if (ilevel == nlevels_respa-1) post_force(vflag);
   return;
 }
 
 /* ---------------------------------------------------------------------- */
 /* local memory usage. approximately. */
 double FixIMD::memory_usage(void)
 {
   return static_cast<double>(num_coords+maxbuf+imd_forces)*size_one;
 }
 
 /* End of FixIMD class implementation. */
 
 /***************************************************************************/
 
 /* NOTE: the following code is the based on the example implementation
  * of the IMD protocol API from VMD and NAMD. The UIUC license allows
  * to re-use up to 10% of a project's code to be used in other software */
 
 /***************************************************************************
  * DESCRIPTION:
  *   Socket interface, abstracts machine dependent APIs/routines.
  ***************************************************************************/
 
 int imdsock_init(void) {
 #if defined(_MSC_VER) || defined(__MINGW32__)
   int rc = 0;
   static int initialized=0;
 
   if (!initialized) {
     WSADATA wsdata;
     rc = WSAStartup(MAKEWORD(1,1), &wsdata);
     if (rc == 0)
       initialized = 1;
   }
 
   return rc;
 #else
   return 0;
 #endif
 }
 
 
 void * imdsock_create(void) {
   imdsocket * s;
 
   s = (imdsocket *) malloc(sizeof(imdsocket));
   if (s != NULL)
     memset(s, 0, sizeof(imdsocket));
 
   if ((s->sd = socket(PF_INET, SOCK_STREAM, 0)) == -1) {
     printf("Failed to open socket.");
     free(s);
     return NULL;
   }
 
   return (void *) s;
 }
 
 int imdsock_bind(void * v, int port) {
   imdsocket *s = (imdsocket *) v;
   memset(&(s->addr), 0, sizeof(s->addr));
   s->addr.sin_family = PF_INET;
   s->addr.sin_port = htons(port);
 
   return bind(s->sd, (struct sockaddr *) &s->addr, sizeof(s->addr));
 }
 
 int imdsock_listen(void * v) {
   imdsocket *s = (imdsocket *) v;
   return listen(s->sd, 5);
 }
 
 void *imdsock_accept(void * v) {
   int rc;
   imdsocket *new_s = NULL, *s = (imdsocket *) v;
 #if defined(ARCH_AIX5) || defined(ARCH_AIX5_64) || defined(ARCH_AIX6_64)
   unsigned int len;
 #define _SOCKLEN_TYPE unsigned int
 #elif defined(SOCKLEN_T)
   SOCKLEN_T len;
 #define _SOCKLEN_TYPE SOCKLEN_T
 #elif defined(_POSIX_SOURCE) || (defined(__APPLE__) && defined(__MACH__)) || defined(__linux)
   socklen_t len;
 #define _SOCKLEN_TYPE socklen_t
 #else
 #define _SOCKLEN_TYPE int
   int len;
 #endif
 
   len = sizeof(s->addr);
   rc = accept(s->sd, (struct sockaddr *) &s->addr, ( _SOCKLEN_TYPE * ) &len);
   if (rc >= 0) {
     new_s = (imdsocket *) malloc(sizeof(imdsocket));
     if (new_s != NULL) {
       *new_s = *s;
       new_s->sd = rc;
     }
   }
   return (void *)new_s;
 }
 
 int  imdsock_write(void * v, const void *buf, int len) {
   imdsocket *s = (imdsocket *) v;
 #if defined(_MSC_VER) || defined(__MINGW32__)
   return send(s->sd, (const char*) buf, len, 0);  /* windows lacks the write() call */
 #else
   return write(s->sd, buf, len);
 #endif
 }
 
 int  imdsock_read(void * v, void *buf, int len) {
   imdsocket *s = (imdsocket *) v;
 #if defined(_MSC_VER) || defined(__MINGW32__)
   return recv(s->sd, (char*) buf, len, 0); /* windows lacks the read() call */
 #else
   return read(s->sd, buf, len);
 #endif
 
 }
 
 void imdsock_shutdown(void *v) {
   imdsocket * s = (imdsocket *) v;
   if (s == NULL)
     return;
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
   shutdown(s->sd, SD_SEND);
 #else
   shutdown(s->sd, 1);  /* complete sends and send FIN */
 #endif
 }
 
 void imdsock_destroy(void * v) {
   imdsocket * s = (imdsocket *) v;
   if (s == NULL)
     return;
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
   closesocket(s->sd);
 #else
   close(s->sd);
 #endif
   free(s);
 }
 
 int imdsock_selread(void *v, int sec) {
   imdsocket *s = (imdsocket *)v;
   fd_set rfd;
   struct timeval tv;
   int rc;
 
   if (v == NULL) return 0;
 
   FD_ZERO(&rfd);
   FD_SET(s->sd, &rfd);
   memset((void *)&tv, 0, sizeof(struct timeval));
   tv.tv_sec = sec;
   do {
     rc = select(s->sd+1, &rfd, NULL, NULL, &tv);
   } while (rc < 0 && errno == EINTR);
   return rc;
 
 }
 
 int imdsock_selwrite(void *v, int sec) {
   imdsocket *s = (imdsocket *)v;
   fd_set wfd;
   struct timeval tv;
   int rc;
 
   if (v == NULL) return 0;
 
   FD_ZERO(&wfd);
   FD_SET(s->sd, &wfd);
   memset((void *)&tv, 0, sizeof(struct timeval));
   tv.tv_sec = sec;
   do {
     rc = select(s->sd + 1, NULL, &wfd, NULL, &tv);
   } while (rc < 0 && errno == EINTR);
   return rc;
 }
 
 /* end of socket code. */
 /*************************************************************************/
 
 /*************************************************************************/
 /* start of imd API code. */
 /* Only works with aligned 4-byte quantities, will cause a bus error */
 /* on some platforms if used on unaligned data.                      */
 void swap4_aligned(void *v, long ndata) {
   int *data = (int *) v;
   long i;
   int *N;
   for (i=0; i<ndata; i++) {
     N = data + i;
     *N=(((*N>>24)&0xff) | ((*N&0xff)<<24) |
         ((*N>>8)&0xff00) | ((*N&0xff00)<<8));
   }
 }
 
 
 /** structure used to perform byte swapping operations */
 typedef union {
   int32 i;
   struct {
     unsigned int highest : 8;
     unsigned int high    : 8;
     unsigned int low     : 8;
     unsigned int lowest  : 8;
   } b;
 } netint;
 
 static int32 imd_htonl(int32 h) {
   netint n;
   n.b.highest = h >> 24;
   n.b.high    = h >> 16;
   n.b.low     = h >> 8;
   n.b.lowest  = h;
   return n.i;
 }
 
 static int32 imd_ntohl(int32 n) {
   netint u;
   u.i = n;
   return (u.b.highest << 24 | u.b.high << 16 | u.b.low << 8 | u.b.lowest);
 }
 
 static void imd_fill_header(IMDheader *header, IMDType type, int32 length) {
   header->type = imd_htonl((int32)type);
   header->length = imd_htonl(length);
 }
 
 static void swap_header(IMDheader *header) {
   header->type = imd_ntohl(header->type);
   header->length= imd_ntohl(header->length);
 }
 
 static int32 imd_readn(void *s, char *ptr, int32 n) {
   int32 nleft;
   int32 nread;
 
   nleft = n;
   while (nleft > 0) {
     if ((nread = imdsock_read(s, ptr, nleft)) < 0) {
       if (errno == EINTR)
         nread = 0;         /* and call read() again */
       else
         return -1;
     } else if (nread == 0)
       break;               /* EOF */
     nleft -= nread;
     ptr += nread;
   }
   return n-nleft;
 }
 
 static int32 imd_writen(void *s, const char *ptr, int32 n) {
   int32 nleft;
   int32 nwritten;
 
   nleft = n;
   while (nleft > 0) {
     if ((nwritten = imdsock_write(s, ptr, nleft)) <= 0) {
       if (errno == EINTR)
         nwritten = 0;
       else
         return -1;
     }
     nleft -= nwritten;
     ptr += nwritten;
   }
   return n;
 }
 
 int imd_disconnect(void *s) {
   IMDheader header;
   imd_fill_header(&header, IMD_DISCONNECT, 0);
   return (imd_writen(s, (char *)&header, IMDHEADERSIZE) != IMDHEADERSIZE);
 }
 
 int imd_handshake(void *s) {
   IMDheader header;
   imd_fill_header(&header, IMD_HANDSHAKE, 1);
   header.length = IMDVERSION;   /* Not byteswapped! */
   return (imd_writen(s, (char *)&header, IMDHEADERSIZE) != IMDHEADERSIZE);
 }
 
 /* The IMD receive functions */
 
 IMDType imd_recv_header(void *s, int32 *length) {
   IMDheader header;
   if (imd_readn(s, (char *)&header, IMDHEADERSIZE) != IMDHEADERSIZE)
     return IMD_IOERROR;
   swap_header(&header);
   *length = header.length;
   return IMDType(header.type);
 }
 
 int imd_recv_mdcomm(void *s, int32 n, int32 *indices, float *forces) {
   if (imd_readn(s, (char *)indices, 4*n) != 4*n) return 1;
   if (imd_readn(s, (char *)forces, 12*n) != 12*n) return 1;
   return 0;
 }
 
 int imd_recv_energies(void *s, IMDEnergies *energies) {
   return (imd_readn(s, (char *)energies, sizeof(IMDEnergies))
           != sizeof(IMDEnergies));
 }
 
 int imd_recv_fcoords(void *s, int32 n, float *coords) {
   return (imd_readn(s, (char *)coords, 12*n) != 12*n);
 }
 
 // Local Variables:
 // mode: c++
 // compile-command: "make -j4 openmpi"
 // c-basic-offset: 2
 // fill-column: 76
 // indent-tabs-mode: nil
 // End:
diff --git a/src/USER-MISC/fix_spring_pull.h b/src/USER-MISC/fix_spring_pull.h
index 29617ff68..eed69cef3 100644
--- a/src/USER-MISC/fix_spring_pull.h
+++ b/src/USER-MISC/fix_spring_pull.h
@@ -1,54 +1,53 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(spring/pull,FixSpringPull)
 
 #else
 
 #ifndef LMP_FIX_SPRING_PULL_H
 #define LMP_FIX_SPRING_PULL_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixSpringPull : public Fix {
  public:
   FixSpringPull(class LAMMPS *, int, char **);
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void min_setup(int);
   virtual void post_force(int);
   virtual void post_force_respa(int, int, int);
   virtual void min_post_force(int);
   virtual double compute_scalar();
   virtual double compute_vector(int);
 
  private:
   double xc,yc,zc,r0;
   double xv,yv,zv;
   double k_spring;
   int xflag,yflag,zflag;
   double masstotal;
   int nlevels_respa;
   double espring,ftotal[8];
-  int force_flag;
 };
 
 }
 
 #endif
 #endif
diff --git a/src/USER-MISC/pair_tersoff_table.cpp b/src/USER-MISC/pair_tersoff_table.cpp
index a0dd0fcb0..d5850ac81 100644
--- a/src/USER-MISC/pair_tersoff_table.cpp
+++ b/src/USER-MISC/pair_tersoff_table.cpp
@@ -1,1026 +1,1022 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Luca Ferraro (CASPUR)
    email: luca.ferraro@caspur.it
 
    Tersoff Potential
    References: (referenced as tersoff_2 functional form in LAMMPS manual)
     1) Tersoff, Phys. Rev. B 39, 5566 (1988)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_tersoff_table.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define DELTA 4
 
 #define GRIDSTART 0.1
 #define GRIDDENSITY_FCUTOFF 5000
 #define GRIDDENSITY_EXP 12000
 #define GRIDDENSITY_GTETA 12000
 #define GRIDDENSITY_BIJ 7500
 
 // max number of interaction per atom for environment potential
 
 #define leadingDimensionInteractionList 64
 
 /* ---------------------------------------------------------------------- */
 
 PairTersoffTable::PairTersoffTable(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   nelements = 0;
   elements = NULL;
   nparams = maxparam = 0;
   params = NULL;
   elem2param = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairTersoffTable::~PairTersoffTable()
 {
   if (elements)
     for (int i = 0; i < nelements; i++) delete [] elements[i];
   delete [] elements;
   memory->destroy(params);
   memory->destroy(elem2param);
 
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     delete [] map;
 
     deallocateGrids();
     deallocatePreLoops();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::compute(int eflag, int vflag)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
   double xtmp,ytmp,ztmp;
   double fxtmp,fytmp,fztmp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
 
   int interpolIDX;
   double r_ik_x, r_ik_y, r_ik_z;
   double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
   double invR_ij, invR_ik, cosTeta;
   double repulsivePotential, attractivePotential;
   double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
   double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta;
   double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
   double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
   double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
   double factor_force_ij;
   double gtetaFunctionDerived_temp,gtetaFunction_temp;
 
   double evdwl = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
   for (ii = 0; ii < inum; ii++) {
 
     i = ilist[ii];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     fxtmp = fytmp = fztmp = 0.0;
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     if (jnum > leadingDimensionInteractionList) {
       char errmsg[256];
       sprintf(errmsg,"Too many neighbors for interaction list: %d vs %d.\n"
               "Check your system or increase 'leadingDimensionInteractionList'",
               jnum, leadingDimensionInteractionList);
       error->one(FLERR,errmsg);
     }
 
     // Pre-calculate gteta and cutoff function
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij;
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j][0];
       dr_ij[1] = ytmp - x[j][1];
       dr_ij[2] = ztmp - x[j][2];
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
 
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       // preCutoffFunction
       interpolDeltaX =  r_ij - GRIDSTART;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
       interpolIDX = (int) interpolTMP;
       interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
       preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // preCutoffFunctionDerived
       interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
       preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
 
       for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         // preGtetaFunction
         interpolDeltaX=cosTeta+1.0;
         interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
         interpolIDX = (int) interpolTMP;
         interpolY1 = gtetaFunction[itype][interpolIDX];
         interpolY2 = gtetaFunction[itype][interpolIDX+1];
         gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
         // preGtetaFunctionDerived
         interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
         interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
         gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
         preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
         preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
 
       } // loop on K
 
     } // loop on J
 
 
     // loop over neighbours of atom i
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij, f_ij[3];
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j][0];
       dr_ij[1] = ytmp - x[j][1];
       dr_ij[2] = ztmp - x[j][2];
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
       exponentAttractivePotential = params[ijparam].lam2 * r_ij;
 
       // repulsiveExponential
       interpolDeltaX =  exponentRepulsivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // attractiveExponential
       interpolDeltaX =  exponentAttractivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
       repulsivePotential = params[ijparam].biga * repulsiveExponential;
       attractivePotential = -params[ijparam].bigb * attractiveExponential;
 
       cutoffFunctionIJ = preCutoffFunction[neighbor_j];
       cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j];
 
       zeta = 0.0;
 
       // first loop over neighbours of atom i except j - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
-        directorCos_ik_x = invR_ik * r_ik_x;
-        directorCos_ik_y = invR_ik * r_ik_y;
-        directorCos_ik_z = invR_ik * r_ik_z;
-
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
 
       }
 
       // first loop over neighbours of atom i except j - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
       }
 
       // betaZetaPowerIJK
       interpolDeltaX= params[ijparam].beta * zeta;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
       interpolIDX = (int) interpolTMP;
       interpolY1 = betaZetaPower[itype][interpolIDX];
       interpolY2 = betaZetaPower[itype][interpolIDX+1];
       betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
       // betaZetaPowerDerivedIJK
       interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
       interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
       betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
 
       // Forces and virial
       factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
 
       f_ij[0] = factor_force_ij * directorCos_ij_x;
       f_ij[1] = factor_force_ij * directorCos_ij_y;
       f_ij[2] = factor_force_ij * directorCos_ij_z;
 
       f[j][0] += f_ij[0];
       f[j][1] += f_ij[1];
       f[j][2] += f_ij[2];
 
       fxtmp -= f_ij[0];
       fytmp -= f_ij[1];
       fztmp -= f_ij[2];
 
       // potential energy
       evdwl = cutoffFunctionIJ * repulsivePotential + cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
 
       if (evflag) ev_tally(i, j, nlocal, newton_pair, 0.5 * evdwl, 0.0,
                            -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2]);
 
       factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
 
       // second loop over neighbours of atom i except j, forces and virial only - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + factor_2_force3_ik * directorCos_ik_z;
 
         f[j][0] -= f_ij[0];
         f[j][1] -= f_ij[1];
         f[j][2] -= f_ij[2];
 
         f[k][0] -= f_ik[0];
         f[k][1] -= f_ik[1];
         f[k][2] -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         // potential energy
         evdwl = 0.0;
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
       }
 
       // second loop over neighbours of atom i except j, forces and virial only - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + factor_2_force3_ik * directorCos_ik_z;
 
         f[j][0] -= f_ij[0];
         f[j][1] -= f_ij[1];
         f[j][2] -= f_ij[2];
 
         f[k][0] -= f_ik[0];
         f[k][1] -= f_ik[1];
         f[k][2] -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         // potential energy
         evdwl = 0.0;
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
 
       }
     } // loop on J
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   } // loop on I
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::deallocatePreLoops(void)
 {
     memory->destroy (preGtetaFunction);
     memory->destroy (preGtetaFunctionDerived);
     memory->destroy(preCutoffFunction);
     memory->destroy(preCutoffFunctionDerived);
 }
 
 void PairTersoffTable::allocatePreLoops(void)
 {
   memory->create(preGtetaFunction,leadingDimensionInteractionList,leadingDimensionInteractionList,"tersofftable:preGtetaFunction");
 
   memory->create(preGtetaFunctionDerived,leadingDimensionInteractionList,leadingDimensionInteractionList,"tersofftable:preGtetaFunctionDerived");
 
   memory->create(preCutoffFunction,leadingDimensionInteractionList,"tersofftable:preCutoffFunction");
 
   memory->create(preCutoffFunctionDerived,leadingDimensionInteractionList,"tersofftable:preCutoffFunctionDerived");
 }
 
 void PairTersoffTable::deallocateGrids()
 {
   int i,j;
 
   memory->destroy(exponential);
   memory->destroy(gtetaFunction);
   memory->destroy(gtetaFunctionDerived);
   memory->destroy(cutoffFunction);
   memory->destroy(cutoffFunctionDerived);
   memory->destroy(betaZetaPower);
   memory->destroy(betaZetaPowerDerived);
 }
 
 void PairTersoffTable::allocateGrids(void)
 {
   int   i, j, l;
 
   int     numGridPointsExponential, numGridPointsGtetaFunction, numGridPointsOneCutoffFunction;
   int     numGridPointsNotOneCutoffFunction, numGridPointsCutoffFunction, numGridPointsBetaZetaPower;
   // double minArgumentExponential;
   double  deltaArgumentCutoffFunction, deltaArgumentExponential, deltaArgumentBetaZetaPower;
   double  deltaArgumentGtetaFunction;
   double  r, minMu, maxLambda, maxCutoff;
   double const PI=acos(-1.0);
 
   // exponential
 
   // find min and max argument
   minMu=params[0].lam2;
   maxLambda=params[0].lam1;
   for (i=1; i<nparams; i++) {
     if (params[i].lam2 < minMu) minMu = params[i].lam2;
     if (params[i].lam1 > maxLambda) maxLambda = params[i].lam1;
   }
   maxCutoff=cutmax;
 
   minArgumentExponential=minMu*GRIDSTART;
 
   numGridPointsExponential=(int)((maxLambda*maxCutoff-minArgumentExponential)*GRIDDENSITY_EXP)+2;
 
   memory->create(exponential,numGridPointsExponential,"tersofftable:exponential");
 
   r = minArgumentExponential;
   deltaArgumentExponential = 1.0 / GRIDDENSITY_EXP;
   for (i = 0; i < numGridPointsExponential; i++)
     {
       exponential[i] = exp(-r);
       r += deltaArgumentExponential;
     }
 
 
   // gtetaFunction
 
   numGridPointsGtetaFunction=(int)(2.0*GRIDDENSITY_GTETA)+2;
 
   memory->create(gtetaFunction,nelements,numGridPointsGtetaFunction,"tersofftable:gtetaFunction");
   memory->create(gtetaFunctionDerived,nelements,numGridPointsGtetaFunction,"tersofftable:gtetaFunctionDerived");
 
   r = minArgumentExponential;
   for (i=0; i<nelements; i++) {
     r = -1.0;
     deltaArgumentGtetaFunction = 1.0 / GRIDDENSITY_GTETA;
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double h = params[iparam].h;
 
     for (j = 0; j < numGridPointsGtetaFunction; j++) {
       gtetaFunction[i][j]=1.0+(c*c)/(d*d)-(c*c)/(d*d+(h-r)*(h-r));
       gtetaFunctionDerived[i][j]= -2.0 * c * c * (h-r) / ((d*d+(h-r)*(h-r))*(d*d+(h-r)*(h-r)));
       r += deltaArgumentGtetaFunction;
     }
   }
 
 
   // cutoffFunction, zetaFunction, find grids.
 
   int ngrid_max = -1;
   int zeta_max = -1;
 
   for (i=0; i<nelements; i++) {
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double beta = params[iparam].beta;
 
     numGridPointsBetaZetaPower=(int)(((1.0+(c*c)/(d*d)-(c*c)/(d*d+4))*beta*leadingDimensionInteractionList*GRIDDENSITY_BIJ))+2;
     zeta_max = MAX(zeta_max,numGridPointsBetaZetaPower);
 
     for (j=0; j<nelements; j++) {
       for (j=0; j<nelements; j++) {
 
         int ijparam = elem2param[i][j][j];
         double cutoffR = params[ijparam].cutoffR;
         double cutoffS = params[ijparam].cutoffS;
 
         numGridPointsOneCutoffFunction=(int) ((cutoffR-GRIDSTART)*GRIDDENSITY_FCUTOFF)+1;
         numGridPointsNotOneCutoffFunction=(int) ((cutoffS-cutoffR)*GRIDDENSITY_FCUTOFF)+2;
         numGridPointsCutoffFunction=numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction;
 
         ngrid_max = MAX(ngrid_max,numGridPointsCutoffFunction);
       }
     }
   }
 
   memory->create(cutoffFunction,nelements,nelements,ngrid_max,"tersoff:cutfunc");
   memory->create(cutoffFunctionDerived,nelements,nelements,ngrid_max,"tersoff:cutfuncD");
 
   // cutoffFunction, compute.
 
   for (i=0; i<nelements; i++) {
     for (j=0; j<nelements; j++) {
       for (j=0; j<nelements; j++) {
         int ijparam = elem2param[i][j][j];
         double cutoffR = params[ijparam].cutoffR;
         double cutoffS = params[ijparam].cutoffS;
 
         numGridPointsOneCutoffFunction=(int) ((cutoffR-GRIDSTART)*GRIDDENSITY_FCUTOFF)+1;
         numGridPointsNotOneCutoffFunction=(int) ((cutoffS-cutoffR)*GRIDDENSITY_FCUTOFF)+2;
         numGridPointsCutoffFunction=numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction;
 
         r = GRIDSTART;
         deltaArgumentCutoffFunction = 1.0 / GRIDDENSITY_FCUTOFF;
 
         for (l = 0; l < numGridPointsOneCutoffFunction; l++) {
           cutoffFunction[i][j][l] = 1.0;
           cutoffFunctionDerived[i][j][l]=0.0;
           r += deltaArgumentCutoffFunction;
         }
 
         for (l = numGridPointsOneCutoffFunction; l < numGridPointsCutoffFunction; l++) {
           cutoffFunction[i][j][l] = 0.5 + 0.5 * cos (PI * (r - cutoffR)/(cutoffS-cutoffR)) ;
           cutoffFunctionDerived[i][j][l] =  -0.5 * PI * sin (PI * (r - cutoffR)/(cutoffS-cutoffR)) / (cutoffS-cutoffR) ;
           r += deltaArgumentCutoffFunction;
         }
       }
     }
   }
 
   // betaZetaPower, compute
 
   memory->create(betaZetaPower,nelements,zeta_max,"tersoff:zetafunc");
   memory->create(betaZetaPowerDerived,nelements,zeta_max,"tersoff:zetafuncD");
 
   for (i=0; i<nelements; i++) {
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double beta = params[iparam].beta;
 
     numGridPointsBetaZetaPower=(int)(((1.0+(c*c)/(d*d)-(c*c)/(d*d+4))*beta*leadingDimensionInteractionList*GRIDDENSITY_BIJ))+2;
 
     r=0.0;
     deltaArgumentBetaZetaPower = 1.0 / GRIDDENSITY_BIJ;
 
     betaZetaPower[i][0]=1.0;
 
     r += deltaArgumentBetaZetaPower;
 
     for (j = 1; j < numGridPointsBetaZetaPower; j++) {
       double powern=params[iparam].powern;
       betaZetaPower[i][j]=pow((1+pow(r,powern)),-1/(2*powern));
       betaZetaPowerDerived[i][j]=-0.5*pow(r,powern-1.0)*pow((1+pow(r,powern)),-1/(2*powern)-1) ;
       r += deltaArgumentBetaZetaPower;
     }
     betaZetaPowerDerived[i][0]=(betaZetaPower[i][1]-1.0)*GRIDDENSITY_BIJ;
   }
 }
 
 void PairTersoffTable::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   map = new int[n+1];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairTersoffTable::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairTersoffTable::coeff(int narg, char **arg)
 {
   int i,j,n;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   if (elements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (i = 0; i < atom->ntypes; i++) elements[i] = NULL;
 
   nelements = 0;
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     map[i-2] = j;
     if (j == nelements) {
       n = strlen(arg[i]) + 1;
       elements[j] = new char[n];
       strcpy(elements[j],arg[i]);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // allocate tables and internal structures
   allocatePreLoops();
   allocateGrids();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairTersoffTable::init_style()
 {
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style Tersoff requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style Tersoff requires newton pair on");
 
   // need a full neighbor list
 
   int irequest = neighbor->request(this);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairTersoffTable::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::read_file(char *file)
 {
   int params_per_line = 17;
   char **words = new char*[params_per_line+1];
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = open_potential(file);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open Tersoff potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each set of params from potential file
   // one set of params can span multiple lines
   // store params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in Tersoff potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next entry in file
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].powerm = atof(words[3]); // not used (only tersoff_2 is implemented)
     params[nparams].gamma = atof(words[4]); // not used (only tersoff_2 is implemented)
     params[nparams].lam3 = atof(words[5]); // not used (only tersoff_2 is implemented)
     params[nparams].c = atof(words[6]);
     params[nparams].d = atof(words[7]);
     params[nparams].h = atof(words[8]);
     params[nparams].powern = atof(words[9]);
     params[nparams].beta = atof(words[10]);
     params[nparams].lam2 = atof(words[11]);
     params[nparams].bigb = atof(words[12]);
     // current implementation is based on functional form
     // of tersoff_2 as reported in the reference paper
     double bigr = atof(words[13]);
     double bigd = atof(words[14]);
     params[nparams].cutoffR = bigr - bigd;
     params[nparams].cutoffS = bigr + bigd;
     params[nparams].lam1 = atof(words[15]);
     params[nparams].biga = atof(words[16]);
 
     // currently only allow m exponent of 1 or 3
     params[nparams].powermint = int(params[nparams].powerm);
 
     if (params[nparams].c < 0.0 || params[nparams].d < 0.0 ||
         params[nparams].powern < 0.0 || params[nparams].beta < 0.0 ||
         params[nparams].lam2 < 0.0 || params[nparams].bigb < 0.0 ||
         params[nparams].cutoffR < 0.0 ||params[nparams].cutoffS < 0.0 ||
         params[nparams].cutoffR > params[nparams].cutoffS ||
         params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0
     ) error->all(FLERR,"Illegal Tersoff parameter");
 
     // only tersoff_2 parametrization is implemented
     if (params[nparams].gamma != 1.0 || params[nparams].lam3 != 0.0)
       error->all(FLERR,"Current tersoff/table pair_style implements only tersoff_2 parametrization");
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::setup()
 {
   int i,j,k,m,n;
 
   // set elem2param for all triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
   // set cutoff square
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].cutoffS;
     params[m].cutsq = params[m].cut*params[m].cut;
   }
 
   // set cutmax to max of all params
   cutmax = 0.0;
   for (m = 0; m < nparams; m++) {
     if (params[m].cut > cutmax) cutmax = params[m].cut;
   }
 }
diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp
index 9f9f8f9b3..08c04fd68 100644
--- a/src/USER-OMP/pair_tersoff_table_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_table_omp.cpp
@@ -1,518 +1,514 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "pair_tersoff_table_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
 #define GRIDSTART 0.1
 #define GRIDDENSITY_FCUTOFF 5000
 #define GRIDDENSITY_EXP 12000
 #define GRIDDENSITY_GTETA 12000
 #define GRIDDENSITY_BIJ 7500
 
 // max number of interaction per atom for environment potential
 
 #define leadingDimensionInteractionList 64
 
 /* ---------------------------------------------------------------------- */
 
 PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) :
   PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR)
 {
   suffix_flag |= Suffix::OMP;
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTableOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else evflag = vflag_fdotr = vflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
     thr->timer(ThrData::TIME_START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
       if (vflag_atom) eval<1,1>(ifrom, ito, thr);
       else eval<1,0>(ifrom, ito, thr);
     else eval<0,0>(ifrom, ito, thr);
 
     thr->timer(ThrData::TIME_PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
 
 template <int EVFLAG, int VFLAG_ATOM>
 void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
   double xtmp,ytmp,ztmp;
   double fxtmp,fytmp,fztmp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int interpolIDX;
   double r_ik_x, r_ik_y, r_ik_z;
   double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
   double invR_ij, invR_ik, cosTeta;
   double repulsivePotential, attractivePotential;
   double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
   double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta;
   double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
   double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
   double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
   double factor_force_ij;
   double gtetaFunctionDerived_temp,gtetaFunction_temp;
 
   double evdwl = 0.0;
 
   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
   dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
   const int * _noalias const type = atom->type;
   const int nlocal = atom->nlocal;
   const int tid = thr->get_tid();
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
   for (ii = iifrom; ii < iito; ii++) {
 
     i = ilist[ii];
     itype = map[type[i]];
     xtmp = x[i].x;
     ytmp = x[i].y;
     ztmp = x[i].z;
     fxtmp = fytmp = fztmp = 0.0;
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     if (check_error_thr((jnum > leadingDimensionInteractionList), tid,
                         FLERR,"Too many neighbors for interaction list.\n"
                         "Check your system or increase 'leadingDimension"
                         "InteractionList'"))
       return;
 
     // Pre-calculate gteta and cutoff function
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij;
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j].x;
       dr_ij[1] = ytmp - x[j].y;
       dr_ij[2] = ztmp - x[j].z;
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
 
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       // preCutoffFunction
       interpolDeltaX =  r_ij - GRIDSTART;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
       interpolIDX = (int) interpolTMP;
       interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
       preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // preCutoffFunctionDerived
       interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
       preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
 
       for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k].x;
         dr_ik[1] = ytmp -x[k].y;
         dr_ik[2] = ztmp -x[k].z;
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         // preGtetaFunction
         interpolDeltaX=cosTeta+1.0;
         interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
         interpolIDX = (int) interpolTMP;
         interpolY1 = gtetaFunction[itype][interpolIDX];
         interpolY2 = gtetaFunction[itype][interpolIDX+1];
         gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
         // preGtetaFunctionDerived
         interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
         interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
         gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
         preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
         preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
 
       } // loop on K
 
     } // loop on J
 
 
     // loop over neighbours of atom i
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij, f_ij[3];
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j].x;
       dr_ij[1] = ytmp - x[j].y;
       dr_ij[2] = ztmp - x[j].z;
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
       exponentAttractivePotential = params[ijparam].lam2 * r_ij;
 
       // repulsiveExponential
       interpolDeltaX =  exponentRepulsivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // attractiveExponential
       interpolDeltaX =  exponentAttractivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
       repulsivePotential = params[ijparam].biga * repulsiveExponential;
       attractivePotential = -params[ijparam].bigb * attractiveExponential;
 
       cutoffFunctionIJ = preCutoffFunction[neighbor_j];
       cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j];
 
       zeta = 0.0;
 
       // first loop over neighbours of atom i except j - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k].x;
         dr_ik[1] = ytmp -x[k].y;
         dr_ik[2] = ztmp -x[k].z;
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
-        directorCos_ik_x = invR_ik * r_ik_x;
-        directorCos_ik_y = invR_ik * r_ik_y;
-        directorCos_ik_z = invR_ik * r_ik_z;
-
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
 
       }
 
       // first loop over neighbours of atom i except j - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k].x;
         dr_ik[1] = ytmp -x[k].y;
         dr_ik[2] = ztmp -x[k].z;
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
       }
 
       // betaZetaPowerIJK
       interpolDeltaX= params[ijparam].beta * zeta;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
       interpolIDX = (int) interpolTMP;
       interpolY1 = betaZetaPower[itype][interpolIDX];
       interpolY2 = betaZetaPower[itype][interpolIDX+1];
       betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
       // betaZetaPowerDerivedIJK
       interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
       interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
       betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
 
       // Forces and virial
       factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
 
       f_ij[0] = factor_force_ij * directorCos_ij_x;
       f_ij[1] = factor_force_ij * directorCos_ij_y;
       f_ij[2] = factor_force_ij * directorCos_ij_z;
 
       f[j].x += f_ij[0];
       f[j].y += f_ij[1];
       f[j].z += f_ij[2];
 
       fxtmp -= f_ij[0];
       fytmp -= f_ij[1];
       fztmp -= f_ij[2];
 
       // potential energy
       evdwl = cutoffFunctionIJ * repulsivePotential
         + cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
 
       if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0,
                                -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
 
       factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
 
       // second loop over neighbours of atom i except j, forces and virial only - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k].x;
         dr_ik[1] = ytmp -x[k].y;
         dr_ik[2] = ztmp -x[k].z;
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
           + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
           + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
           + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
           + factor_2_force3_ik * directorCos_ik_z;
 
         f[j].x -= f_ij[0];
         f[j].y -= f_ij[1];
         f[j].z -= f_ij[2];
 
         f[k].x -= f_ik[0];
         f[k].y -= f_ik[1];
         f[k].z -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
       }
 
       // second loop over neighbours of atom i except j, forces and virial only - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k].x;
         dr_ik[1] = ytmp -x[k].y;
         dr_ik[2] = ztmp -x[k].z;
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
           + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
           + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
           + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
           + factor_2_force3_ik * directorCos_ik_z;
 
         f[j].x -= f_ij[0];
         f[j].y -= f_ij[1];
         f[j].z -= f_ij[2];
 
         f[k].x -= f_ik[0];
         f[k].y -= f_ik[1];
         f[k].z -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
 
       }
     } // loop on J
     f[i].x += fxtmp;
     f[i].y += fytmp;
     f[i].z += fztmp;
   } // loop on I
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoffTableOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
   bytes += PairTersoffTable::memory_usage();
 
   return bytes;
 }
diff --git a/src/balance.cpp b/src/balance.cpp
index b315a4454..e1bcf3692 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -1,1028 +1,918 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "lmptype.h"
 #include "mpi.h"
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "balance.h"
 #include "atom.h"
 #include "comm.h"
 #include "irregular.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 enum{NONE,UNIFORM,USER,DYNAMIC};
 enum{X,Y,Z};
 
 //#define BALANCE_DEBUG 1
 
 /* ---------------------------------------------------------------------- */
 
 Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   memory->create(proccount,nprocs,"balance:proccount");
   memory->create(allproccount,nprocs,"balance:allproccount");
 
   user_xsplit = user_ysplit = user_zsplit = NULL;
   dflag = 0;
 
   fp = NULL;
   firststep = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Balance::~Balance()
 {
   memory->destroy(proccount);
   memory->destroy(allproccount);
 
   delete [] user_xsplit;
   delete [] user_ysplit;
   delete [] user_zsplit;
 
   if (dflag) {
     delete [] bdim;
     delete [] count;
     delete [] sum;
     delete [] target;
     delete [] onecount;
     delete [] lo;
     delete [] hi;
     delete [] losum;
     delete [] hisum;
   }
 
   if (fp) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    called as balance command in input script
 ------------------------------------------------------------------------- */
 
 void Balance::command(int narg, char **arg)
 {
   if (domain->box_exist == 0)
     error->all(FLERR,"Balance command before simulation box is defined");
 
   if (comm->me == 0 && screen) fprintf(screen,"Balancing ...\n");
 
   // parse arguments
 
   if (narg < 1) error->all(FLERR,"Illegal balance command");
 
   int dimension = domain->dimension;
   int *procgrid = comm->procgrid;
   xflag = yflag = zflag = NONE;
   dflag = 0;
   outflag = 0;
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"x") == 0) {
       if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         xflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[0]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         xflag = USER;
         delete [] user_xsplit;
         user_xsplit = new double[procgrid[0]+1];
         user_xsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[0]; i++)
           user_xsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_xsplit[procgrid[0]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         yflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[1]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         yflag = USER;
         delete [] user_ysplit;
         user_ysplit = new double[procgrid[1]+1];
         user_ysplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[1]; i++)
           user_ysplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_ysplit[procgrid[1]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         zflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[2]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         zflag = USER;
         delete [] user_zsplit;
         user_zsplit = new double[procgrid[2]+1];
         user_zsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[2]; i++)
           user_zsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_zsplit[procgrid[2]] = 1.0;
       }
 
     } else if (strcmp(arg[iarg],"dynamic") == 0) {
       if (xflag != NONE || yflag != NONE || zflag != NONE)
         error->all(FLERR,"Illegal balance command");
       if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
       dflag = 1;
       xflag = yflag = DYNAMIC;
       if (dimension == 3) zflag = DYNAMIC;
       if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command");
       strcpy(bstr,arg[iarg+1]);
       nitermax = force->inumeric(FLERR,arg[iarg+2]);
       if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
       thresh = force->numeric(FLERR,arg[iarg+3]);
       if (thresh < 1.0) error->all(FLERR,"Illegal balance command");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"out") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
       if (outflag) error->all(FLERR,"Illegal balance command");
       outflag = 1;
       if (me == 0) {
         fp = fopen(arg[iarg+1],"w");
         if (fp == NULL) error->one(FLERR,"Cannot open balance output file");
       }
       iarg += 2;
     } else error->all(FLERR,"Illegal balance command");
   }
 
   // error check
 
   if (zflag && dimension == 2)
     error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
 
   if (xflag == USER)
     for (int i = 1; i <= procgrid[0]; i++)
       if (user_xsplit[i-1] >= user_xsplit[i])
         error->all(FLERR,"Illegal balance command");
   if (yflag == USER)
     for (int i = 1; i <= procgrid[1]; i++)
       if (user_ysplit[i-1] >= user_ysplit[i])
         error->all(FLERR,"Illegal balance command");
   if (zflag == USER)
     for (int i = 1; i <= procgrid[2]; i++)
       if (user_zsplit[i-1] >= user_zsplit[i])
         error->all(FLERR,"Illegal balance command");
 
   if (dflag) {
     for (int i = 0; i < strlen(bstr); i++) {
       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
         error->all(FLERR,"Balance dynamic string is invalid");
       if (bstr[i] == 'z' && dimension == 2)
         error->all(FLERR,"Balance dynamic string is invalid");
       for (int j = i+1; j < strlen(bstr); j++)
         if (bstr[i] == bstr[j])
           error->all(FLERR,"Balance dynamic string is invalid");
     }
   }
 
   // insure atoms are in current box & update box via shrink-wrap
   // no exchange() since doesn't matter if atoms are assigned to correct procs
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   domain->reset_box();
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // imbinit = initial imbalance
   // use current splits instead of nlocal since atoms may not be in sub-box
 
   domain->x2lamda(atom->nlocal);
   int maxinit;
   double imbinit = imbalance_splits(maxinit);
   domain->lamda2x(atom->nlocal);
 
   // debug output of initial state
 
 #ifdef BALANCE_DEBUG
   if (me == 0 && fp) dumpout(update->ntimestep,fp);
 #endif
 
   int niter = 0;
 
   // explicit setting of sub-domain sizes
 
   if (xflag == UNIFORM) {
     for (int i = 0; i < procgrid[0]; i++)
       comm->xsplit[i] = i * 1.0/procgrid[0];
     comm->xsplit[procgrid[0]] = 1.0;
   }
 
   if (yflag == UNIFORM) {
     for (int i = 0; i < procgrid[1]; i++)
       comm->ysplit[i] = i * 1.0/procgrid[1];
     comm->ysplit[procgrid[1]] = 1.0;
   }
 
   if (zflag == UNIFORM) {
     for (int i = 0; i < procgrid[2]; i++)
       comm->zsplit[i] = i * 1.0/procgrid[2];
     comm->zsplit[procgrid[2]] = 1.0;
   }
 
   if (xflag == USER)
     for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
 
   if (yflag == USER)
     for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
 
   if (zflag == USER)
     for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
 
   // static load-balance of sub-domain sizes
 
   if (dflag) {
     static_setup(bstr);
     niter = dynamic();
   }
 
   // output of final result
 
   if (outflag && me == 0) dumpout(update->ntimestep,fp);
 
   // reset comm->uniform flag if necessary
 
   if (comm->uniform) {
     if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0;
     if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0;
     if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0;
   } else {
     if (dimension == 3) {
       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->uniform = 1;
     } else {
       if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1;
     }
   }
 
   // reset proc sub-domains
 
   if (domain->triclinic) domain->set_lamda_box();
   domain->set_local_box();
 
   // move atoms to new processors via irregular()
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   Irregular *irregular = new Irregular(lmp);
   irregular->migrate_atoms();
   delete irregular;
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // check if any atoms were lost
 
   bigint natoms;
   bigint nblocal = atom->nlocal;
   MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
   if (natoms != atom->natoms) {
     char str[128];
     sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
             " current " BIGINT_FORMAT,atom->natoms,natoms);
     error->all(FLERR,str);
   }
 
   // imbfinal = final imbalance based on final nlocal
 
   int maxfinal;
   double imbfinal = imbalance_nlocal(maxfinal);
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  iteration count = %d\n",niter);
       fprintf(screen,"  initial/final max atoms/proc = %d %d\n",
               maxinit,maxfinal);
       fprintf(screen,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
     if (logfile) {
       fprintf(logfile,"  iteration count = %d\n",niter);
       fprintf(logfile,"  initial/final max atoms/proc = %d %d\n",
               maxinit,maxfinal);
       fprintf(logfile,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
   }
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  x cuts:");
       for (int i = 0; i <= comm->procgrid[0]; i++)
         fprintf(screen," %g",comm->xsplit[i]);
       fprintf(screen,"\n");
       fprintf(screen,"  y cuts:");
       for (int i = 0; i <= comm->procgrid[1]; i++)
         fprintf(screen," %g",comm->ysplit[i]);
       fprintf(screen,"\n");
       fprintf(screen,"  z cuts:");
       for (int i = 0; i <= comm->procgrid[2]; i++)
         fprintf(screen," %g",comm->zsplit[i]);
       fprintf(screen,"\n");
     }
     if (logfile) {
       fprintf(logfile,"  x cuts:");
       for (int i = 0; i <= comm->procgrid[0]; i++)
         fprintf(logfile," %g",comm->xsplit[i]);
       fprintf(logfile,"\n");
       fprintf(logfile,"  y cuts:");
       for (int i = 0; i <= comm->procgrid[1]; i++)
         fprintf(logfile," %g",comm->ysplit[i]);
       fprintf(logfile,"\n");
       fprintf(logfile,"  z cuts:");
       for (int i = 0; i <= comm->procgrid[2]; i++)
         fprintf(logfile," %g",comm->zsplit[i]);
       fprintf(logfile,"\n");
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance based on nlocal
    return max = max atom per proc
    return imbalance factor = max atom per proc / ave atom per proc
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_nlocal(int &max)
 {
   MPI_Allreduce(&atom->nlocal,&max,1,MPI_INT,MPI_MAX,world);
   double imbalance = 1.0;
   if (max) imbalance = max / (1.0 * atom->natoms / nprocs);
   return imbalance;
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance based on processor splits in 3 dims
    atoms must be in lamda coords (0-1) before called
    map atoms to 3d grid of procs
    return max = max atom per proc
    return imbalance factor = max atom per proc / ave atom per proc
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_splits(int &max)
 {
   double *xsplit = comm->xsplit;
   double *ysplit = comm->ysplit;
   double *zsplit = comm->zsplit;
 
   int nx = comm->procgrid[0];
   int ny = comm->procgrid[1];
   int nz = comm->procgrid[2];
 
   for (int i = 0; i < nprocs; i++) proccount[i] = 0;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int ix,iy,iz;
 
   for (int i = 0; i < nlocal; i++) {
     ix = binary(x[i][0],nx,xsplit);
     iy = binary(x[i][1],ny,ysplit);
     iz = binary(x[i][2],nz,zsplit);
     proccount[iz*nx*ny + iy*nx + ix]++;
   }
 
   MPI_Allreduce(proccount,allproccount,nprocs,MPI_INT,MPI_SUM,world);
   max = 0;
   for (int i = 0; i < nprocs; i++) max = MAX(max,allproccount[i]);
   double imbalance = 1.0;
   if (max) imbalance = max / (1.0 * atom->natoms / nprocs);
   return imbalance;
 }
 
 /* ----------------------------------------------------------------------
    setup static load balance operations
    called from command
    set rho = 0 for static balancing
 ------------------------------------------------------------------------- */
 
 void Balance::static_setup(char *str)
 {
   ndim = strlen(str);
   bdim = new int[ndim];
 
   for (int i = 0; i < strlen(str); i++) {
     if (str[i] == 'x') bdim[i] = X;
     if (str[i] == 'y') bdim[i] = Y;
     if (str[i] == 'z') bdim[i] = Z;
   }
 
   int max = MAX(comm->procgrid[0],comm->procgrid[1]);
   max = MAX(max,comm->procgrid[2]);
 
   count = new bigint[max];
   onecount = new bigint[max];
   sum = new bigint[max+1];
   target = new bigint[max+1];
   lo = new double[max+1];
   hi = new double[max+1];
   losum = new bigint[max+1];
   hisum = new bigint[max+1];
 
   rho = 0;
 }
 
 /* ----------------------------------------------------------------------
    setup dynamic load balance operations
    called from fix balance
    set rho = 1 for dynamic balancing after call to dynamic_setup()
 ------------------------------------------------------------------------- */
 
 void Balance::dynamic_setup(char *str, int nitermax_in, double thresh_in)
 {
   dflag = 1;
   static_setup(str);
   nitermax = nitermax_in;
   thresh = thresh_in;
 
   rho = 1;
 }
 
 /* ----------------------------------------------------------------------
    load balance by changing xyz split proc boundaries in Comm
    called one time from input script command or many times from fix balance
    return niter = iteration count
 ------------------------------------------------------------------------- */
 
 int Balance::dynamic()
 {
   int i,j,k,m,np,max;
   double *split;
 
   // no balancing if no atoms
 
   bigint natoms = atom->natoms;
   if (natoms == 0) return 0;
 
   // set delta for 1d balancing = root of threshhold
   // root = # of dimensions being balanced on
 
   double delta = pow(thresh,1.0/ndim) - 1.0;
   int *procgrid = comm->procgrid;
 
   // all balancing done in lamda coords
 
   domain->x2lamda(atom->nlocal);
 
   // loop over dimensions in balance string
 
   int niter = 0;
   for (int idim = 0; idim < ndim; idim++) {
 
     // split = ptr to xyz split in Comm
 
     if (bdim[idim] == X) split = comm->xsplit;
     else if (bdim[idim] == Y) split = comm->ysplit;
     else if (bdim[idim] == Z) split = comm->zsplit;
 
     // intial count and sum
 
     np = procgrid[bdim[idim]];
     tally(bdim[idim],np,split);
 
     // target[i] = desired sum at split I
 
     for (i = 0; i < np; i++)
       target[i] = static_cast<int> (1.0*natoms/np * i + 0.5);
     target[np] = natoms;
 
     // lo[i] = closest split <= split[i] with a sum <= target
     // hi[i] = closest split >= split[i] with a sum >= target
 
     lo[0] = hi[0] = 0.0;
     lo[np] = hi[np] = 1.0;
     losum[0] = hisum[0] = 0;
     losum[np] = hisum[np] = natoms;
 
     for (i = 1; i < np; i++) {
       for (j = i; j >= 0; j--)
         if (sum[j] <= target[i]) {
           lo[i] = split[j];
           losum[i] = sum[j];
           break;
         }
       for (j = i; j <= np; j++)
         if (sum[j] >= target[i]) {
           hi[i] = split[j];
           hisum[i] = sum[j];
           break;
         }
     }
 
     // iterate until balanced
 
 #ifdef BALANCE_DEBUG
     if (me == 0) debug_output(idim,0,np,split);
 #endif
 
     int doneflag;
     int change = 1;
     for (m = 0; m < nitermax; m++) {
       change = adjust(np,split);
       tally(bdim[idim],np,split);
       niter++;
 
 #ifdef BALANCE_DEBUG
       if (me == 0) debug_output(idim,m+1,np,split);
       if (me == 0 && fp) dumpout(update->ntimestep,fp);
 #endif
 
       // stop if no change in splits, b/c all targets are met exactly
 
       if (!change) break;
 
       // stop if all split sums are within delta of targets
       // this is a 1d test of particle count per slice
       // assumption is that this is sufficient accuracy
       //   for 3d imbalance factor to reach threshhold
 
       doneflag = 1;
       for (i = 1; i < np; i++)
         if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0;
       if (doneflag) break;
     }
 
     // eliminate final adjacent splits that are duplicates
     // can happen if particle distribution is narrow and Nitermax is small
     // set lo = midpt between splits
     // spread duplicates out evenly between bounding midpts with non-duplicates
     // i,j = lo/hi indices of set of duplicate splits
     // delta = new spacing between duplicates
     // bounding midpts = lo[i-1] and lo[j]
 
     int duplicate = 0;
     for (i = 1; i < np-1; i++)
       if (split[i] == split[i+1]) duplicate = 1;
     if (duplicate) {
       for (i = 0; i < np; i++)
         lo[i] = 0.5 * (split[i] + split[i+1]);
       i = 1;
       while (i < np-1) {
         j = i+1;
         while (split[j] == split[i]) j++;
         j--;
         if (j > i) {
           delta = (lo[j] - lo[i-1]) / (j-i+2);
           for (k = i; k <= j; k++)
             split[k] = lo[i-1] + (k-i+1)*delta;
         }
         i = j+1;
       }
     }
 
     // sanity check on bad duplicate or inverted splits
     // zero or negative width sub-domains will break Comm class
     // should never happen if recursive multisection algorithm is correct
 
     int bad = 0;
     for (i = 0; i < np; i++)
       if (split[i] >= split[i+1]) bad = 1;
     if (bad) error->all(FLERR,"Balance produced bad splits");
     /*
       if (me == 0) {
       printf("BAD SPLITS %d %d %d\n",np+1,niter,delta);
       for (i = 0; i < np+1; i++)
       printf(" %g",split[i]);
       printf("\n");
       }
     */
 
     // stop at this point in bstr if imbalance factor < threshhold
     // this is a true 3d test of particle count per processor
 
     double imbfactor = imbalance_splits(max);
     if (imbfactor <= thresh) break;
   }
 
   // restore real coords
 
   domain->lamda2x(atom->nlocal);
 
   return niter;
 }
 
 /* ----------------------------------------------------------------------
    count atoms in each slice, based on their dim coordinate
    N = # of slices
    split = N+1 cuts between N slices
    return updated count = particles per slice
    retrun updated sum = cummulative count below each of N+1 splits
    use binary search to find which slice each atom is in
 ------------------------------------------------------------------------- */
 
 void Balance::tally(int dim, int n, double *split)
 {
   for (int i = 0; i < n; i++) onecount[i] = 0;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int index;
 
   for (int i = 0; i < nlocal; i++) {
     index = binary(x[i][dim],n,split);
     onecount[index]++;
   }
 
   MPI_Allreduce(onecount,count,n,MPI_LMP_BIGINT,MPI_SUM,world);
 
   sum[0] = 0;
   for (int i = 1; i < n+1; i++)
     sum[i] = sum[i-1] + count[i-1];
 }
 
 /* ----------------------------------------------------------------------
    adjust cuts between N slices in a dim via recursive multisectioning method
    split = current N+1 cuts, with 0.0 and 1.0 at end points
    sum = cummulative count up to each split
    target = desired cummulative count up to each split
    lo/hi = split values that bound current split
    update lo/hi to reflect sums at current split values
    overwrite split with new cuts
      guaranteed that splits will remain in ascending order,
      though adjacent values may be identical
    recursive bisectioning zooms in on each cut by halving lo/hi
    return 0 if no changes in any splits, b/c they are all perfect
 ------------------------------------------------------------------------- */
 
 int Balance::adjust(int n, double *split)
 {
   int i;
   double fraction;
 
   // reset lo/hi based on current sum and splits
   // insure lo is monotonically increasing, ties are OK
   // insure hi is monotonically decreasing, ties are OK
   // this effectively uses info from nearby splits
   // to possibly tighten bounds on lo/hi
 
   for (i = 1; i < n; i++) {
     if (sum[i] <= target[i]) {
       lo[i] = split[i];
       losum[i] = sum[i];
     }
     if (sum[i] >= target[i]) {
       hi[i] = split[i];
       hisum[i] = sum[i];
     }
   }
   for (i = 1; i < n; i++)
     if (lo[i] < lo[i-1]) {
       lo[i] = lo[i-1];
       losum[i] = losum[i-1];
     }
   for (i = n-1; i > 0; i--)
     if (hi[i] > hi[i+1]) {
       hi[i] = hi[i+1];
       hisum[i] = hisum[i+1];
     }
 
   int change = 0;
   for (int i = 1; i < n; i++)
     if (sum[i] != target[i]) {
       change = 1;
       if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]);
       else {
         fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]);
         split[i] = lo[i] + fraction * (hi[i]-lo[i]);
       }
     }
   return change;
 }
 
-/* ----------------------------------------------------------------------
-   OLD code: for local diffusion method that didn't work as well as RCB
-   adjust cuts between N slices in a dim via diffusive method
-   count = atoms per slice
-   split = current N+1 cuts, with 0.0 and 1.0 at end points
-   overwrite split with new cuts
-   diffusion means slices with more atoms than their neighbors "send" atoms,
-     by moving cut closer to sender, further from receiver
-------------------------------------------------------------------------- */
-
-void Balance::old_adjust(int iter, int n, bigint *count, double *split)
-{
-  // need to allocate this if start using it again
-
-  double *cuts;
-
-  // damping factor
-
-  double damp = 0.5;
-
-  // loop over slices
-  // cut I is between 2 slices (I-1 and I) with counts
-  // cut I+1 is between 2 slices (I and I+1) with counts
-  // for a cut between 2 slices, only slice with larger count adjusts it
-  // special treatment of end slices with only 1 neighbor
-
-  bigint leftcount,mycount,rightcount;
-  double rho,target,targetleft,targetright;
-
-  for (int i = 0; i < n; i++) {
-    if (i == 0) leftcount = MAXBIGINT;
-    else leftcount = count[i-1];
-    mycount = count[i];
-    if (i == n-1) rightcount = MAXBIGINT;
-    else rightcount = count[i+1];
-
-    // middle slice is <= both left and right, so do nothing
-    // special case if 2 slices both have count = 0 -> no change in cut
-
-    if (mycount <= leftcount && mycount <= rightcount) {
-      if (leftcount == 0) cuts[i] = split[i];
-      if (rightcount == 0) cuts[i+1] = split[i+1];
-      continue;
-    }
-
-    // rho = density of atoms in the slice
-
-    rho = mycount / (split[i+1] - split[i]);
-
-    // middle slice has more atoms than left or right slice
-    // send atoms in that dir
-
-    if (mycount > leftcount) {
-      target = damp * 0.5*(mycount-leftcount);
-      cuts[i] = split[i] + target/rho;
-    }
-    if (mycount > rightcount) {
-      target = damp * 0.5*(mycount-rightcount);
-      cuts[i+1] = split[i+1] - target/rho;
-    }
-
-    /*
-    // middle slice has more atoms then left or right slice
-    // if delta from middle to top slice > delta between top and bottom slice
-    //   then send atoms both dirs to bring all 3 slices to same count
-    // else bottom slice is very low, so send atoms only in that dir
-
-    if (mycount > leftcount && mycount > rightcount) {
-      if (mycount-MAX(leftcount,rightcount) >= fabs(leftcount-rightcount)) {
-        if (leftcount <= rightcount) {
-          targetleft = damp *
-            (rightcount-leftcount + (mycount-rightcount)/3.0);
-          targetright = damp * (mycount-rightcount)/3.0;
-          cuts[i] = split[i] + targetleft/rho;
-          cuts[i+1] = split[i+1] - targetright/rho;
-        } else {
-          targetleft = damp * (mycount-leftcount)/3.0;
-          targetright = damp *
-            (leftcount-rightcount + (mycount-leftcount)/3.0);
-          cuts[i] = split[i] + targetleft/rho;
-          cuts[i+1] = split[i+1] - targetright/rho;
-        }
-      } else if (leftcount < rightcount) {
-        target = damp * 0.5*(mycount-leftcount);
-        cuts[i] = split[i] + target/rho;
-        cuts[i+1] = split[i+1];
-      } else if (rightcount < leftcount) {
-        target = damp * 0.5*(mycount-rightcount);
-        cuts[i+1] = split[i+1] - target/rho;
-        cuts[i] = split[i];
-      }
-
-    // middle slice has more atoms than only left or right slice
-    // send atoms only in that dir
-
-    } else if (mycount > leftcount) {
-      target = damp * 0.5*(mycount-leftcount);
-      cuts[i] = split[i] + target/rho;
-    } else if (mycount > rightcount) {
-      target = damp * 0.5*(mycount-rightcount);
-      cuts[i+1] = split[i+1] - target/rho;
-    }
-    */
-  }
-
-  // overwrite adjustable splits with new cuts
-
-  for (int i = 1; i < n; i++) split[i] = cuts[i];
-}
-
 /* ----------------------------------------------------------------------
    binary search for where value falls in N-length vec
    note that vec actually has N+1 values, but ignore last one
    values in vec are monotonically increasing, but adjacent values can be ties
    value may be outside range of vec limits
    always return index from 0 to N-1 inclusive
    return 0 if value < vec[0]
    reutrn N-1 if value >= vec[N-1]
    return index = 1 to N-2 inclusive if vec[index] <= value < vec[index+1]
    note that for adjacent tie values, index of lower tie is not returned
      since never satisfies 2nd condition that value < vec[index+1]
 ------------------------------------------------------------------------- */
 
 int Balance::binary(double value, int n, double *vec)
 {
   int lo = 0;
   int hi = n-1;
 
   if (value < vec[lo]) return lo;
   if (value >= vec[hi]) return hi;
 
   // insure vec[lo] <= value < vec[hi] at every iteration
   // done when lo,hi are adjacent
 
   int index = (lo+hi)/2;
   while (lo < hi-1) {
     if (value < vec[index]) hi = index;
     else if (value >= vec[index]) lo = index;
     index = (lo+hi)/2;
   }
 
   return index;
 }
 
 /* ----------------------------------------------------------------------
    write dump snapshot of line segments in Pizza.py mdump mesh format
    write xy lines around each proc's sub-domain for 2d
    write xyz cubes around each proc's sub-domain for 3d
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void Balance::dumpout(bigint tstep, FILE *bfp)
 {
   int dimension = domain->dimension;
 
   // write out one square/cube per processor for 2d/3d
   // only write once since topology is static
 
   if (firststep) {
     firststep = 0;
     fprintf(bfp,"ITEM: TIMESTEP\n");
     fprintf(bfp,BIGINT_FORMAT "\n",tstep);
     if (dimension == 2) fprintf(bfp,"ITEM: NUMBER OF SQUARES\n");
     else fprintf(bfp,"ITEM: NUMBER OF CUBES\n");
     fprintf(bfp,"%d\n",nprocs);
     if (dimension == 2) fprintf(bfp,"ITEM: SQUARES\n");
     else fprintf(bfp,"ITEM: CUBES\n");
 
     int nx = comm->procgrid[0] + 1;
     int ny = comm->procgrid[1] + 1;
     int nz = comm->procgrid[2] + 1;
 
     if (dimension == 2) {
       int m = 0;
       for (int j = 0; j < comm->procgrid[1]; j++)
         for (int i = 0; i < comm->procgrid[0]; i++) {
           int c1 = j*nx + i + 1;
           int c2 = c1 + 1;
           int c3 = c2 + nx;
           int c4 = c3 - 1;
           fprintf(bfp,"%d %d %d %d %d %d\n",m+1,m+1,c1,c2,c3,c4);
           m++;
         }
 
     } else {
       int m = 0;
       for (int k = 0; k < comm->procgrid[2]; k++)
         for (int j = 0; j < comm->procgrid[1]; j++)
           for (int i = 0; i < comm->procgrid[0]; i++) {
             int c1 = k*ny*nx + j*nx + i + 1;
             int c2 = c1 + 1;
             int c3 = c2 + nx;
             int c4 = c3 - 1;
             int c5 = c1 + ny*nx;
             int c6 = c2 + ny*nx;
             int c7 = c3 + ny*nx;
             int c8 = c4 + ny*nx;
             fprintf(bfp,"%d %d %d %d %d %d %d %d %d %d\n",
                     m+1,m+1,c1,c2,c3,c4,c5,c6,c7,c8);
             m++;
           }
     }
   }
 
   // write out nodal coords, can be different every call
   // scale xsplit,ysplit,zsplit values to full box
   // only implmented for orthogonal boxes, not triclinic
 
   int nx = comm->procgrid[0] + 1;
   int ny = comm->procgrid[1] + 1;
   int nz = comm->procgrid[2] + 1;
 
   double *boxlo = domain->boxlo;
   double *boxhi = domain->boxhi;
   double *prd = domain->prd;
 
   fprintf(bfp,"ITEM: TIMESTEP\n");
   fprintf(bfp,BIGINT_FORMAT "\n",tstep);
   fprintf(bfp,"ITEM: NUMBER OF NODES\n");
   if (dimension == 2) fprintf(bfp,"%d\n",nx*ny);
   else fprintf(bfp,"%d\n",nx*ny*nz);
   fprintf(bfp,"ITEM: BOX BOUNDS\n");
   fprintf(bfp,"%g %g\n",boxlo[0],boxhi[0]);
   fprintf(bfp,"%g %g\n",boxlo[1],boxhi[1]);
   fprintf(bfp,"%g %g\n",boxlo[2],boxhi[2]);
   fprintf(bfp,"ITEM: NODES\n");
 
   if (dimension == 2) {
     int m = 0;
     for (int j = 0; j < ny; j++)
       for (int i = 0; i < nx; i++) {
         fprintf(bfp,"%d %d %g %g %g\n",m+1,1,
                 boxlo[0] + prd[0]*comm->xsplit[i],
                 boxlo[1] + prd[1]*comm->ysplit[j],
                 0.0);
         m++;
       }
   } else {
     int m = 0;
     for (int k = 0; k < nz; k++)
       for (int j = 0; j < ny; j++)
         for (int i = 0; i < nx; i++) {
           fprintf(bfp,"%d %d %g %g %g\n",m+1,1,
                   boxlo[0] + prd[0]*comm->xsplit[i],
                   boxlo[1] + prd[1]*comm->ysplit[j],
                   boxlo[2] + prd[2]*comm->zsplit[k]);
           m++;
       }
   }
 }
 
 /* ----------------------------------------------------------------------
    debug output for Idim and count
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void Balance::debug_output(int idim, int m, int np, double *split)
 {
   int i;
   const char *dim;
 
   double *boxlo = domain->boxlo;
   double *prd = domain->prd;
 
   if (bdim[idim] == X) dim = "X";
   else if (bdim[idim] == Y) dim = "Y";
   else if (bdim[idim] == Z) dim = "Z";
   printf("Dimension %s, Iteration %d\n",dim,m);
 
   printf("  Count:");
   for (i = 0; i < np; i++) printf(" " BIGINT_FORMAT,count[i]);
   printf("\n");
   printf("  Sum:");
   for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,sum[i]);
   printf("\n");
   printf("  Target:");
   for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,target[i]);
   printf("\n");
   printf("  Actual cut:");
   for (i = 0; i <= np; i++)
     printf(" %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]);
   printf("\n");
   printf("  Split:");
   for (i = 0; i <= np; i++) printf(" %g",split[i]);
   printf("\n");
   printf("  Low:");
   for (i = 0; i <= np; i++) printf(" %g",lo[i]);
   printf("\n");
   printf("  Low-sum:");
   for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,losum[i]);
   printf("\n");
   printf("  Hi:");
   for (i = 0; i <= np; i++) printf(" %g",hi[i]);
   printf("\n");
   printf("  Hi-sum:");
   for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,hisum[i]);
   printf("\n");
   printf("  Delta:");
   for (i = 0; i < np; i++) printf(" %g",split[i+1]-split[i]);
   printf("\n");
 
   bigint max = 0;
   for (i = 0; i < np; i++) max = MAX(max,count[i]);
   printf("  Imbalance factor: %g\n",1.0*max*np/target[np]);
 }
diff --git a/src/balance.h b/src/balance.h
index fda682cc9..ee1d17154 100644
--- a/src/balance.h
+++ b/src/balance.h
@@ -1,116 +1,115 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMMAND_CLASS
 
 CommandStyle(balance,Balance)
 
 #else
 
 #ifndef LMP_BALANCE_H
 #define LMP_BALANCE_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Balance : protected Pointers {
  public:
   Balance(class LAMMPS *);
   ~Balance();
   void command(int, char **);
   void dynamic_setup(char *, int, double);
   int dynamic();
   double imbalance_nlocal(int &);
   void dumpout(bigint, FILE *);
 
  private:
   int me,nprocs;
 
   int xflag,yflag,zflag;                            // xyz LB flags
   double *user_xsplit,*user_ysplit,*user_zsplit;    // params for xyz LB
 
   int dflag;                 // dynamic LB flag
   int nitermax;              // params for dynamic LB
   double thresh;
   char bstr[4];
 
   int ndim;                  // length of balance string bstr
   int *bdim;                 // XYZ for each character in bstr
   bigint *count;             // counts for slices in one dim
   bigint *onecount;          // work vector of counts in one dim
   bigint *sum;               // cummulative count for slices in one dim
   bigint *target;            // target sum for slices in one dim
   double *lo,*hi;            // lo/hi split coords that bound each target
   bigint *losum,*hisum;      // cummulative counts at lo/hi coords
   int rho;                   // 0 for geometric recursion
                              // 1 for density weighted recursion
 
   int *proccount;            // particle count per processor
   int *allproccount;
 
   int outflag;               // for output of balance results to file
   FILE *fp;
   int firststep;
 
   void static_setup(char *);
   double imbalance_splits(int &);
   void tally(int, int, double *);
   int adjust(int, double *);
-  void old_adjust(int, int, bigint *, double *);
   int binary(double, int, double *);
   void debug_output(int, int, int, double *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Balance command before simulation box is defined
 
 The balance command cannot be used before a read_data, read_restart,
 or create_box command.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot open balance output file
 
 Self-explanatory.
 
 E: Cannot balance in z dimension for 2d simulation
 
 Self-explanatory.
 
 E: Balance dynamic string is invalid
 
 The string can only contain the characters "x", "y", or "z".
 
 E: Lost atoms via balance: original %ld current %ld
 
 This should not occur.  Report the problem to the developers.
 
 E: Balance produced bad splits
 
 This should not occur.  It means two or more cutting plane locations
 are on top of each other or out of order.  Report the problem to the
 developers.
 
 */
diff --git a/src/compute_bond_local.h b/src/compute_bond_local.h
index ecf3daeca..a6639c211 100644
--- a/src/compute_bond_local.h
+++ b/src/compute_bond_local.h
@@ -1,74 +1,74 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMPUTE_CLASS
 
 ComputeStyle(bond/local,ComputeBondLocal)
 
 #else
 
 #ifndef LMP_COMPUTE_BOND_LOCAL_H
 #define LMP_COMPUTE_BOND_LOCAL_H
 
 #include "compute.h"
 
 namespace LAMMPS_NS {
 
 class ComputeBondLocal : public Compute {
  public:
   ComputeBondLocal(class LAMMPS *, int, char **);
   ~ComputeBondLocal();
   void init();
   void compute_local();
   double memory_usage();
 
  private:
-  int nvalues,dflag,eflag;
+  int nvalues;
   int ncount;
   int *bstyle;
   int singleflag;
 
   int nmax;
   double *vector;
   double **array;
 
   int compute_bonds(int);
   void reallocate(int);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Compute bond/local used when bonds are not allowed
 
 The atom style does not support bonds.
 
 E: Invalid keyword in compute bond/local command
 
 Self-explanatory.
 
 E: No bond style is defined for compute bond/local
 
 Self-explanatory.
 
 */
diff --git a/src/compute_rdf.h b/src/compute_rdf.h
index 0ce45cc62..794fcc9ee 100644
--- a/src/compute_rdf.h
+++ b/src/compute_rdf.h
@@ -1,70 +1,69 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMPUTE_CLASS
 
 ComputeStyle(rdf,ComputeRDF)
 
 #else
 
 #ifndef LMP_COMPUTE_RDF_H
 #define LMP_COMPUTE_RDF_H
 
 #include "stdio.h"
 #include "compute.h"
 
 namespace LAMMPS_NS {
 
 class ComputeRDF : public Compute {
  public:
   ComputeRDF(class LAMMPS *, int, char **);
   ~ComputeRDF();
   void init();
   void init_list(int, class NeighList *);
   void compute_array();
 
  private:
-  int first;
-  int nbin;                         // # of rdf bins
-  int npairs;                     // # of rdf pairs
-  double delr,delrinv;                 // bin width and its inverse
-  int ***rdfpair;                       // map 2 type pair to rdf pair for each histo
-  int **nrdfpair;                // # of histograms for each type pair
+  int nbin;              // # of rdf bins
+  int npairs;            // # of rdf pairs
+  double delr,delrinv;   // bin width and its inverse
+  int ***rdfpair;        // map 2 type pair to rdf pair for each histo
+  int **nrdfpair;        // # of histograms for each type pair
   int *ilo,*ihi,*jlo,*jhi;
-  double **hist;                 // histogram bins
-  double **histall;                 // summed histogram bins across all procs
+  double **hist;         // histogram bins
+  double **histall;      // summed histogram bins across all procs
 
   int *typecount;
   int *icount,*jcount;
 
-  class NeighList *list;         // half neighbor list
+  class NeighList *list; // half neighbor list
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Compute rdf requires a pair style be defined
 
 Self-explanatory.
 
 */
diff --git a/src/fix_ave_histo.h b/src/fix_ave_histo.h
index 87eb21d0f..2d6bee353 100644
--- a/src/fix_ave_histo.h
+++ b/src/fix_ave_histo.h
@@ -1,227 +1,227 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(ave/histo,FixAveHisto)
 
 #else
 
 #ifndef LMP_FIX_AVE_HISTO_H
 #define LMP_FIX_AVE_HISTO_H
 
 #include "stdio.h"
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixAveHisto : public Fix {
  public:
   FixAveHisto(class LAMMPS *, int, char **);
   ~FixAveHisto();
   int setmask();
   void init();
   void setup(int);
   void end_of_step();
   double compute_vector(int);
   double compute_array(int,int);
   void reset_timestep(bigint);
 
  private:
   int me,nvalues;
   int nrepeat,nfreq,irepeat;
   bigint nvalid;
   int *which,*argindex,*value2index;
   char **ids;
   FILE *fp;
   double lo,hi,binsize,bininv;
   int kind,beyond,overwrite;
   long filepos;
 
   double stats[4],stats_total[4],stats_all[4];
   double **stats_list;
 
   int nbins;
   double *bin,*bin_total,*bin_all;
   double **bin_list;
   double *coord;
 
   double *vector;
   int maxatom;
 
-  int ave,nwindow,nsum,startstep,mode;
+  int ave,nwindow,startstep,mode;
   char *title1,*title2,*title3;
   int iwindow,window_limit;
 
   void bin_one(double);
   void bin_vector(int, double *, int);
   void bin_atoms(double *, int);
   void options(int, char **);
   void allocate_values(int);
   bigint nextvalid();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Compute ID for fix ave/histo does not exist
 
 Self-explanatory.
 
 E: Fix ID for fix ave/histo does not exist
 
 Self-explanatory.
 
 E: Fix ave/histo input is invalid compute
 
 Self-explanatory.
 
 E: Fix ave/histo input is invalid fix
 
 Self-explanatory.
 
 E: Fix ave/histo input is invalid variable
 
 Self-explanatory.
 
 E: Fix ave/histo inputs are not all global, peratom, or local
 
 All inputs in a single fix ave/histo command must be of the
 same style.
 
 E: Fix ave/histo cannot input per-atom values in scalar mode
 
 Self-explanatory.
 
 E: Fix ave/histo cannot input local values in scalar mode
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a global scalar
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a global vector
 
 Self-explanatory.
 
 E: Fix ave/histo compute vector is accessed out-of-range
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a global array
 
 Self-explanatory.
 
 E: Fix ave/histo compute array is accessed out-of-range
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate per-atom values
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a per-atom vector
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a per-atom array
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate local values
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a local vector
 
 Self-explanatory.
 
 E: Fix ave/histo compute does not calculate a local array
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a global scalar
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a global vector
 
 Self-explanatory.
 
 E: Fix ave/histo fix vector is accessed out-of-range
 
 Self-explanatory.
 
 E: Fix for fix ave/histo not computed at compatible time
 
 Fixes generate their values on specific timesteps.  Fix ave/histo is
 requesting a value on a non-allowed timestep.
 
 E: Fix ave/histo fix does not calculate a global array
 
 Self-explanatory.
 
 E: Fix ave/histo fix array is accessed out-of-range
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate per-atom values
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a per-atom vector
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a per-atom array
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate local values
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a local vector
 
 Self-explanatory.
 
 E: Fix ave/histo fix does not calculate a local array
 
 Self-explanatory.
 
 E: Variable name for fix ave/histo does not exist
 
 Self-explanatory.
 
 E: Cannot open fix ave/histo file %s
 
 The specified file cannot be opened.  Check that the path and name are
 correct.
 
 E: Fix ave/histo missed timestep
 
 You cannot reset the timestep to a value beyond where the fix
 expects to next perform averaging.
 
 */
diff --git a/src/fix_indent.h b/src/fix_indent.h
index a80cd1185..a126b805a 100644
--- a/src/fix_indent.h
+++ b/src/fix_indent.h
@@ -1,85 +1,85 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(indent,FixIndent)
 
 #else
 
 #ifndef LMP_FIX_INDENT_H
 #define LMP_FIX_INDENT_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixIndent : public Fix {
  public:
   FixIndent(class LAMMPS *, int, char **);
   ~FixIndent();
   int setmask();
   void init();
   void setup(int);
   void min_setup(int);
   void post_force(int);
   void post_force_respa(int, int, int);
   void min_post_force(int);
   double compute_scalar();
   double compute_vector(int);
 
  private:
-  int istyle,scaleflag,thermo_flag,eflag_enable,side;
+  int istyle,scaleflag,side;
   double k,k3;
   char *xstr,*ystr,*zstr,*rstr,*pstr;
   int xvar,yvar,zvar,rvar,pvar;
   double xvalue,yvalue,zvalue,rvalue,pvalue;
   int indenter_flag,planeside;
   double indenter[4],indenter_all[4];
   int cdim,varflag;
   int nlevels_respa;
 
   void options(int, char **);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Variable name for fix indent does not exist
 
 Self-explanatory.
 
 E: Variable for fix indent is invalid style
 
 Only equal-style variables can be used.
 
 E: Variable for fix indent is not equal style
 
 Only equal-style variables can be used.
 
 U: Use of fix indent with undefined lattice
 
 The lattice command must be used to define a lattice before using the
 fix indent command.
 
 */
diff --git a/src/fix_setforce.h b/src/fix_setforce.h
index f9aaeded3..e18a34ea4 100644
--- a/src/fix_setforce.h
+++ b/src/fix_setforce.h
@@ -1,85 +1,85 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(setforce,FixSetForce)
 
 #else
 
 #ifndef LMP_FIX_SET_FORCE_H
 #define LMP_FIX_SET_FORCE_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixSetForce : public Fix {
  public:
   FixSetForce(class LAMMPS *, int, char **);
   ~FixSetForce();
   int setmask();
   void init();
   void setup(int);
   void min_setup(int);
   void post_force(int);
   void post_force_respa(int, int, int);
   void min_post_force(int);
   double compute_vector(int);
   double memory_usage();
 
  private:
   double xvalue,yvalue,zvalue;
   int varflag,iregion;
   char *xstr,*ystr,*zstr;
   char *idregion;
   int xvar,yvar,zvar,xstyle,ystyle,zstyle;
   double foriginal[3],foriginal_all[3];
-  int varany,force_flag;
+  int force_flag;
   int nlevels_respa;
 
   int maxatom;
   double **sforce;
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Region ID for fix setforce does not exist
 
 Self-explanatory.
 
 E: Variable name for fix setforce does not exist
 
 Self-explanatory.
 
 E: Variable for fix setforce is invalid style
 
 Only equal-style variables can be used.
 
 E: Cannot use non-zero forces in an energy minimization
 
 Fix setforce cannot be used in this manner.  Use fix addforce
 instead.
 
 */
diff --git a/src/fix_spring.h b/src/fix_spring.h
index e32ef8158..f784ad9d3 100644
--- a/src/fix_spring.h
+++ b/src/fix_spring.h
@@ -1,82 +1,81 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(spring,FixSpring)
 
 #else
 
 #ifndef LMP_FIX_SPRING_H
 #define LMP_FIX_SPRING_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixSpring : public Fix {
  public:
   FixSpring(class LAMMPS *, int, char **);
   ~FixSpring();
   int setmask();
   void init();
   void setup(int);
   void min_setup(int);
   void post_force(int);
   void post_force_respa(int, int, int);
   void min_post_force(int);
   double compute_scalar();
   double compute_vector(int);
 
  private:
   double xc,yc,zc,r0;
   double k_spring;
   int xflag,yflag,zflag;
   int styleflag;
   char *group2;
   int igroup2,group2bit;
   double masstotal,masstotal2;
   int nlevels_respa;
   double espring,ftotal[4];
-  int force_flag;
 
   void spring_tether();
   void spring_couple();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: R0 < 0 for fix spring command
 
 Equilibrium spring length is invalid.
 
 E: Fix spring couple group ID does not exist
 
 Self-explanatory.
 
 E: Two groups cannot be the same in fix spring couple
 
 Self-explanatory.
 
 */
diff --git a/src/fix_wall_harmonic.h b/src/fix_wall_harmonic.h
index 01186dfa7..b9efea740 100644
--- a/src/fix_wall_harmonic.h
+++ b/src/fix_wall_harmonic.h
@@ -1,49 +1,46 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(wall/harmonic,FixWallHarmonic)
 
 #else
 
 #ifndef LMP_FIX_WALL_HARMONIC_H
 #define LMP_FIX_WALL_HARMONIC_H
 
 #include "fix_wall.h"
 
 namespace LAMMPS_NS {
 
 class FixWallHarmonic : public FixWall {
  public:
   FixWallHarmonic(class LAMMPS *, int, char **);
   void precompute(int) {}
   void wall_particle(int, int, double);
-
- private:
-  double offset[6];
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Particle on or inside fix wall surface
 
 Particles must be "exterior" to the wall in order for energy/force to
 be calculated.
 
 */
diff --git a/src/read_restart.h b/src/read_restart.h
index 063e5e117..23f56c858 100644
--- a/src/read_restart.h
+++ b/src/read_restart.h
@@ -1,153 +1,152 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef COMMAND_CLASS
 
 CommandStyle(read_restart,ReadRestart)
 
 #else
 
 #ifndef LMP_READ_RESTART_H
 #define LMP_READ_RESTART_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class ReadRestart : protected Pointers {
  public:
   ReadRestart(class LAMMPS *);
   void command(int, char **);
 
  private:
   int me,nprocs,nprocs_file;
   FILE *fp;
-  int nfix_restart_global,nfix_restart_peratom;
   int swapflag;
 
   void file_search(char *, char *);
   void header();
   void type_arrays();
   void force_fields();
 
   void nread_int(int *, int, FILE *);
   void nread_double(double *, int, FILE *);
   void nread_char(char *, int, FILE *);
   int read_int();
   double read_double();
   char *read_char();
   bigint read_bigint();
   int autodetect(FILE **, char *);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Cannot read_restart after simulation box is defined
 
 The read_restart command cannot be used after a read_data,
 read_restart, or create_box command.
 
 E: Cannot open restart file %s
 
 Self-explanatory.
 
 E: Did not assign all atoms correctly
 
 Atoms read in from a data file were not assigned correctly to
 processors.  This is likely due to some atom coordinates being
 outside a non-periodic simulation box.
 
 E: Cannot open dir to search for restart file
 
 Using a "*" in the name of the restart file will open the current
 directory to search for matching file names.
 
 E: Found no restart file matching pattern
 
 When using a "*" in the restart file name, no matching file was found.
 
 W: Restart file version does not match LAMMPS version
 
 This may cause problems when reading the restart file.
 
 E: Smallint setting in lmptype.h is not compatible
 
 Smallint stored in restart file is not consistent with LAMMPS version
 you are running.
 
 E: Tagint setting in lmptype.h is not compatible
 
 Smallint stored in restart file is not consistent with LAMMPS version
 you are running.
 
 E: Bigint setting in lmptype.h is not compatible
 
 Bigint stored in restart file is not consistent with LAMMPS version
 you are running.
 
 E: Cannot run 2d simulation with nonperiodic Z dimension
 
 Use the boundary command to make the z dimension periodic in order to
 run a 2d simulation.
 
 W: Restart file used different # of processors
 
 The restart file was written out by a LAMMPS simulation running on a
 different number of processors.  Due to round-off, the trajectories of
 your restarted simulation may diverge a little more quickly than if
 you ran on the same # of processors.
 
 W: Restart file used different 3d processor grid
 
 The restart file was written out by a LAMMPS simulation running on a
 different 3d grid of processors.  Due to round-off, the trajectories
 of your restarted simulation may diverge a little more quickly than if
 you ran on the same # of processors.
 
 W: Restart file used different newton pair setting, using input script value
 
 The input script value will override the setting in the restart file.
 
 W: Restart file used different newton bond setting, using restart file value
 
 The restart file value will override the setting in the input script.
 
 W: Restart file used different boundary settings, using restart file values
 
 Your input script cannot change these restart file settings.
 
 E: Invalid flag in header section of restart file
 
 Unrecognized entry in restart file.
 
 E: Invalid flag in type arrays section of restart file
 
 Unrecognized entry in restart file.
 
 E: Invalid flag in force field section of restart file
 
 Unrecognized entry in restart file.
 
 */
diff --git a/src/special.h b/src/special.h
index 914585f41..b44de6b6e 100644
--- a/src/special.h
+++ b/src/special.h
@@ -1,74 +1,73 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_SPECIAL_H
 #define LMP_SPECIAL_H
 
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Special : protected Pointers {
  public:
   Special(class LAMMPS *);
   ~Special();
   void build();
 
  private:
   int me,nprocs;
   int **onetwo,**onethree,**onefour;
-  int dihedral_flag;
 
   // data used by ring callback methods
 
   int *count;
   int **dflag;
 
   void dedup();
   void angle_trim();
   void dihedral_trim();
   void combine();
 
   // static variable for ring communication callback to access class data
   // callback functions for ring communication
 
   static Special *sptr;
   static void ring_one(int, char *);
   static void ring_two(int, char *);
   static void ring_three(int, char *);
   static void ring_four(int, char *);
   static void ring_five(int, char *);
   static void ring_six(int, char *);
   static void ring_seven(int, char *);
   static void ring_eight(int, char *);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: 1-3 bond count is inconsistent
 
 An inconsistency was detected when computing the number of 1-3
 neighbors for each atom.  This likely means something is wrong with
 the bond topologies you have defined.
 
 E: 1-4 bond count is inconsistent
 
 An inconsistency was detected when computing the number of 1-4
 neighbors for each atom.  This likely means something is wrong with
 the bond topologies you have defined.
 
 */