diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp
index eedb9bf55..da7d337b9 100644
--- a/src/MOLECULE/fix_cmap.cpp
+++ b/src/MOLECULE/fix_cmap.cpp
@@ -1,1451 +1,1451 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Implementation of the CHARMM CMAP; adds an extra energy term for the
-   peptide backbone dihedrals.  The tools/ch2lmp/charmm2lammps.pl
-   conversion script, which generates an extra section in the LAMMPS data
-   file, is needed in order to generate the info used by this fix style.
-
    Contributing authors:
    Xiaohu Hu, CMB/ORNL (hux2@ornl.gov)
    David Hyde-Volpe, Tigran Abramyan, and Robert A. Latour (Clemson University)
    Chris Lorenz (Kings College-London)
 
+   Implementation of the CHARMM CMAP; adds an extra energy term for the
+   peptide backbone dihedrals.  The tools/ch2lmp/charmm2lammps.pl
+   conversion script, which generates an extra section in the LAMMPS data
+   file, is needed in order to generate the info used by this fix style.
+
    References:
    - MacKerell et al., J. Am. Chem. Soc. 126(2004):698-699.
    - MacKerell et al., J. Comput. Chem. 25(2004):1400-1415.
- -------------------------------------------------------------------------*/
+------------------------------------------------------------------------- */
 
 #include <mpi.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include "fix_cmap.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "update.h"
 #include "respa.h"
 #include "modify.h"
 #include "domain.h"
 #include "force.h"
 #include "group.h"
 #include "comm.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 #define MAXLINE 256
 #define LISTDELTA 10000
 #define LB_FACTOR 1.5
 
 #define CMAPMAX 6   // max # of CMAP terms stored by one atom
 #define CMAPDIM 24  // grid map dimension is 24 x 24
 #define CMAPXMIN -360.0
 #define CMAPXMIN2 -180.0
 #define CMAPDX 15.0 // 360/CMAPDIM
 
 /* ----------------------------------------------------------------------
    constructor: requires exactly 4 arguments, arg[3] is the CMAP grid file
    sets the fix flags, allocates and reads the grid maps,
    and creates the per-atom crossterm arrays
 ------------------------------------------------------------------------- */
 
 FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg),
   crosstermlist(NULL), num_crossterm(NULL), crossterm_type(NULL), crossterm_atom1(NULL),
   crossterm_atom2(NULL), crossterm_atom3(NULL), crossterm_atom4(NULL), crossterm_atom5(NULL),
   g_axis(NULL), cmapgrid(NULL), d1cmapgrid(NULL), d2cmapgrid(NULL), d12cmapgrid(NULL)
 {
   if (narg != 4) error->all(FLERR,"Illegal fix cmap command");
 
   // restart and data file flags
 
   restart_global = 1;
   restart_peratom = 1;
   wd_header = 1;
   wd_section = 1;
 
   // energy/virial flags and global scalar output flags
 
   peatom_flag = 1;
   virial_flag = 1;
   peratom_freq = 1;
   scalar_flag = 1;
   global_freq = 1;
   extscalar = 1;
   extvector = 1;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // storage for the grid axis, the 6 grid maps and their derivative maps
 
   memory->create(g_axis,CMAPDIM,"cmap:g_axis");
   memory->create(cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:grid");
   memory->create(d1cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d1grid");
   memory->create(d2cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d2grid");
   memory->create(d12cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d12grid");
 
   // fill cmapgrid from the file given as last argument
 
   read_grid_map(arg[3]);
 
   // initial allocation of atom-based arrays and registration with Atom class
   // all per-atom pointers and crosstermlist are still NULL at this point,
   //   since the initializer list above has set them
 
   nmax_previous = 0;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   atom->add_callback(1);
 
   // local list of crossterms starts out empty, is allocated in pre_neighbor()
 
   ncmap = 0;
   maxcrossterm = 0;
 }
 
 /* ----------------------------------------------------------------------
    destructor: unregister from Atom class and free all storage
 ------------------------------------------------------------------------- */
 
 FixCMAP::~FixCMAP()
 {
   // unregister callbacks to this fix from Atom class
 
   atom->delete_callback(id,0);
   atom->delete_callback(id,1);
 
   // per-atom crossterm arrays
 
   memory->destroy(num_crossterm);
   memory->destroy(crossterm_type);
   memory->destroy(crossterm_atom1);
   memory->destroy(crossterm_atom2);
   memory->destroy(crossterm_atom3);
   memory->destroy(crossterm_atom4);
   memory->destroy(crossterm_atom5);
 
   // local crossterm list
 
   memory->destroy(crosstermlist);
 
   // grid axis, grid maps and derivative maps
 
   memory->destroy(g_axis);
   memory->destroy(cmapgrid);
   memory->destroy(d1cmapgrid);
   memory->destroy(d2cmapgrid);
   memory->destroy(d12cmapgrid);
 }
 
 /* ----------------------------------------------------------------------
    return the bitwise OR of all FixConst flags set by this fix
 ------------------------------------------------------------------------- */
 
 int FixCMAP::setmask()
 {
   int mask = PRE_NEIGHBOR | PRE_REVERSE;
   mask |= POST_FORCE | POST_FORCE_RESPA | MIN_POST_FORCE;
   mask |= THERMO_ENERGY;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixCMAP::init()
 {
   int i;
   double angle;
 
   i = 0;
   angle = -180.0;
   while (angle < 180.0) {
     g_axis[i] = angle;
     angle += CMAPDX;
     i++;
   }
 
   // pre-compute the derivatives of the maps
 
   for (i = 0; i < 6; i++)
     set_map_derivatives(cmapgrid[i],d1cmapgrid[i],d2cmapgrid[i],d12cmapgrid[i]);
 
   // define newton_bond here in case restart file was read (not data file)
 
   newton_bond = force->newton_bond;
 }
 
 /* --------------------------------------------------------------------- */
 
 void FixCMAP::setup(int vflag)
 {
   pre_neighbor();
 
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
     ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
     post_force_respa(vflag,nlevels_respa-1,0);
     ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
   }
 }
 
 /* ----------------------------------------------------------------------
    forward to pre_neighbor() to build the local crossterm list
 ------------------------------------------------------------------------- */
 
 void FixCMAP::setup_pre_neighbor()
 {
   pre_neighbor();
 }
 
 /* ----------------------------------------------------------------------
    build the local crossterm list, then compute the CMAP terms
 ------------------------------------------------------------------------- */
 
 void FixCMAP::min_setup(int vflag)
 {
   pre_neighbor();
   post_force(vflag);
 }
 
 /* ----------------------------------------------------------------------
    store local neighbor list as if newton_bond = OFF, even if actually ON
    each entry of crosstermlist holds the 5 local atom indices
    followed by the crossterm type
 ------------------------------------------------------------------------- */
 
 void FixCMAP::pre_neighbor()
 {
   int i,m,atom1,atom2,atom3,atom4,atom5;
 
   // guesstimate initial length of local crossterm list
   // if ncmap was not set (due to read_restart, no read_data),
   //   then list will grow by LISTDELTA chunks
 
   if (maxcrossterm == 0) {
     if (nprocs == 1) maxcrossterm = ncmap;
     else maxcrossterm = static_cast<int> (LB_FACTOR*ncmap/nprocs);
     memory->create(crosstermlist,maxcrossterm,6,"cmap:crosstermlist");
   }
 
   int nlocal = atom->nlocal;
 
   ncrosstermlist = 0;
 
   for (i = 0; i < nlocal; i++) {
     for (m = 0; m < num_crossterm[i]; m++) {
 
       // convert the 5 stored atom IDs to local indices
 
       atom1 = atom->map(crossterm_atom1[i][m]);
       atom2 = atom->map(crossterm_atom2[i][m]);
       atom3 = atom->map(crossterm_atom3[i][m]);
       atom4 = atom->map(crossterm_atom4[i][m]);
       atom5 = atom->map(crossterm_atom5[i][m]);
 
       if (atom1 == -1 || atom2 == -1 || atom3 == -1 ||
           atom4 == -1 || atom5 == -1) {
 
         // 5 atom IDs, a proc ID and a timestep with up to 20 characters
         //   each do not fit into 128 bytes, thus use a larger buffer
         //   and a bounded print
 
         char str[256];
         snprintf(str,sizeof(str),"CMAP atoms "
                  TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " "
                  TAGINT_FORMAT " " TAGINT_FORMAT
                  " missing on proc %d at step " BIGINT_FORMAT,
                  crossterm_atom1[i][m],crossterm_atom2[i][m],
                  crossterm_atom3[i][m],crossterm_atom4[i][m],
                  crossterm_atom5[i][m],me,update->ntimestep);
         error->one(FLERR,str);
       }
 
       // use the images of the 5 atoms that are closest to atom i
 
       atom1 = domain->closest_image(i,atom1);
       atom2 = domain->closest_image(i,atom2);
       atom3 = domain->closest_image(i,atom3);
       atom4 = domain->closest_image(i,atom4);
       atom5 = domain->closest_image(i,atom5);
 
       // only add the term if no atom of it has a smaller index than i
 
       if (i <= atom1 && i <= atom2 && i <= atom3 &&
           i <= atom4 && i <= atom5) {
         if (ncrosstermlist == maxcrossterm) {
           maxcrossterm += LISTDELTA;
           memory->grow(crosstermlist,maxcrossterm,6,"cmap:crosstermlist");
         }
         crosstermlist[ncrosstermlist][0] = atom1;
         crosstermlist[ncrosstermlist][1] = atom2;
         crosstermlist[ncrosstermlist][2] = atom3;
         crosstermlist[ncrosstermlist][3] = atom4;
         crosstermlist[ncrosstermlist][4] = atom5;
         crosstermlist[ncrosstermlist][5] = crossterm_type[i][m];
         ncrosstermlist++;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    remember eflag in eflag_caller, post_force() reads it back
    to decide about energy tallying; vflag is not used here
 ------------------------------------------------------------------------- */
 
 void FixCMAP::pre_reverse(int eflag, int vflag)
 {
   eflag_caller = eflag;
 }
 
 /* ----------------------------------------------------------------------
    compute CMAP terms as if newton_bond = OFF, even if actually ON
 ------------------------------------------------------------------------- */
 
 void FixCMAP::post_force(int vflag)
 {
   int n,i1,i2,i3,i4,i5,type,nlist;
   int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41;
   int list[5];
   // vectors needed to calculate the cross-term dihedral angles
   double vb21x,vb21y,vb21z,vb32x,vb32y,vb32z,vb34x,vb34y,vb34z;
   double vb23x,vb23y,vb23z;
   double vb43x,vb43y,vb43z,vb45x,vb45y,vb45z,a1x,a1y,a1z,b1x,b1y,b1z;
   double a2x,a2y,a2z,b2x,b2y,b2z,r32,a1sq,b1sq,a2sq,b2sq,dpr21r32,dpr34r32;
   double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z;
   // cross-term dihedral angles
   double phi,psi,phi1,psi1;
   double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[6];
   double gs[4],d1gs[4],d2gs[4],d12gs[4];
   double engfraction;
   // vectors needed for the gradient/force calculation
   double dphidr1x,dphidr1y,dphidr1z,dphidr2x,dphidr2y,dphidr2z;
   double dphidr3x,dphidr3y,dphidr3z,dphidr4x,dphidr4y,dphidr4z;
   double dpsidr1x,dpsidr1y,dpsidr1z,dpsidr2x,dpsidr2y,dpsidr2z;
   double dpsidr3x,dpsidr3y,dpsidr3z,dpsidr4x,dpsidr4y,dpsidr4z;
 
   // Definition of cross-term dihedrals
 
   //         phi dihedral
   //   |--------------------|
   //   a1-----a2-----a3-----a4-----a5    cross-term atoms
   //   C      N      CA     C      N     cross-term atom types
   //          |--------------------|
   //               psi dihedral
 
   double **x = atom->x;
   double **f = atom->f;
   int nlocal = atom->nlocal;
 
   ecmap = 0.0;
   int eflag = eflag_caller;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   for (n = 0; n < ncrosstermlist; n++) {
     i1 = crosstermlist[n][0];
     i2 = crosstermlist[n][1];
     i3 = crosstermlist[n][2];
     i4 = crosstermlist[n][3];
     i5 = crosstermlist[n][4];
 
     type = crosstermlist[n][5];
     if (type == 0) continue;
 
     // calculate bond vectors for both dihedrals
 
     // phi
     // vb21 = r2 - r1
 
       vb21x = x[i2][0] - x[i1][0];
       vb21y = x[i2][1] - x[i1][1];
       vb21z = x[i2][2] - x[i1][2];
       vb12x = -1.0*vb21x;
       vb12y = -1.0*vb21y;
       vb12z = -1.0*vb21z;
       vb32x = x[i3][0] - x[i2][0];
       vb32y = x[i3][1] - x[i2][1];
       vb32z = x[i3][2] - x[i2][2];
       vb23x = -1.0*vb32x;
       vb23y = -1.0*vb32y;
       vb23z = -1.0*vb32z;
 
       vb34x = x[i3][0] - x[i4][0];
       vb34y = x[i3][1] - x[i4][1];
       vb34z = x[i3][2] - x[i4][2];
 
       // psi
       // bond vectors same as for phi: vb32
 
       vb43x = -1.0*vb34x;
       vb43y = -1.0*vb34y;
       vb43z = -1.0*vb34z;
 
       vb45x = x[i4][0] - x[i5][0];
       vb45y = x[i4][1] - x[i5][1];
       vb45z = x[i4][2] - x[i5][2];
       vb54x = -1.0*vb45x;
       vb54y = -1.0*vb45y;
       vb54z = -1.0*vb45z;
 
       // calculate normal vectors for planes that define the dihedral angles
 
       a1x = vb12y*vb23z - vb12z*vb23y;
       a1y = vb12z*vb23x - vb12x*vb23z;
       a1z = vb12x*vb23y - vb12y*vb23x;
 
       b1x = vb43y*vb23z - vb43z*vb23y;
       b1y = vb43z*vb23x - vb43x*vb23z;
       b1z = vb43x*vb23y - vb43y*vb23x;
 
       a2x = vb23y*vb34z - vb23z*vb34y;
       a2y = vb23z*vb34x - vb23x*vb34z;
       a2z = vb23x*vb34y - vb23y*vb34x;
 
       b2x = vb45y*vb43z - vb45z*vb43y;
       b2y = vb45z*vb43x - vb45x*vb43z;
       b2z = vb45x*vb43y - vb45y*vb43x;
 
       // calculate terms used later in calculations
 
       r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z);
       a1sq = a1x*a1x + a1y*a1y + a1z*a1z;
       b1sq = b1x*b1x + b1y*b1y + b1z*b1z;
 
       r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z);
       a2sq = a2x*a2x + a2y*a2y + a2z*a2z;
       b2sq = b2x*b2x + b2y*b2y + b2z*b2z;
       //if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001)
       //  printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq);
       if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) continue;
       dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z;
       dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z;
       dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z;
       dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z;
 
       // calculate the backbone dihedral angles as VMD and GROMACS
 
       phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32);
       psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43);
 
       if (phi == 180.0) phi= -180.0;
       if (psi == 180.0) psi= -180.0;
 
       phi1 = phi;
       if (phi1 < 0.0) phi1 += 360.0;
       psi1 = psi;
       if (psi1 < 0.0) psi1 += 360.0;
 
       // find the neighbor grid point index
 
       li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0));
       li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0));
 
       li3 = int((phi-CMAPXMIN2)/CMAPDX);
       li4 = int((psi-CMAPXMIN2)/CMAPDX);
       mli3 = li3 % CMAPDIM;
       mli4 = li4 % CMAPDIM;
       mli31 = (li3+1) % CMAPDIM;
       mli41 = (li4+1)  %CMAPDIM;
       mli1 = li1 % CMAPDIM;
       mli2 = li2 % CMAPDIM;
       mli11 = (li1+1) % CMAPDIM;
       mli21 = (li2+1)  %CMAPDIM;
       t1 = type-1;
       if (t1 < 0 || t1 > 5) error->all(FLERR,"Invalid CMAP crossterm_type");
 
       // determine the values and derivatives for the grid square points
 
       gs[0] = cmapgrid[t1][mli3][mli4];
       gs[1] = cmapgrid[t1][mli31][mli4];
       gs[2] = cmapgrid[t1][mli31][mli41];
       gs[3] = cmapgrid[t1][mli3][mli41];
       d1gs[0] = d1cmapgrid[t1][mli1][mli2];
       d1gs[1] = d1cmapgrid[t1][mli11][mli2];
       d1gs[2] = d1cmapgrid[t1][mli11][mli21];
       d1gs[3] = d1cmapgrid[t1][mli1][mli21];
 
       d2gs[0] = d2cmapgrid[t1][mli1][mli2];
       d2gs[1] = d2cmapgrid[t1][mli11][mli2];
       d2gs[2] = d2cmapgrid[t1][mli11][mli21];
       d2gs[3] = d2cmapgrid[t1][mli1][mli21];
 
       d12gs[0] = d12cmapgrid[t1][mli1][mli2];
       d12gs[1] = d12cmapgrid[t1][mli11][mli2];
       d12gs[2] = d12cmapgrid[t1][mli11][mli21];
       d12gs[3] = d12cmapgrid[t1][mli1][mli21];
 
       // calculate the cmap energy and the gradient (dE/dphi,dE/dpsi)
 
       bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs);
 
       // sum up cmap energy contributions
 
       engfraction = 0.2 * E;
       if (i1 < nlocal) ecmap += engfraction;
       if (i2 < nlocal) ecmap += engfraction;
       if (i3 < nlocal) ecmap += engfraction;
       if (i4 < nlocal) ecmap += engfraction;
       if (i5 < nlocal) ecmap += engfraction;
 
       // calculate the derivatives dphi/dr_i
 
       dphidr1x = 1.0*r32/a1sq*a1x;
       dphidr1y = 1.0*r32/a1sq*a1y;
       dphidr1z = 1.0*r32/a1sq*a1z;
 
       dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x +
         dpr34r32/b1sq/r32*b1x;
       dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y +
         dpr34r32/b1sq/r32*b1y;
       dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z +
         dpr34r32/b1sq/r32*b1z;
 
       dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x;
       dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y;
       dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z;
 
       dphidr4x = r32/b1sq*b1x;
       dphidr4y = r32/b1sq*b1y;
       dphidr4z = r32/b1sq*b1z;
 
       // calculate the derivatives dpsi/dr_i
 
       dpsidr1x = 1.0*r43/a2sq*a2x;
       dpsidr1y = 1.0*r43/a2sq*a2y;
       dpsidr1z = 1.0*r43/a2sq*a2z;
 
       dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x;
       dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y;
       dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z;
 
       dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x;
       dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y;
       dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z;
 
       dpsidr4x = r43/b2sq*b2x;
       dpsidr4y = r43/b2sq*b2y;
       dpsidr4z = r43/b2sq*b2z;
 
       // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr)
 
       f1[0] = dEdPhi*dphidr1x;
       f1[1] = dEdPhi*dphidr1y;
       f1[2] = dEdPhi*dphidr1z;
       f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x;
       f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y;
       f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z;
       f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x;
       f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y;
       f3[2] = -dEdPhi*dphidr3z - dEdPsi*dpsidr2z;
       f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x;
       f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y;
       f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z;
       f5[0] = -dEdPsi*dpsidr4x;
       f5[1] = -dEdPsi*dpsidr4y;
       f5[2] = -dEdPsi*dpsidr4z;
 
       // apply force to each of the 5 atoms
 
       if (i1 < nlocal) {
         f[i1][0] += f1[0];
         f[i1][1] += f1[1];
         f[i1][2] += f1[2];
       }
       if (i2 < nlocal) {
         f[i2][0] += f2[0];
         f[i2][1] += f2[1];
         f[i2][2] += f2[2];
       }
       if (i3 < nlocal) {
         f[i3][0] += f3[0];
         f[i3][1] += f3[1];
         f[i3][2] += f3[2];
       }
       if (i4 < nlocal) {
         f[i4][0] += f4[0];
         f[i4][1] += f4[1];
         f[i4][2] += f4[2];
       }
       if (i5 < nlocal) {
         f[i5][0] += f5[0];
         f[i5][1] += f5[1];
         f[i5][2] += f5[2];
       }
 
       // tally energy and/or virial
 
       if (evflag) {
         nlist = 0;
         if (i1 < nlocal) list[nlist++] = i1;
         if (i2 < nlocal) list[nlist++] = i2;
         if (i3 < nlocal) list[nlist++] = i3;
         if (i4 < nlocal) list[nlist++] = i4;
         if (i5 < nlocal) list[nlist++] = i5;
         vcmap[0] = (vb12x*f1[0])+(vb32x*f3[0])+((vb43x+vb32x)*f4[0])+
           ((vb54x+vb43x+vb32x)*f5[0]);
         vcmap[1] = (vb12y*f1[1])+(vb32y*f3[1])+((vb43y+vb32y)*f4[1])+
           ((vb54y+vb43y+vb32y)*f5[1]);
         vcmap[2] = (vb12z*f1[2])+(vb32z*f3[2])+((vb43z+vb32z)*f4[2])+
           ((vb54z+vb43z+vb32z)*f5[2]);
         vcmap[3] = (vb12x*f1[1])+(vb32x*f3[1])+((vb43x+vb32x)*f4[1])+
           ((vb54x+vb43x+vb32x)*f5[1]);
         vcmap[4] = (vb12x*f1[2])+(vb32x*f3[2])+((vb43x+vb32x)*f4[2])+
           ((vb54x+vb43x+vb32x)*f5[2]);
         vcmap[5] = (vb12y*f1[2])+(vb32y*f3[2])+((vb43y+vb32y)*f4[2])+
           ((vb54y+vb43y+vb32y)*f5[2]);
         ev_tally(nlist,list,5.0,E,vcmap);
         //ev_tally(5,list,nlocal,newton_bond,E,vcmap);
       }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute CMAP terms only on rRESPA level nlevels_respa-1
    iloop is not used
 ------------------------------------------------------------------------- */
 
 void FixCMAP::post_force_respa(int vflag, int ilevel, int iloop)
 {
   if (ilevel != nlevels_respa-1) return;
   post_force(vflag);
 }
 
 /* ----------------------------------------------------------------------
    forward to post_force()
 ------------------------------------------------------------------------- */
 
 void FixCMAP::min_post_force(int vflag)
 {
   post_force(vflag);
 }
 
 /* ----------------------------------------------------------------------
    energy of CMAP term
    = sum of the per-processor values of ecmap over all processors
 ------------------------------------------------------------------------- */
 
 double FixCMAP::compute_scalar()
 {
   double ecmap_all;
   MPI_Allreduce(&ecmap,&ecmap_all,1,MPI_DOUBLE,MPI_SUM,world);
   return ecmap_all;
 }
 
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 // methods to read CMAP potential file, perform interpolation
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    read the 6 CMAP grid maps from file cmapfile into cmapgrid
    proc 0 reads the file line by line and broadcasts each line
    all procs parse the values, thus all end up with the same maps
    values beyond the 6*CMAPDIM*CMAPDIM expected ones are ignored
    NOTE: a file with fewer values is accepted without a message,
          the missing grid points are left at 0.0
 ------------------------------------------------------------------------- */
 
 void FixCMAP::read_grid_map(char *cmapfile)
 {
   char linebuf[MAXLINE];
   char *chunk,*line;
 
   FILE *fp = NULL;
   if (comm->me == 0) {
     fp = force->open_potential(cmapfile);
     if (fp == NULL) {
 
       // file name has arbitrary length, thus print must be bounded
 
       char str[512];
       snprintf(str,sizeof(str),"Cannot open fix cmap file %s",cmapfile);
       error->one(FLERR,str);
     }
   }
 
   for (int ix1 = 0; ix1 < 6; ix1++)
     for (int ix2 = 0; ix2 < CMAPDIM; ix2++)
       for (int ix3 = 0; ix3 < CMAPDIM; ix3++)
         cmapgrid[ix1][ix2][ix3] = 0.0;
 
   // counter = number of grid values stored so far, across all maps
 
   const int npermap = CMAPDIM*CMAPDIM;
   const int ntotal = 6*npermap;
   int counter = 0;
 
   int done = 0;
 
   while (!done) {
     // only read on rank 0 and broadcast to all other ranks
     if (comm->me == 0)
       done = (fgets(linebuf,MAXLINE,fp) == NULL);
 
     MPI_Bcast(&done,1,MPI_INT,0,world);
     if (done) continue;
 
     MPI_Bcast(linebuf,MAXLINE,MPI_CHAR,0,world);
 
     // remove leading whitespace
     line = linebuf;
     while (*line == ' ' || *line == '\t' || *line == '\r') ++line;
 
     // skip if empty line or comment
     if (*line =='\n' || *line == '\0' || *line == '#') continue;
 
     // read in the cmap grid point values
     // NOTE: The order to read the 6 grid maps is HARD-CODED, thus errors
     //       will occur if content of the file "cmap.data" is altered
     //
     // Reading order of the maps:
     // 1. Alanine map
     // 2. Alanine before proline map
     // 3. Proline map
     // 4. Two adjacent prolines map
     // 5. Glycine map
     // 6. Glycine before proline map
     //
     // values fill one map after the other, and within a map
     //   one row after the other, thus map, row and column index
     //   all follow from the running counter
     // tabs are accepted as separators, same as for leading whitespace
 
     chunk = strtok(line, " \t\r\n");
     while (chunk != NULL && counter < ntotal) {
       const int imap = counter / npermap;
       const int irest = counter % npermap;
       cmapgrid[imap][irest / CMAPDIM][irest % CMAPDIM] = atof(chunk);
       counter++;
       chunk = strtok(NULL, " \t\r\n");
     }
   }
 
   if (comm->me == 0) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    create the 2nd derivatives ddy of a tabulated function y_i(x_i)
    at the n tabulated points, which are spaced by CMAPDX
    ddy[0] and ddy[n-1] are set to 0.0
 ------------------------------------------------------------------------- */
 
 void FixCMAP::spline(double *y, double *ddy, int n)
 {
   double *rhs;
   memory->create(rhs,n-1,"cmap:u");
 
   // forward sweep: ddy temporarily holds the elimination factors
 
   ddy[0] = rhs[0] = 0.0;
 
   for (int i = 1; i < n-1; i++) {
     const double pinv = 1.0/(ddy[i-1]+4.0);
     ddy[i] = -pinv;
     rhs[i] = ((((6.0*y[i+1])-(12.0*y[i])+(6.0*y[i-1]))/(CMAPDX*CMAPDX))-rhs[i-1])*pinv;
   }
 
   // back substitution, starting from the last point
 
   ddy[n-1] = 0.0;
 
   for (int j = n-2; j >= 0; --j)
     ddy[j] = ddy[j]*ddy[j+1] + rhs[j];
 
   memory->destroy(rhs);
 }
 
 /* ----------------------------------------------------------------------
    perform a 1D cubic spline interpolation at position x
    y = tabulated values, ddy = their 2nd derivatives
    yo = interpolated value, dyo = interpolated 1st derivative
 ------------------------------------------------------------------------- */
 
 void FixCMAP::spl_interpolate(double x, double *y, double *ddy, double &yo,
                               double &dyo)
 {
   // index of the lower of the two table entries that are used
 
   const int ilo = int((x-CMAPXMIN)/CMAPDX-(1./2.));
 
   // weights of the lower and the upper table entry
 
   const double wlo = (CMAPXMIN+(ilo*1.0)*CMAPDX-x)/CMAPDX;
   const double whi = (x-CMAPXMIN-(((ilo-1)*1.0)*CMAPDX))/CMAPDX;
 
   // cubic factors for the value and for the derivative
 
   const double clo = wlo*wlo*wlo-wlo;
   const double chi = whi*whi*whi-whi;
   const double dlo = 3.0*wlo*wlo-1.0;
   const double dhi = 3.0*whi*whi-1.0;
 
   yo = wlo*y[ilo]+whi*y[ilo+1]+(clo*ddy[ilo]+chi*ddy[ilo+1])*(CMAPDX*CMAPDX)/6.0;
   dyo = (y[ilo+1]-y[ilo])/CMAPDX-dlo/6.0*CMAPDX*ddy[ilo]+dhi/6.0*CMAPDX*ddy[ilo+1];
 }
 
 /* ----------------------------------------------------------------------
    precompute the gradient and cross-derivatives of the map grid points
    map = one CMAPDIM x CMAPDIM grid map (input)
    d1yo, d2yo, d12yo = derivative with respect to the 1st angle,
      the 2nd angle, and both angles, at every grid point (output)
    use the bicubic spline to calculate the derivatives
 ------------------------------------------------------------------------- */
 
 void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo,
                                   double **d12yo)
 {
   int i, j, k, ii, jj, ix;
   double phi, psi, d1y, d2y, d12y;
   double a,b,a1,b1,a2,b2;
   double *tmp_y, *tmp_dy, *tmp_ddy, **tmap, **tddmap;
 
   const int xm = CMAPDIM/2;
   const int p = CMAPDIM;
 
   memory->create(tmp_y,CMAPDIM*2,"cmap:tmp_y");
   memory->create(tmp_dy,CMAPDIM*2,"cmap:tmp_dy");
   memory->create(tmp_ddy,CMAPDIM*2,"cmap:tmp_ddy");
   memory->create(tmap,CMAPDIM*2,CMAPDIM*2,"cmap:tmap");
   memory->create(tddmap,CMAPDIM*2,CMAPDIM*2,"cmap:tddmap");
 
   // periodically expand the original map
   // use the expanded map for bicubic spline interpolation,
   //   which is used to obtain the derivatives
   // actual interpolation is done with bicubic interpolation
   // index i of the expanded map belongs to the angle CMAPXMIN + i*CMAPDX
 
   for (i = 0; i < CMAPDIM*2; i++) {
     ii = ((i+CMAPDIM-xm)%CMAPDIM);
     for (j = 0; j < CMAPDIM*2; j++) {
       jj = ((j+CMAPDIM-xm)%CMAPDIM);
       tmap[i][j] = map[ii][jj];
     }
   }
 
   // 2nd derivatives along the 2nd index for each row of the expanded map
 
   for (i = 0; i < CMAPDIM*2; i++)
     spline(tmap[i], tddmap[i], CMAPDIM*2);
 
   // loop over the grid points with angles from -180 to 180-CMAPDX
 
   for (i = xm; i < CMAPDIM+xm; i++) {
     phi = (i-xm)*CMAPDX-180.0;
     for (j = xm; j < CMAPDIM+xm; j++) {
       psi = (j-xm)*CMAPDX-180.0;
 
       // spline weights at psi
 
       ix = int((psi-CMAPXMIN)/CMAPDX);
       a = (CMAPXMIN+((ix+1)*1.0)*CMAPDX-psi)/CMAPDX;
       b = (psi-CMAPXMIN-((ix)*1.0)*CMAPDX)/CMAPDX;
       a1 = a*a*a-a;
       b1 = b*b*b-b;
       a2 = 3.0*a*a-1.0;
       b2 = 3.0*b*b-1.0;
 
       // value (tmp_y) and derivative along psi (tmp_dy) for every row k
       // both arrays are write-only here, their old content is not needed
 
       for (k = 0; k < CMAPDIM*2; k++) {
         tmp_y[k] = a*tmap[k][ix]+b*tmap[k][ix+1]+
           (a1*tddmap[k][ix]+b1*tddmap[k][ix+1])*(CMAPDX*CMAPDX)/6.0;
         tmp_dy[k] = (tmap[k][ix+1]-tmap[k][ix])/CMAPDX-
           (a2/6.0*CMAPDX*tddmap[k][ix])+(b2/6.0*CMAPDX*tddmap[k][ix+1]);
       }
 
       // spline weights at phi, used for both splines along phi below
 
       ix = int((phi-CMAPXMIN)/CMAPDX);
       a = (CMAPXMIN+((ix+1)*1.0)*CMAPDX-phi)/CMAPDX;
       b = (phi-CMAPXMIN-(ix*1.0)*CMAPDX)/CMAPDX;
       a1 = a*a*a-a;
       b1 = b*b*b-b;
       a2 = 3.0*a*a-1.0;
       b2 = 3.0*b*b-1.0;
 
       // derivative along phi of the values
 
       spline(tmp_y,tmp_ddy,CMAPDIM+xm+xm);
       d1y = (tmp_y[ix+1]-tmp_y[ix])/CMAPDX-
         a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1];
 
       // value and derivative along phi of the psi derivatives
       // tmp_ddy is overwritten with the 2nd derivatives of tmp_dy
 
       spline(tmp_dy,tmp_ddy,CMAPDIM+xm+xm);
       d2y = a*tmp_dy[ix]+b*tmp_dy[ix+1]+
         (a1*tmp_ddy[ix]+b1*tmp_ddy[ix+1])*(CMAPDX*CMAPDX)/6.0;
       d12y = (tmp_dy[ix+1]-tmp_dy[ix])/CMAPDX-
         a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1];
 
       // output maps are indexed by the expanded map index modulo CMAPDIM
 
       d1yo[i%p][j%p] = d1y;
       d2yo[i%p][j%p] = d2y;
       d12yo[i%p][j%p] = d12y;
     }
   }
 
   memory->destroy(tmp_y);
   memory->destroy(tmp_dy);
   memory->destroy(tmp_ddy);
   memory->destroy(tmap);
   memory->destroy(tddmap);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixCMAP::dihedral_angle_atan2(double fx, double fy, double fz,
                                       double ax, double ay, double az,
                                       double bx, double by, double bz,
                                       double absg)
 {
   // calculate the dihedral angle
 
   double angle, arg1, arg2;
 
   arg1 = absg*(fx*bx+fy*by+fz*bz);
   arg2 = ax*bx+ay*by+az*bz;
 
   if (arg1 == 0 && arg2 == 0)
     error->all(FLERR,"CMAP: atan2 function cannot take 2 zero arguments");
   else {
     angle = atan2(arg1,arg2);
     angle = angle*180.0/MY_PI;
   }
 
   return angle;
 }
 
 /* ----------------------------------------------------------------------
    calculate the bicubic interpolation coefficients c_ij
    gs, d1gs, d2gs, d12gs = values, both gradients and cross-derivatives
      at the 4 points of the grid square
    result is stored in the class member cij[4][4]
 ------------------------------------------------------------------------- */
 
 void FixCMAP::bc_coeff(double *gs, double *d1gs, double *d2gs, double *d12gs)
 {
   // weights: row 4*i+j yields coefficient cij[i][j]
 
   static const int wt[16][16] = {
     { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
     {-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0},
     { 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
     { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
     { 0, 0, 0, 0,-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1},
     { 0, 0, 0, 0, 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1},
     {-3, 3, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     { 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0},
     { 9,-9, 9,-9, 6, 3,-3,-6, 6,-6,-3, 3, 4, 2, 1, 2},
     {-6, 6,-6, 6,-4,-2, 2, 4,-3, 3, 3,-3,-2,-1,-1,-2},
     { 2,-2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     { 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0},
     {-6, 6,-6, 6,-3,-3, 3, 3,-4, 4, 2,-2,-2,-2,-1,-1},
     { 4,-4, 4,-4, 2, 2,-2,-2, 2,-2,-2, 2, 1, 1, 1, 1}
   };
 
   // pack the 16 input values, derivatives are scaled by the grid spacing
 
   double packed[16];
 
   for (int m = 0; m < 4; m++) {
     packed[m] = gs[m];
     packed[m+4] = d1gs[m]*CMAPDX;
     packed[m+8] = d2gs[m]*CMAPDX;
     packed[m+12] = d12gs[m]*CMAPDX*CMAPDX;
   }
 
   // each coefficient is the dot product of one weight row with the input
 
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 4; j++) {
       const int irow = 4*i+j;
       double sum = 0.0;
       for (int k = 0; k < 16; k++) sum += wt[irow][k]*packed[k];
       cij[i][j] = sum;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    bicubic interpolation at the point of interest (x1,x2)
    low1, low2 = indices into g_axis of the lower corner of its grid square
    gs, d1gs, d2gs, d12gs = values, gradients and cross-derivatives
      at the 4 points of the grid square
    results are stored in the class members E, dEdPhi, dEdPsi
 ------------------------------------------------------------------------- */
 
 void FixCMAP::bc_interpol(double x1, double x2, int low1, int low2, double *gs,
                            double *d1gs, double *d2gs, double *d12gs)
 {
   // set the interpolation coefficients cij
 
   bc_coeff(gs,d1gs,d2gs,d12gs);
 
   // offsets from the lower corner in units of the grid spacing
 
   const double t = (x1-g_axis[low1])/CMAPDX;
   const double u = (x2-g_axis[low2])/CMAPDX;
 
   // evaluate the polynomial and both partial derivatives
 
   E = dEdPhi = dEdPsi = 0.0;
 
   for (int m = 3; m >= 0; m--) {
     E = t*E + ((cij[m][3]*u+cij[m][2])*u+cij[m][1])*u+cij[m][0];
     dEdPhi = u*dEdPhi + (3.0*cij[3][m]*t+2.0*cij[2][m])*t+cij[1][m];
     dEdPsi = t*dEdPsi + (3.0*cij[m][3]*u+2.0*cij[m][2])*u+cij[m][1];
   }
 
   // rescale both derivatives by 180/(pi*CMAPDX)
 
   dEdPhi *= (180.0/MY_PI/CMAPDX);
   dEdPsi *= (180.0/MY_PI/CMAPDX);
 }
 
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 // methods to read and write data file
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 
 void FixCMAP::read_data_header(char *line)
 {
   if (strstr(line,"crossterms")) {
     sscanf(line,BIGINT_FORMAT,&ncmap);
   } else error->all(FLERR,"Invalid read data header line for fix cmap");
 
   // didn't set in constructor b/c this fix could be defined
   // before newton command
 
   newton_bond = force->newton_bond;
 }
 
 /* ----------------------------------------------------------------------
    unpack N lines in buf from section of data file labeled by keyword
    id_offset is applied to atomID fields if multiple data files are read
    store CMAP interactions as if newton_bond = OFF, even if actually ON
 ------------------------------------------------------------------------- */
 
 void FixCMAP::read_data_section(char *keyword, int n, char *buf,
                                  tagint id_offset)
 {
   int m,tmp,itype;
   tagint atom1,atom2,atom3,atom4,atom5;
   char *next;
 
   next = strchr(buf,'\n');
   *next = '\0';
   int nwords = atom->count_words(buf);
   *next = '\n';
 
   if (nwords != 7) {
     char str[128];
     sprintf(str,"Incorrect %s format in data file",keyword);
     error->all(FLERR,str);
   }
 
   // loop over lines of CMAP crossterms
   // tokenize the line into values
   // add crossterm to one of my atoms, depending on newton_bond
 
   for (int i = 0; i < n; i++) {
     next = strchr(buf,'\n');
     *next = '\0';
     sscanf(buf,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT
            " " TAGINT_FORMAT " " TAGINT_FORMAT,
            &tmp,&itype,&atom1,&atom2,&atom3,&atom4,&atom5);
 
     atom1 += id_offset;
     atom2 += id_offset;
     atom3 += id_offset;
     atom4 += id_offset;
     atom5 += id_offset;
 
     if ((m = atom->map(atom1)) >= 0) {
       if (num_crossterm[m] == CMAPMAX)
         error->one(FLERR,"Too many CMAP crossterms for one atom");
       crossterm_type[m][num_crossterm[m]] = itype;
       crossterm_atom1[m][num_crossterm[m]] = atom1;
       crossterm_atom2[m][num_crossterm[m]] = atom2;
       crossterm_atom3[m][num_crossterm[m]] = atom3;
       crossterm_atom4[m][num_crossterm[m]] = atom4;
       crossterm_atom5[m][num_crossterm[m]] = atom5;
       num_crossterm[m]++;
     }
 
     if ((m = atom->map(atom2)) >= 0) {
       if (num_crossterm[m] == CMAPMAX)
         error->one(FLERR,"Too many CMAP crossterms for one atom");
       crossterm_type[m][num_crossterm[m]] = itype;
       crossterm_atom1[m][num_crossterm[m]] = atom1;
       crossterm_atom2[m][num_crossterm[m]] = atom2;
       crossterm_atom3[m][num_crossterm[m]] = atom3;
       crossterm_atom4[m][num_crossterm[m]] = atom4;
       crossterm_atom5[m][num_crossterm[m]] = atom5;
       num_crossterm[m]++;
     }
 
     if ((m = atom->map(atom3)) >= 0) {
       if (num_crossterm[m] == CMAPMAX)
         error->one(FLERR,"Too many CMAP crossterms for one atom");
       crossterm_type[m][num_crossterm[m]] = itype;
       crossterm_atom1[m][num_crossterm[m]] = atom1;
       crossterm_atom2[m][num_crossterm[m]] = atom2;
       crossterm_atom3[m][num_crossterm[m]] = atom3;
       crossterm_atom4[m][num_crossterm[m]] = atom4;
       crossterm_atom5[m][num_crossterm[m]] = atom5;
       num_crossterm[m]++;
     }
 
     if ((m = atom->map(atom4)) >= 0) {
       if (num_crossterm[m] == CMAPMAX)
         error->one(FLERR,"Too many CMAP crossterms for one atom");
       crossterm_type[m][num_crossterm[m]] = itype;
       crossterm_atom1[m][num_crossterm[m]] = atom1;
       crossterm_atom2[m][num_crossterm[m]] = atom2;
       crossterm_atom3[m][num_crossterm[m]] = atom3;
       crossterm_atom4[m][num_crossterm[m]] = atom4;
       crossterm_atom5[m][num_crossterm[m]] = atom5;
       num_crossterm[m]++;
     }
 
     if ((m = atom->map(atom5)) >= 0) {
       if (num_crossterm[m] == CMAPMAX)
         error->one(FLERR,"Too many CMAP crossterms for one atom");
       crossterm_type[m][num_crossterm[m]] = itype;
       crossterm_atom1[m][num_crossterm[m]] = atom1;
       crossterm_atom2[m][num_crossterm[m]] = atom2;
       crossterm_atom3[m][num_crossterm[m]] = atom3;
       crossterm_atom4[m][num_crossterm[m]] = atom4;
       crossterm_atom5[m][num_crossterm[m]] = atom5;
       num_crossterm[m]++;
     }
 
     buf = next + 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 bigint FixCMAP::read_data_skip_lines(char *keyword)
 {
   // keyword is not used
   // the value returned is the crossterm count stored by read_data_header()
 
   const bigint nlines = ncmap;
   return nlines;
 }
 
 /* ----------------------------------------------------------------------
    write header line with the number of CMAP crossterms to file
    mth is not used, the same line is written for any value
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_data_header(FILE *fp, int mth)
 {
   // line has the form "<ncmap> cmap crossterms"
 
   fprintf(fp,
           BIGINT_FORMAT " cmap crossterms\n",
           ncmap);
 }
 
 /* ----------------------------------------------------------------------
    return size I own for Mth data section
    # of data sections = 1 for this fix
    nx = # of crossterms owned by my local atoms
      if newton_bond off, atom only owns crossterm if it is atom3
    ny = columns = type + 5 atom IDs
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_data_section_size(int mth, int &nx, int &ny)
 {
   int i,m;
 
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   nx = 0;
   for (i = 0; i < nlocal; i++)
     for (m = 0; m < num_crossterm[i]; m++)
       if (crossterm_atom3[i][m] == tag[i]) nx++;
 
   ny = 6;
 }
 
 /* ----------------------------------------------------------------------
    pack values for Mth data section into 2d buf
    buf allocated by caller as owned crossterms by 6
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_data_section_pack(int mth, double **buf)
 {
   int i,m;
 
   // 1st column = CMAP type
   // 2nd-6th columns = 5 atom IDs
 
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   int n = 0;
   for (i = 0; i < nlocal; i++) {
     for (m = 0; m < num_crossterm[i]; m++) {
       if (crossterm_atom3[i][m] != tag[i]) continue;
       buf[n][0] = ubuf(crossterm_type[i][m]).d;
       buf[n][1] = ubuf(crossterm_atom1[i][m]).d;
       buf[n][2] = ubuf(crossterm_atom2[i][m]).d;
       buf[n][3] = ubuf(crossterm_atom3[i][m]).d;
       buf[n][4] = ubuf(crossterm_atom4[i][m]).d;
       buf[n][5] = ubuf(crossterm_atom5[i][m]).d;
       n++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    write section keyword for data section to file
    the keyword is always CMAP, preceded and followed by a blank line
    mth is not used
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_data_section_keyword(int mth, FILE *fp)
 {
   fputs("\nCMAP\n\n",fp);
 }
 
 /* ----------------------------------------------------------------------
    write N lines from buf to file
    convert buf fields to int or double depending on styles
    index can be used to prepend global numbering
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_data_section(int mth, FILE *fp,
                                   int n, double **buf, int index)
 {
   for (int i = 0; i < n; i++)
     fprintf(fp,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT
             " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT "\n",
             index+i,(int) ubuf(buf[i][0]).i,(tagint) ubuf(buf[i][1]).i,
             (tagint) ubuf(buf[i][2]).i,(tagint) ubuf(buf[i][3]).i,
             (tagint) ubuf(buf[i][4]).i,(tagint) ubuf(buf[i][5]).i);
 }
 
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 // methods for restart and communication
 // ----------------------------------------------------------------------
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    write global state of Fix to restart file
    record = size in bytes (int) followed by ncmap (bigint)
    only proc 0 writes
 ------------------------------------------------------------------------- */
 
 void FixCMAP::write_restart(FILE *fp)
 {
   if (comm->me != 0) return;
 
   int nbytes = sizeof(bigint);
   fwrite(&nbytes,sizeof(int),1,fp);
   fwrite(&ncmap,sizeof(bigint),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixCMAP::restart(char *buf)
 {
   ncmap = *((bigint *) buf);
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for restart file
    buf[0] = total number of values packed, including itself
    followed by 6 values per crossterm of atom i: type and 5 atom IDs
    the type is stored as its absolute value
 ------------------------------------------------------------------------- */
 
 int FixCMAP::pack_restart(int i, double *buf)
 {
   // slot 0 is filled last, once the total count is known
 
   int nvalues = 1;
   const int nc = num_crossterm[i];
 
   for (int m = 0; m < nc; m++) {
     const int itype = crossterm_type[i][m];
     buf[nvalues++] = ubuf((itype > -itype) ? itype : -itype).d;
     buf[nvalues++] = ubuf(crossterm_atom1[i][m]).d;
     buf[nvalues++] = ubuf(crossterm_atom2[i][m]).d;
     buf[nvalues++] = ubuf(crossterm_atom3[i][m]).d;
     buf[nvalues++] = ubuf(crossterm_atom4[i][m]).d;
     buf[nvalues++] = ubuf(crossterm_atom5[i][m]).d;
   }
 
   buf[0] = nvalues;
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values from atom->extra array to restart the fix
    nlocal = index of the atom, nth = which set of extra values is mine
 ------------------------------------------------------------------------- */
 
 void FixCMAP::unpack_restart(int nlocal, int nth)
 {
   double *values = atom->extra[nlocal];
 
   // skip to Nth set of extra values
   // first value of each set is the length of that set
 
   int pos = 0;
   for (int iset = 0; iset < nth; iset++)
     pos += static_cast<int> (values[pos]);
 
   // my set = count followed by 6 values per crossterm
 
   const int count = static_cast<int> (values[pos++]);
   const int nc = (count-1)/6;
   num_crossterm[nlocal] = nc;
 
   for (int m = 0; m < nc; m++) {
     crossterm_type[nlocal][m] = (int) ubuf(values[pos++]).i;
     crossterm_atom1[nlocal][m] = (tagint) ubuf(values[pos++]).i;
     crossterm_atom2[nlocal][m] = (tagint) ubuf(values[pos++]).i;
     crossterm_atom3[nlocal][m] = (tagint) ubuf(values[pos++]).i;
     crossterm_atom4[nlocal][m] = (tagint) ubuf(values[pos++]).i;
     crossterm_atom5[nlocal][m] = (tagint) ubuf(values[pos++]).i;
   }
 }
 
 /* ----------------------------------------------------------------------
    maxsize of any atom's restart data
    = 1 count value + 6 values for each of at most CMAPMAX crossterms
 ------------------------------------------------------------------------- */
 
 int FixCMAP::maxsize_restart()
 {
   const int nmaxvalues = 1 + CMAPMAX*6;
   return nmaxvalues;
 }
 
 /* ----------------------------------------------------------------------
    size of atom nlocal's restart data
    = 1 count value + 6 values for each of its crossterms
 ------------------------------------------------------------------------- */
 
 int FixCMAP::size_restart(int nlocal)
 {
   const int nvalues = 1 + num_crossterm[nlocal]*6;
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    grow atom-based arrays to hold nmax atoms
    each atom has room for up to CMAPMAX crossterms
 ------------------------------------------------------------------------- */
 
 void FixCMAP::grow_arrays(int nmax)
 {
   // number of crossterms stored with each atom
 
   num_crossterm = memory->grow(num_crossterm,nmax,"cmap:num_crossterm");
 
   // type of each stored crossterm
 
   crossterm_type =
     memory->grow(crossterm_type,nmax,CMAPMAX,"cmap:crossterm_type");
 
   // IDs of the 5 atoms of each stored crossterm
 
   crossterm_atom1 =
     memory->grow(crossterm_atom1,nmax,CMAPMAX,"cmap:crossterm_atom1");
   crossterm_atom2 =
     memory->grow(crossterm_atom2,nmax,CMAPMAX,"cmap:crossterm_atom2");
   crossterm_atom3 =
     memory->grow(crossterm_atom3,nmax,CMAPMAX,"cmap:crossterm_atom3");
   crossterm_atom4 =
     memory->grow(crossterm_atom4,nmax,CMAPMAX,"cmap:crossterm_atom4");
   crossterm_atom5 =
     memory->grow(crossterm_atom5,nmax,CMAPMAX,"cmap:crossterm_atom5");
 
   // must initialize num_crossterm to 0 for added atoms
   // may never be set for some atoms when data file is read
 
   int inew = nmax_previous;
   while (inew < nmax) {
     num_crossterm[inew] = 0;
     inew++;
   }
 
   nmax_previous = nmax;
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based array
    all crossterm data of atom i is copied to atom j
    delflag is not used
 ------------------------------------------------------------------------- */
 
 void FixCMAP::copy_arrays(int i, int j, int delflag)
 {
   const int nc = num_crossterm[i];
   num_crossterm[j] = nc;
 
   for (int m = 0; m < nc; m++) {
     crossterm_type[j][m] = crossterm_type[i][m];
     crossterm_atom1[j][m] = crossterm_atom1[i][m];
     crossterm_atom2[j][m] = crossterm_atom2[i][m];
     crossterm_atom3[j][m] = crossterm_atom3[i][m];
     crossterm_atom4[j][m] = crossterm_atom4[i][m];
     crossterm_atom5[j][m] = crossterm_atom5[i][m];
   }
 }
 
 /* ----------------------------------------------------------------------
    initialize one atom's array values, called when atom is created
    a new atom starts out without any crossterms
 ------------------------------------------------------------------------- */
 
 void FixCMAP::set_arrays(int i)
 {
   // only the counter is reset, the crossterm entries are left untouched
 
   num_crossterm[i] = 0;
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
    buf = # of crossterms of atom i, then type and 5 atom IDs of each
    return # of values packed
 ------------------------------------------------------------------------- */
 
 int FixCMAP::pack_exchange(int i, double *buf)
 {
   const int nc = num_crossterm[i];
 
   double *out = buf;
   *out++ = ubuf(nc).d;
 
   for (int m = 0; m < nc; m++) {
     *out++ = ubuf(crossterm_type[i][m]).d;
     *out++ = ubuf(crossterm_atom1[i][m]).d;
     *out++ = ubuf(crossterm_atom2[i][m]).d;
     *out++ = ubuf(crossterm_atom3[i][m]).d;
     *out++ = ubuf(crossterm_atom4[i][m]).d;
     *out++ = ubuf(crossterm_atom5[i][m]).d;
   }
 
   return 1 + 6*nc;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
    buf = # of crossterms, then type and 5 atom IDs of each
    values are stored with atom nlocal
    return # of values unpacked
 ------------------------------------------------------------------------- */
 
 int FixCMAP::unpack_exchange(int nlocal, double *buf)
 {
   int pos = 0;
 
   const int nc = (int) ubuf(buf[pos++]).i;
   num_crossterm[nlocal] = nc;
 
   for (int m = 0; m < nc; m++) {
     crossterm_type[nlocal][m] = (int) ubuf(buf[pos++]).i;
     crossterm_atom1[nlocal][m] = (tagint) ubuf(buf[pos++]).i;
     crossterm_atom2[nlocal][m] = (tagint) ubuf(buf[pos++]).i;
     crossterm_atom3[nlocal][m] = (tagint) ubuf(buf[pos++]).i;
     crossterm_atom4[nlocal][m] = (tagint) ubuf(buf[pos++]).i;
     crossterm_atom5[nlocal][m] = (tagint) ubuf(buf[pos++]).i;
   }
 
   return pos;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixCMAP::memory_usage()
 {
   int nmax = atom->nmax;
   double bytes = nmax * sizeof(int);        // num_crossterm
   bytes += nmax*CMAPMAX * sizeof(int);      // crossterm_type
   bytes += 5*nmax*CMAPMAX * sizeof(int);    // crossterm_atom 12345
   bytes += maxcrossterm*6 * sizeof(int);    // crosstermlist
   return bytes;
 }
diff --git a/src/USER-CG-CMM/angle_sdk.cpp b/src/USER-CG-CMM/angle_sdk.cpp
index cc5498599..a4f979961 100644
--- a/src/USER-CG-CMM/angle_sdk.cpp
+++ b/src/USER-CG-CMM/angle_sdk.cpp
@@ -1,504 +1,505 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+
    Variant of the harmonic angle potential for use with the
    lj/sdk potential for coarse grained MD simulations.
-   Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include "angle_sdk.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "pair.h"
 #include "domain.h"
 #include "comm.h"
 #include "force.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 #include "lj_sdk_common.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace LJSDKParms;
 
 #define SMALL 0.001
 
 /* ---------------------------------------------------------------------- */
 
 AngleSDK::AngleSDK(LAMMPS *lmp) : Angle(lmp)
 {
   // 1-3 repulsion is off until init_style() determines it is needed
 
   repflag = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 AngleSDK::~AngleSDK()
 {
   // nothing to release if allocate() was never called
 
   if (!allocated) return;
 
   // per-type coefficient arrays created in allocate()
 
   memory->destroy(setflag);
   memory->destroy(k);
   memory->destroy(theta0);
   memory->destroy(repscale);
 
   allocated = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AngleSDK::compute(int eflag, int vflag)
 {
   // compute forces (and optionally energy and virial) for all angles
   //   in the neighbor angle list
   // each angle i1-i2-i3 contributes a harmonic term
   //   E = k[type] * (theta - theta0[type])^2
   // if repflag is set, a 1-3 LJ interaction between i1 and i3 is added
   //   for distances with rsq3 < rminsq, its energy shifted by emin
   // NOTE(review): repscale[type] is not referenced in this function,
   //   only repflag gates the 1-3 term - confirm this is intended
 
   int i1,i2,i3,n,type;
   double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3;
   double eangle,f1[3],f3[3],e13,f13;
   double dtheta,tk;
   double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22;
 
   eangle = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int **anglelist = neighbor->anglelist;
   int nanglelist = neighbor->nanglelist;
   int nlocal = atom->nlocal;
   int newton_bond = force->newton_bond;
 
   for (n = 0; n < nanglelist; n++) {
     // angle list entry = 3 atom indices (i2 is the central atom) + type
     i1 = anglelist[n][0];
     i2 = anglelist[n][1];
     i3 = anglelist[n][2];
     type = anglelist[n][3];
 
     // 1st bond
 
     delx1 = x[i1][0] - x[i2][0];
     dely1 = x[i1][1] - x[i2][1];
     delz1 = x[i1][2] - x[i2][2];
 
     rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
     r1 = sqrt(rsq1);
 
     // 2nd bond
 
     delx2 = x[i3][0] - x[i2][0];
     dely2 = x[i3][1] - x[i2][1];
     delz2 = x[i3][2] - x[i2][2];
 
     rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
     r2 = sqrt(rsq2);
 
     // angle (cos and sin)
 
     c = delx1*delx2 + dely1*dely2 + delz1*delz2;
     c /= r1*r2;
 
     // clamp to the valid range of acos() in case of roundoff
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
     // s ends up as 1/sin(theta); sin(theta) is bounded below by SMALL
     //   to avoid division by zero for (nearly) collinear atoms
     s = sqrt(1.0 - c*c);
     if (s < SMALL) s = SMALL;
     s = 1.0/s;
 
     // 1-3 LJ interaction.
     // we only want to use the repulsive part,
     // and it can be scaled (or off).
     // so this has to be done here and not in the
     // general non-bonded code.
 
     f13 = e13 = delx3 = dely3 = delz3 = 0.0;
 
     if (repflag) {
 
       delx3 = x[i1][0] - x[i3][0];
       dely3 = x[i1][1] - x[i3][1];
       delz3 = x[i1][2] - x[i3][2];
       rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;
 
       const int type1 = atom->type[i1];
       const int type3 = atom->type[i3];
 
       f13=0.0;
       e13=0.0;
 
       // only distances below the threshold rminsq contribute
       if (rsq3 < rminsq[type1][type3]) {
         const int ljt = lj_type[type1][type3];
         const double r2inv = 1.0/rsq3;
 
         // functional form is selected by the LJ type of the atom type pair
         // lj1,lj2 are the force prefactors, lj3,lj4 the energy prefactors
         if (ljt == LJ12_4) {
           const double r4inv=r2inv*r2inv;
 
           f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]);
           if (eflag) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]);
 
         } else if (ljt == LJ9_6) {
           const double r3inv = r2inv*sqrt(r2inv);
           const double r6inv = r3inv*r3inv;
 
           f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]);
           if (eflag) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]);
 
         } else if (ljt == LJ12_6) {
           const double r6inv = r2inv*r2inv*r2inv;
 
           f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]);
           if (eflag) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]);
         }
 
         // make sure energy is 0.0 at the cutoff.
         if (eflag) e13 -= emin[type1][type3];
 
         // f13 is multiplied by the displacement components below,
         //   hence the extra factor of 1/r^2
         f13 *= r2inv;
       }
     }
 
     // force & energy
 
     dtheta = acos(c) - theta0[type];
     tk = k[type] * dtheta;
 
     if (eflag) eangle = tk*dtheta;
 
     // prefactors for the forces on the two end atoms
     a = -2.0 * tk * s;
     a11 = a*c / rsq1;
     a12 = -a / (r1*r2);
     a22 = a*c / rsq2;
 
     // f1 = angle force on atom i1, f3 = angle force on atom i3
     f1[0] = a11*delx1 + a12*delx2;
     f1[1] = a11*dely1 + a12*dely2;
     f1[2] = a11*delz1 + a12*delz2;
     f3[0] = a22*delx2 + a12*delx1;
     f3[1] = a22*dely2 + a12*dely1;
     f3[2] = a22*delz2 + a12*delz1;
 
     // apply force to each of the 3 atoms
     // with newton_bond off, only atoms with index < nlocal are updated
     // the 1-3 force acts with opposite sign on i1 and i3
 
     if (newton_bond || i1 < nlocal) {
       f[i1][0] += f1[0] + f13*delx3;
       f[i1][1] += f1[1] + f13*dely3;
       f[i1][2] += f1[2] + f13*delz3;
     }
 
     // central atom receives the negative sum of the two angle forces
     if (newton_bond || i2 < nlocal) {
       f[i2][0] -= f1[0] + f3[0];
       f[i2][1] -= f1[1] + f3[1];
       f[i2][2] -= f1[2] + f3[2];
     }
 
     if (newton_bond || i3 < nlocal) {
       f[i3][0] += f3[0] - f13*delx3;
       f[i3][1] += f3[1] - f13*dely3;
       f[i3][2] += f3[2] - f13*delz3;
     }
 
     // tally the angle term and the 1-3 term separately
     if (evflag) {
       ev_tally(i1,i2,i3,nlocal,newton_bond,eangle,f1,f3,
                          delx1,dely1,delz1,delx2,dely2,delz2);
       if (repflag)
         ev_tally13(i1,i3,nlocal,newton_bond,e13,f13,delx3,dely3,delz3);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AngleSDK::allocate()
 {
   // create per-type arrays with ntypes+1 entries each
   // angle types are addressed with indices 1 to ntypes
 
   const int ntypes = atom->nangletypes;
   allocated = 1;
 
   memory->create(k,ntypes+1,"angle:k");
   memory->create(theta0,ntypes+1,"angle:theta0");
   memory->create(repscale,ntypes+1,"angle:repscale");
   memory->create(setflag,ntypes+1,"angle:setflag");
 
   // no coefficients have been set yet for any type
 
   for (int itype = 1; itype <= ntypes; itype++)
     setflag[itype] = 0;
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more types
 ------------------------------------------------------------------------- */
 
 void AngleSDK::coeff(int narg, char **arg)
 {
   if ((narg < 3) || (narg > 6))
     error->all(FLERR,"Incorrect args for angle coefficients");
 
   if (!allocated) allocate();
 
   int ilo,ihi;
   force->bounds(FLERR,arg[0],atom->nangletypes,ilo,ihi);
 
   double k_one = force->numeric(FLERR,arg[1]);
   double theta0_one = force->numeric(FLERR,arg[2]);
   double repscale_one=1.0;
 
   // backward compatibility with old cg/cmm style input:
   // this had <lj_type> <epsilon> <sigma>
   // if epsilon is set to 0.0 we accept it as repscale 0.0
   // otherwise assume repscale 1.0, since we were using
   // epsilon to turn repulsion on or off.
   if (narg == 6) {
     repscale_one = force->numeric(FLERR,arg[4]);
     if (repscale_one > 0.0) repscale_one = 1.0;
   } else if (narg == 4) repscale_one = force->numeric(FLERR,arg[3]);
   else if (narg == 3) repscale_one = 1.0;
   else error->all(FLERR,"Incorrect args for angle coefficients");
 
   // convert theta0 from degrees to radians and store coefficients
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     k[i] = k_one;
     theta0[i] = theta0_one/180.0 * MY_PI;
     repscale[i] = repscale_one;
     setflag[i] = 1;
     count++;
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for angle coefficients");
 }
 
 /* ----------------------------------------------------------------------
    error check and initialize all values needed for force computation
    determines whether any angle type needs the 1-3 repulsion and,
    if so, fetches the required LJ parameter tables from the pair style
 ------------------------------------------------------------------------- */
 
 void AngleSDK::init_style()
 {
   // 1-3 repulsion is needed if any angle type has a positive repscale
 
   const int ntypes = atom->nangletypes;
 
   repflag = 0;
   for (int itype = 1; itype <= ntypes; itype++) {
     if (repscale[itype] > 0.0) {
       repflag = 1;
       break;
     }
   }
 
   if (!repflag) return;
 
   // make sure there is a pair style to take the parameters from
 
   if (force->pair == NULL)
     error->all(FLERR,"Angle style SDK requires use of a compatible with Pair style");
 
   // set up pointers to access SDK LJ parameters for 1-3 interactions
 
   int itmp;
   lj1 = (double **) force->pair->extract("lj1",itmp);
   lj2 = (double **) force->pair->extract("lj2",itmp);
   lj3 = (double **) force->pair->extract("lj3",itmp);
   lj4 = (double **) force->pair->extract("lj4",itmp);
   lj_type = (int **) force->pair->extract("lj_type",itmp);
   rminsq = (double **) force->pair->extract("rminsq",itmp);
   emin = (double **) force->pair->extract("emin",itmp);
 
   // every one of the tables must be provided by the pair style
 
   const bool complete = lj1 && lj2 && lj3 && lj4 && lj_type && rminsq && emin;
   if (!complete)
     error->all(FLERR,"Angle style SDK is incompatible with Pair style");
 }
 
 /* ---------------------------------------------------------------------- */
 
 double AngleSDK::equilibrium_angle(int i)
 {
   // theta0 is stored in radians, see coeff()
 
   const double angle = theta0[i];
   return angle;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes out coeffs to restart file
    order = k, theta0, repscale, each for types 1 to nangletypes
 ------------------------------------------------------------------------- */
 
 void AngleSDK::write_restart(FILE *fp)
 {
   const int ntypes = atom->nangletypes;
 
   // index 0 of each array is skipped
 
   fwrite(k+1,sizeof(double),ntypes,fp);
   fwrite(theta0+1,sizeof(double),ntypes,fp);
   fwrite(repscale+1,sizeof(double),ntypes,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads coeffs from restart file, bcasts them
    order = k, theta0, repscale, each for types 1 to nangletypes
 ------------------------------------------------------------------------- */
 
 void AngleSDK::read_restart(FILE *fp)
 {
   allocate();
 
   const int ntypes = atom->nangletypes;
 
   // only proc 0 reads from the file
 
   if (comm->me == 0) {
     fread(k+1,sizeof(double),ntypes,fp);
     fread(theta0+1,sizeof(double),ntypes,fp);
     fread(repscale+1,sizeof(double),ntypes,fp);
   }
 
   // distribute the coefficients to all procs
 
   MPI_Bcast(k+1,ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(theta0+1,ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(repscale+1,ntypes,MPI_DOUBLE,0,world);
 
   // all types now have their coefficients set
 
   for (int itype = 1; itype <= ntypes; itype++)
     setflag[itype] = 1;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void AngleSDK::write_data(FILE *fp)
 {
   for (int i = 1; i <= atom->nangletypes; i++)
     fprintf(fp,"%d %g %g\n",i,k[i],theta0[i]/MY_PI*180.0);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AngleSDK::ev_tally13(int i, int j, int nlocal, int newton_bond,
                           double evdwl, double fpair,
                           double delx, double dely, double delz)
 {
   // tally energy and virial of one 1-3 pair interaction between
   //   atoms i and j into the global and per-atom accumulators
   // with newton_bond off, an atom only gets its half of a contribution
   //   if its index is < nlocal
 
   const int tally_i = (newton_bond || i < nlocal);
   const int tally_j = (newton_bond || j < nlocal);
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) {
         energy += evdwl;
       } else {
         if (i < nlocal) energy += 0.5*evdwl;
         if (j < nlocal) energy += 0.5*evdwl;
       }
     }
     if (eflag_atom) {
       if (tally_i) eatom[i] += 0.5*evdwl;
       if (tally_j) eatom[j] += 0.5*evdwl;
     }
   }
 
   if (!vflag_either) return;
 
   // virial components in the order xx, yy, zz, xy, xz, yz
 
   double v[6];
   v[0] = delx*delx*fpair;
   v[1] = dely*dely*fpair;
   v[2] = delz*delz*fpair;
   v[3] = delx*dely*fpair;
   v[4] = delx*delz*fpair;
   v[5] = dely*delz*fpair;
 
   if (vflag_global) {
     if (newton_bond) {
       for (int n = 0; n < 6; n++) virial[n] += v[n];
     } else {
       if (i < nlocal)
         for (int n = 0; n < 6; n++) virial[n] += 0.5*v[n];
       if (j < nlocal)
         for (int n = 0; n < 6; n++) virial[n] += 0.5*v[n];
     }
   }
 
   if (vflag_atom) {
     if (tally_i)
       for (int n = 0; n < 6; n++) vatom[i][n] += 0.5*v[n];
     if (tally_j)
       for (int n = 0; n < 6; n++) vatom[j][n] += 0.5*v[n];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double AngleSDK::single(int type, int i1, int i2, int i3)
 {
   double **x = atom->x;
 
   double delx1 = x[i1][0] - x[i2][0];
   double dely1 = x[i1][1] - x[i2][1];
   double delz1 = x[i1][2] - x[i2][2];
   domain->minimum_image(delx1,dely1,delz1);
   double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1);
 
   double delx2 = x[i3][0] - x[i2][0];
   double dely2 = x[i3][1] - x[i2][1];
   double delz2 = x[i3][2] - x[i2][2];
   domain->minimum_image(delx2,dely2,delz2);
   double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2);
 
   double c = delx1*delx2 + dely1*dely2 + delz1*delz2;
   c /= r1*r2;
   if (c > 1.0) c = 1.0;
   if (c < -1.0) c = -1.0;
 
   double e13=0.0;
   if (repflag) {
 
     // 1-3 LJ interaction.
     double delx3 = x[i1][0] - x[i3][0];
     double dely3 = x[i1][1] - x[i3][1];
     double delz3 = x[i1][2] - x[i3][2];
     domain->minimum_image(delx3,dely3,delz3);
 
     const int type1 = atom->type[i1];
     const int type3 = atom->type[i3];
 
     const double rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;
 
     if (rsq3 < rminsq[type1][type3]) {
       const int ljt = lj_type[type1][type3];
       const double r2inv = 1.0/rsq3;
 
       if (ljt == LJ12_4) {
         const double r4inv=r2inv*r2inv;
 
         e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]);
 
       } else if (ljt == LJ9_6) {
         const double r3inv = r2inv*sqrt(r2inv);
         const double r6inv = r3inv*r3inv;
 
         e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]);
 
       } else if (ljt == LJ12_6) {
         const double r6inv = r2inv*r2inv*r2inv;
 
         e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]);
       }
 
       // make sure energy is 0.0 at the cutoff.
       e13 -= emin[type1][type3];
     }
   }
 
   double dtheta = acos(c) - theta0[type];
   double tk = k[type] * dtheta;
   return tk*dtheta + e13;
 }
diff --git a/src/USER-COLVARS/ndx_group.cpp b/src/USER-COLVARS/ndx_group.cpp
index 10ccf000b..31d8332c9 100644
--- a/src/USER-COLVARS/ndx_group.cpp
+++ b/src/USER-COLVARS/ndx_group.cpp
@@ -1,248 +1,249 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
+
 /* ----------------------------------------------------------------------
-   Contributing author:  Axel Kohlmeyer (Temple U)
+   Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "ndx_group.h"
 #include "atom.h"
 #include "comm.h"
 #include "group.h"
 #include "memory.h"
 #include "force.h"
 #include "error.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 using namespace LAMMPS_NS;
 // size of the line buffer passed to fgets() in find_section() and read_section()
 #define BUFLEN 4096
 // initial capacity, and growth increment, of the tag buffer in read_section()
 #define DELTA 16384
 
 // Scan forward from the current position of fp for a section header, i.e. a
 // line whose first three whitespace separated tokens are "[", a title and "]"
 // (only the first character of the bracket tokens is compared).
 // If name is NULL the first header found matches, otherwise the title has to
 // be equal to name. Returns a copy of the title allocated with new[] (the
 // caller releases it with delete[]), or NULL when the end of the file is hit.
 static char *find_section(FILE *fp, const char *name)
 {
   static const char separators[] = " \t\n\r\f";
   char linebuf[BUFLEN];
 
   while (fgets(linebuf,BUFLEN,fp) != NULL) {
     const char *opening = strtok(linebuf,separators);
     if ((opening == NULL) || (*opening != '[')) continue;
 
     const char *title = strtok(NULL,separators);
     if (title == NULL) continue;
 
     const char *closing = strtok(NULL,separators);
     if ((closing == NULL) || (*closing != ']')) continue;
 
     // a specific section was requested and this is a different one
     if ((name != NULL) && (strcmp(name,title) != 0)) continue;
 
     const int nchars = strlen(title);
     char *copy = new char[nchars+1];
     strncpy(copy,title,nchars+1);
     return copy;
   }
   return NULL;
 }
 
 // Read whitespace separated atom IDs from fp until either the end of the
 // file or a token starting with '[' is found. The line containing that
 // token has been consumed when the function returns.
 // num is set to the number of IDs stored. The returned buffer is allocated
 // with malloc()/realloc() and has to be released by the caller with free().
 static tagint *read_section(FILE *fp, bigint &num)
 {
   char linebuf[BUFLEN];
   bigint capacity = DELTA;
   tagint *tagbuf = (tagint *)malloc(sizeof(tagint)*capacity);
 
   num = 0;
   while (fgets(linebuf,BUFLEN,fp) != NULL) {
     for (char *tok = strtok(linebuf," \t\n\r\f"); tok != NULL;
          tok = strtok(NULL," \t\n\r\f")) {
 
       // start of the next section: everything for this one has been read
       if (*tok == '[') return tagbuf;
 
       tagbuf[num++] = ATOTAGINT(tok);
 
       // buffer is full: enlarge it by another DELTA entries
       if (num == capacity) {
         capacity += DELTA;
         tagbuf = (tagint *)realloc(tagbuf,sizeof(tagint)*capacity);
       }
     }
   }
   return tagbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Ndx2Group::command(int narg, char **arg)
 {
   int len;
   bigint num;
   FILE *fp;
   char *name = NULL;
   tagint *tags;
 
   if (narg < 1) error->all(FLERR,"Illegal ndx2group command");
 
   if (atom->tag_enable == 0)
       error->all(FLERR,"Must have atom IDs for ndx2group command");
 
   if (comm->me == 0) {
     fp = fopen(arg[0], "r");
     if (fp == NULL)
       error->one(FLERR,"Cannot open index file for reading");
 
     if (screen)
       fprintf(screen, "Reading groups from index file %s:\n",arg[0]);
     if (logfile)
       fprintf(logfile,"Reading groups from index file %s:\n",arg[0]);
   }
 
   if (narg == 1) {    // restore all groups
 
     do {
       if (comm->me == 0) {
         len = 0;
 
         // find the next section.
         // if we had processed a section, before we need to step back
         if (name != NULL) {
           rewind(fp);
           char *tmp = find_section(fp,name);
           delete[] tmp;
           delete[] name;
           name = NULL;
         }
         name = find_section(fp,NULL);
         if (name != NULL) {
           len=strlen(name);
 
           // skip over group "all", which is called "System" in gromacs
           if (strcmp(name,"System") == 0) continue;
 
           if (screen)
             fprintf(screen," Processing group '%s'\n",name);
           if (logfile)
             fprintf(logfile," Processing group '%s'\n",name);
         }
         MPI_Bcast(&len,1,MPI_INT,0,world);
         if (len > 0) {
           MPI_Bcast(name,len,MPI_CHAR,0,world);
 
           // read tags for atoms in group and broadcast
           num = 0;
           tags = read_section(fp,num);
           MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world);
           MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world);
           create(name,num,tags);
           free(tags);
         }
       } else {
         MPI_Bcast(&len,1,MPI_INT,0,world);
         if (len > 0) {
           delete[] name;
           name = new char[len+1];
           MPI_Bcast(name,len+1,MPI_CHAR,0,world);
 
           MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world);
           tags = (tagint *)malloc(sizeof(tagint)*(num ? num : 1));
           MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world);
           create(name,num,tags);
           free(tags);
         }
       }
     } while (len);
 
   } else {            // restore selected groups
     for (int idx=1; idx < narg; ++idx) {
 
       if (comm->me == 0) {
         len = 0;
 
         // find named section, search from beginning of file
         if (name != NULL) delete[] name;
         rewind(fp);
         name = find_section(fp,arg[idx]);
         if (name != NULL) len=strlen(name);
 
         if (screen)
           fprintf(screen," %s group '%s'\n",
                   len ? "Processing" : "Skipping",arg[idx]);
         if (logfile)
           fprintf(logfile,"%s group '%s'\n",
                   len ? "Processing" : "Skipping",arg[idx]);
 
         MPI_Bcast(&len,1,MPI_INT,0,world);
         if (len > 0) {
           MPI_Bcast(name,len+1,MPI_CHAR,0,world);
           // read tags for atoms in group and broadcast
           num = 0;
           tags = read_section(fp,num);
           MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world);
           MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world);
           create(name,num,tags);
           free(tags);
         }
       } else {
 
         MPI_Bcast(&len,1,MPI_INT,0,world);
         if (len > 0) {
           delete[] name;
           name = new char[len+1];
           MPI_Bcast(name,len+1,MPI_CHAR,0,world);
 
           MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world);
           tags = (tagint *)malloc(sizeof(tagint)*(num ? num : 1));
           MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world);
           create(name,num,tags);
           free(tags);
         }
       }
     }
   }
 
   delete[] name;
   if (comm->me == 0) {
     if (screen) fputs("\n",screen);
     if (logfile) fputs("\n",logfile);
     fclose(fp);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Ndx2Group::create(char *name, bigint num, tagint *tags)
 {
   // wipe out all members if the group exists. gid==0 is group "all"
   int gid = group->find(name);
   if (gid > 0) {
     char *cmd[2];
     cmd[0] = name;
     cmd[1] = (char *)"clear";
     group->assign(2,cmd);
   }
 
   // map from global to local
   const int nlocal = atom->nlocal;
   int *flags = (int *)calloc(nlocal,sizeof(int));
   for (bigint i=0; i < num; ++i) {
     const int id = atom->map(tags[i]);
     if (id < nlocal && id >= 0)
       flags[id] = 1;
   }
   group->create(name,flags);
   free(flags);
 }
 
diff --git a/src/USER-EFF/fix_nh_eff.cpp b/src/USER-EFF/fix_nh_eff.cpp
index bbf5ac31f..464498969 100644
--- a/src/USER-EFF/fix_nh_eff.cpp
+++ b/src/USER-EFF/fix_nh_eff.cpp
@@ -1,113 +1,113 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
- Contributing author: Andres Jaramillo-Botero (Caltech)
+   Contributing author: Andres Jaramillo-Botero (Caltech)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include "fix_nh_eff.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
 #include "error.h"
 #include "domain.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // NOTE(review): neither enumerator is referenced in the part of this file
 // shown here - confirm whether the enum is still needed
 enum{NOBIAS,BIAS};
 
 /* ---------------------------------------------------------------------- */
 
 FixNHEff::FixNHEff(LAMMPS *lmp, int narg, char **arg) :
   FixNH(lmp, narg, arg)
 {
   // all argument handling is left to FixNH; this class only insists on
   // the electron flag of the atom style being set
   if (atom->electron_flag == 0)
     error->all(FLERR,"Fix nvt/nph/npt/eff requires atom style electron");
 }
 
 /* ----------------------------------------------------------------------
    half-step update of the velocities, followed by the radial velocities
    of all atoms in the fix group whose spin is +1 or -1
 ------------------------------------------------------------------------- */
 
 void FixNHEff::nve_v()
 {
   // translational part is handled by the parent class
 
   FixNH::nve_v();
 
   double *erforce = atom->erforce;
   double *ervel = atom->ervel;
   double *mass = atom->mass;
   int *spin = atom->spin;
   int *type = atom->type;
   int *mask = atom->mask;
 
   // scale factor applied to the mass in the radial equation of motion
   const double mefactor = domain->dimension/4.0;
 
   const int nlocal =
     (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     if (abs(spin[i]) != 1) continue;
 
     const double dtfm = dtf / mass[type[i]];
 
     // NOTE(review): this assigns the radial velocity instead of adding to
     // it, although the step is described as an update - confirm intent
     ervel[i] = dtfm * erforce[i] / mefactor;
   }
 }
 
 /* ----------------------------------------------------------------------
    full-step update of the positions, followed by the radii of all atoms
    in the fix group whose spin is +1 or -1
 ------------------------------------------------------------------------- */
 
 void FixNHEff::nve_x()
 {
   // translational part is handled by the parent class
 
   FixNH::nve_x();
 
   double *eradius = atom->eradius;
   double *ervel = atom->ervel;
   int *spin = atom->spin;
   int *mask = atom->mask;
 
   const int nlocal =
     (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && (abs(spin[i]) == 1))
       eradius[i] += dtv * ervel[i];
   }
 }
 
 /* ----------------------------------------------------------------------
    half-step scaling of the velocities, followed by scaling the radial
    velocities of all atoms in the fix group whose spin is +1 or -1
 ------------------------------------------------------------------------- */
 
 void FixNHEff::nh_v_temp()
 {
   // translational part is handled by the parent class
 
   FixNH::nh_v_temp();
 
   double *ervel = atom->ervel;
   int *spin = atom->spin;
   int *mask = atom->mask;
 
   const int nlocal =
     (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if ((mask[i] & groupbit) && (abs(spin[i]) == 1))
       ervel[i] *= factor_eta;
   }
 }
diff --git a/src/USER-H5MD/dump_h5md.cpp b/src/USER-H5MD/dump_h5md.cpp
index ba31c22d2..7456d6fa4 100644
--- a/src/USER-H5MD/dump_h5md.cpp
+++ b/src/USER-H5MD/dump_h5md.cpp
@@ -1,555 +1,558 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
    Contributing author: Pierre de Buyl (KU Leuven)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #include "ch5md.h"
 #include "dump_h5md.h"
 #include "domain.h"
 #include "atom.h"
 #include "update.h"
 #include "group.h"
 #include "output.h"
 #include "error.h"
 #include "force.h"
 #include "memory.h"
 #include "version.h"
 
 using namespace LAMMPS_NS;
 
 // smaller / larger of two values. the selected argument appears twice in
 // the expansion, so do not pass expressions with side effects.
 #define MYMIN(a,b) ((a) < (b) ? (a) : (b))
 #define MYMAX(a,b) ((a) > (b) ? (a) : (b))
 
 /** Scan common options for the dump elements
  *
  * Consumes leading "every <N>" pairs from arg and stores atoi(<N>) in
  * *every. Scanning stops at the first argument that is not "every".
  *
  * \param narg   number of entries in arg
  * \param arg    arguments that follow the element keyword
  * \param every  receives the value of the last "every" option, if any
  * \return number of arguments consumed, or -1 if an "every" keyword
  *         is not followed by a value
  */
 static int element_args(int narg, char **arg, int *every)
 {
   int iarg=0;
   while (iarg<narg) {
     if (strcmp(arg[iarg], "every")==0) {
       // check relative to the current position, so that a trailing
       // "every" never reads past the end of arg
       if (iarg+1>=narg) return -1;
       *every = atoi(arg[iarg+1]);
       iarg+=2;
     } else {
       break;
     }
   }
   return iarg;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the arguments of the dump h5md command, allocate the global arrays
 // for the requested per-atom quantities and open the output file.
 //   arg[3]     = dump interval (every_dump); 0 selects one fixed frame
 //   arg[5...]  = element keywords (position, image, velocity, force,
 //                species, charge), each optionally followed by "every N",
 //                and the options file_from, box, create_group, author
 // An every_* value of -1 marks an element that was not requested.
 DumpH5MD::DumpH5MD(LAMMPS *lmp, int narg, char **arg) : Dump(lmp, narg, arg)
 {
   if (narg<6) error->all(FLERR,"Illegal dump h5md command");
   if (binary || compressed || multifile || multiproc)
     error->all(FLERR,"Invalid dump h5md filename");
 
   // triclinic boxes are rejected
   if (domain->triclinic!=0)
     error->all(FLERR,"Invalid domain for dump h5md. Only orthogonal domains supported.");
 
   sort_flag = 1;
   sortcol = 0;
   format_default = NULL;
   flush_flag = 0;
   unwrap_flag = 0;
   datafile_from_dump = -1;
   author_name=NULL;
 
   every_dump = force->inumeric(FLERR,arg[3]);
   every_position = every_image = -1;
   every_velocity = every_force = every_species = -1;
   every_charge = -1;
 
   do_box=true;
   create_group=true;
 
   bool box_is_set, create_group_is_set;
   box_is_set = create_group_is_set = false;
   int iarg=5;
   int n_parsed, default_every;
 
   // size_one is accumulated below from the elements that are requested
   size_one=0;
   if (every_dump==0) default_every=0; else default_every=1;
 
   while (iarg<narg) {
     if (strcmp(arg[iarg], "position")==0) {
       every_position=default_every;
       iarg+=1;
       n_parsed = element_args(narg-iarg, &arg[iarg], &every_position);
       if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command");
       iarg += n_parsed;
       size_one+=domain->dimension;
     } else if (strcmp(arg[iarg], "image")==0) {
       // image has to follow position and shares its interval
       if (every_position<0) error->all(FLERR, "Illegal dump h5md command");
       iarg+=1;
       size_one+=domain->dimension;
       every_image = every_position;
     } else if (strcmp(arg[iarg], "velocity")==0) {
       every_velocity = default_every;
       iarg+=1;
       n_parsed = element_args(narg-iarg, &arg[iarg], &every_velocity);
       if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command");
       iarg += n_parsed;
       size_one+=domain->dimension;
     } else if (strcmp(arg[iarg], "force")==0) {
       every_force = default_every;
       iarg+=1;
       n_parsed = element_args(narg-iarg, &arg[iarg], &every_force);
       if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command");
       iarg += n_parsed;
       size_one+=domain->dimension;
     } else if (strcmp(arg[iarg], "species")==0) {
       every_species=default_every;
       iarg+=1;
       n_parsed = element_args(narg-iarg, &arg[iarg], &every_species);
       if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command");
       iarg += n_parsed;
       size_one+=1;
     } else if (strcmp(arg[iarg], "charge")==0) {
       if (!atom->q_flag)
         error->all(FLERR, "Requesting non-allocated quantity q in dump_h5md");
       every_charge = default_every;
       iarg+=1;
       n_parsed = element_args(narg-iarg, &arg[iarg], &every_charge);
       if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command");
       iarg += n_parsed;
       size_one+=1;
     } else if (strcmp(arg[iarg], "file_from")==0) {
       if (iarg+1>=narg) {
         error->all(FLERR, "Invalid number of arguments in dump h5md");
       }
       if (box_is_set||create_group_is_set)
         error->all(FLERR, "Cannot set file_from in dump h5md after box or create_group");
 
       // look up the dump whose ID was given and remember its index
       int idump;
       for (idump = 0; idump < output->ndump; idump++)
         if (strcmp(arg[iarg+1],output->dump[idump]->id) == 0) break;
       if (idump == output->ndump) error->all(FLERR,"Cound not find dump_modify ID");
       datafile_from_dump = idump;
       do_box=false;
       create_group=false;
       iarg+=2;
     } else if (strcmp(arg[iarg], "box")==0) {
       if (iarg+1>=narg) {
         error->all(FLERR, "Invalid number of arguments in dump h5md");
       }
       box_is_set = true;
       if (strcmp(arg[iarg+1], "yes")==0)
         do_box=true;
       else if (strcmp(arg[iarg+1], "no")==0)
         do_box=false;
       else
         error->all(FLERR, "Illegal dump h5md command");
       iarg+=2;
     } else if (strcmp(arg[iarg], "create_group")==0) {
       if (iarg+1>=narg) {
         error->all(FLERR, "Invalid number of arguments in dump h5md");
       }
       create_group_is_set = true;
       if (strcmp(arg[iarg+1], "yes")==0)
         create_group=true;
       else if (strcmp(arg[iarg+1], "no")==0) {
         create_group=false;
       }
       else
         error->all(FLERR, "Illegal dump h5md command");
       iarg+=2;
     } else if (strcmp(arg[iarg], "author")==0) {
       if (iarg+1>=narg) {
         error->all(FLERR, "Invalid number of arguments in dump h5md");
       }
       if (author_name==NULL) {
         // size the buffer for the value that is copied, not for the keyword
         author_name = new char[strlen(arg[iarg+1])+1];
         strcpy(author_name, arg[iarg+1]);
       } else {
         error->all(FLERR, "Illegal dump h5md command: author argument repeated");
       }
       iarg+=2;
     } else {
       error->all(FLERR, "Invalid argument to dump h5md");
     }
   }
 
   // allocate global arrays for the requested per-atom quantities
 
   bigint n = group->count(igroup);
   natoms = static_cast<int> (n);
 
   if (every_position>=0)
     memory->create(dump_position,domain->dimension*natoms,"dump:position");
   if (every_image>=0)
     memory->create(dump_image,domain->dimension*natoms,"dump:image");
   if (every_velocity>=0)
     memory->create(dump_velocity,domain->dimension*natoms,"dump:velocity");
   if (every_force>=0)
     memory->create(dump_force,domain->dimension*natoms,"dump:force");
   if (every_species>=0)
     memory->create(dump_species,natoms,"dump:species");
   if (every_charge>=0)
     memory->create(dump_charge,natoms,"dump:charge");
 
   openfile();
   ntotal = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the global array of every requested element (every_* >= 0) and,
 // on rank 0 only, close the matching h5md element.
 DumpH5MD::~DumpH5MD()
 {
   const bool on_root = (me == 0);
 
   if (every_position >= 0) {
     memory->destroy(dump_position);
     if (on_root) {
       h5md_close_element(particles_data.position);
       if (do_box) h5md_close_element(particles_data.box_edges);
     }
   }
 
   if (every_image >= 0) {
     memory->destroy(dump_image);
     if (on_root) h5md_close_element(particles_data.image);
   }
 
   if (every_velocity >= 0) {
     memory->destroy(dump_velocity);
     if (on_root) h5md_close_element(particles_data.velocity);
   }
 
   if (every_force >= 0) {
     memory->destroy(dump_force);
     if (on_root) h5md_close_element(particles_data.force);
   }
 
   if (every_species >= 0) {
     memory->destroy(dump_species);
     if (on_root) h5md_close_element(particles_data.species);
   }
 
   if (every_charge >= 0) {
     memory->destroy(dump_charge);
     if (on_root) h5md_close_element(particles_data.charge);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Verify that sorting is enabled and that the sort column is 0.
 void DumpH5MD::init_style()
 {
   const bool sorted_by_id = (sort_flag != 0) && (sortcol == 0);
   if (!sorted_by_id)
     error->all(FLERR,"Dump h5md requires sorting by atom ID");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void DumpH5MD::openfile()
 {
   char *group_name;
   int group_name_length;
   int dims[2];
   char *boundary[3];
   for (int i=0; i<3; i++) {
     boundary[i] = new char[9];
     if (domain->periodicity[i]==1) {
       strcpy(boundary[i], "periodic");
     } else {
       strcpy(boundary[i], "none");
     }
   }
 
   if (me == 0) {
     if (datafile_from_dump<0) {
       if (author_name==NULL) {
 	datafile = h5md_create_file(filename, "N/A", NULL, "lammps", LAMMPS_VERSION);
       } else {
 	datafile = h5md_create_file(filename, author_name, NULL, "lammps", LAMMPS_VERSION);
       }
       group_name_length = strlen(group->names[igroup])+1;
       group_name = new char[group_name_length];
       strcpy(group_name, group->names[igroup]);
       if (create_group) {
 	particles_data = h5md_create_particles_group(datafile, group_name);
       } else {
 	particles_data.group = h5md_open_particles_group(datafile.particles, group_name);
       }
       delete [] group_name;
       dims[0] = natoms;
       dims[1] = domain->dimension;
       if (every_position>0) {
 	particles_data.position = h5md_create_time_data(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, NULL);
 	h5md_create_box(&particles_data, dims[1], boundary, true, NULL, &particles_data.position);
       }
       if (every_image>0)
 	particles_data.image = h5md_create_time_data(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, &particles_data.position);
       if (every_velocity>0)
 	particles_data.velocity = h5md_create_time_data(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, NULL);
       if (every_force>0)
 	particles_data.force = h5md_create_time_data(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, NULL);
       if (every_species>0)
 	particles_data.species = h5md_create_time_data(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, NULL);
       if (every_charge>0) {
 	particles_data.charge = h5md_create_time_data(particles_data.group, "charge", 1, dims, H5T_NATIVE_DOUBLE, NULL);
 	h5md_write_string_attribute(particles_data.group, "charge", "type", "effective");
       }
     } else {
       DumpH5MD* other_dump;
       other_dump=(DumpH5MD*)output->dump[datafile_from_dump];
       datafile = other_dump->datafile;
       group_name_length = strlen(group->names[igroup]);
       group_name = new char[group_name_length];
       strcpy(group_name, group->names[igroup]);
       if (create_group) {
 	particles_data = h5md_create_particles_group(datafile, group_name);
       } else {
 	particles_data = other_dump->particles_data;
       }
       dims[0] = natoms;
       dims[1] = domain->dimension;
       if (every_position>0) {
 	particles_data.position = h5md_create_time_data(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, NULL);
 	h5md_create_box(&particles_data, dims[1], boundary, true, NULL, &particles_data.position);
       }
       if (every_image>0)
 	particles_data.image = h5md_create_time_data(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, &particles_data.position);
       if (every_velocity>0)
 	particles_data.velocity = h5md_create_time_data(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, NULL);
       if (every_force>0)
 	particles_data.force = h5md_create_time_data(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, NULL);
       if (every_species>0)
 	particles_data.species = h5md_create_time_data(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, NULL);
       if (every_charge>0) {
 	particles_data.charge = h5md_create_time_data(particles_data.group, "charge", 1, dims, H5T_NATIVE_DOUBLE, NULL);
 	h5md_write_string_attribute(particles_data.group, "charge", "type", "effective");
       }
 
     }
   }
 
   if (author_name!=NULL) delete [] author_name;
   for (int i=0; i<3; i++) {
     delete [] boundary[i];
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void DumpH5MD::write_header(bigint nbig)
 {
   // intentionally empty: this dump style writes no per-snapshot header
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy the requested quantities of all local atoms in the dump group into
 // buf and their IDs into ids. Per atom the order is: position, image,
 // velocity, force, species, charge. The third component of the vector
 // quantities is only stored if the dimension is larger than 2.
 void DumpH5MD::pack(tagint *ids)
 {
   tagint *tag = atom->tag;
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   int *species = atom->type;
   double *q = atom->q;
   imageint *image = atom->image;
   int *mask = atom->mask;
   const int nlocal = atom->nlocal;
   const bool has_z = (domain->dimension > 2);
 
   const double xprd = domain->xprd;
   const double yprd = domain->yprd;
   const double zprd = domain->zprd;
 
   int ibuf = 0;     // next free slot in buf
   int nids = 0;     // number of atoms packed so far
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     if (every_position>=0) {
       // decode the three image flags packed into image[i]
       int ix = (image[i] & IMGMASK) - IMGMAX;
       int iy = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
       int iz = (image[i] >> IMG2BITS) - IMGMAX;
 
       if (unwrap_flag == 1) {
         // shift the coordinates by the image count times the box length
         buf[ibuf++] = (x[i][0] + ix * xprd);
         buf[ibuf++] = (x[i][1] + iy * yprd);
         if (has_z) buf[ibuf++] = (x[i][2] + iz * zprd);
       } else {
         buf[ibuf++] = x[i][0];
         buf[ibuf++] = x[i][1];
         if (has_z) buf[ibuf++] = x[i][2];
       }
 
       if (every_image>=0) {
         buf[ibuf++] = ix;
         buf[ibuf++] = iy;
         if (has_z) buf[ibuf++] = iz;
       }
     }
 
     if (every_velocity>=0) {
       buf[ibuf++] = v[i][0];
       buf[ibuf++] = v[i][1];
       if (has_z) buf[ibuf++] = v[i][2];
     }
 
     if (every_force>=0) {
       buf[ibuf++] = f[i][0];
       buf[ibuf++] = f[i][1];
       if (has_z) buf[ibuf++] = f[i][2];
     }
 
     if (every_species>=0) buf[ibuf++] = species[i];
     if (every_charge>=0) buf[ibuf++] = q[i];
 
     ids[nids++] = tag[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void DumpH5MD::write_data(int n, double *mybuf)
 {
   // copy buf atom coords into global array
 
   int m = 0;
   int dim = domain->dimension;
   int k = dim*ntotal;
   int k_image = dim*ntotal;
   int k_velocity = dim*ntotal;
   int k_force = dim*ntotal;
   int k_species = ntotal;
   int k_charge = ntotal;
   for (int i = 0; i < n; i++) {
     if (every_position>=0) {
       for (int j=0; j<dim; j++) {
 	dump_position[k++] = mybuf[m++];
       }
       if (every_image>=0)
 	for (int j=0; j<dim; j++) {
 	  dump_image[k_image++] = mybuf[m++];
 	}
     }
     if (every_velocity>=0)
       for (int j=0; j<dim; j++) {
 	dump_velocity[k_velocity++] = mybuf[m++];
       }
     if (every_force>=0)
       for (int j=0; j<dim; j++) {
 	dump_force[k_force++] = mybuf[m++];
       }
     if (every_species>=0)
       dump_species[k_species++] = mybuf[m++];
     if (every_charge>=0)
       dump_charge[k_charge++] = mybuf[m++];
     ntotal++;
   }
 
   // if last chunk of atoms in this snapshot, write global arrays to file
 
   if (ntotal == natoms) {
     if (every_dump>0) {
       write_frame();
       ntotal = 0;
     } else {
       write_fixed_frame();
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int DumpH5MD::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"unwrap") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
     if (strcmp(arg[1],"yes") == 0) unwrap_flag = 1;
     else if (strcmp(arg[1],"no") == 0) unwrap_flag = 0;
     else error->all(FLERR,"Illegal dump_modify command");
     return 2;
   }
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Append the global arrays to their time-dependent data sets. An element is
 // written when the current step is a multiple of every_<element>*every_dump.
 void DumpH5MD::write_frame()
 {
   int local_step = update->ntimestep;
   double local_time = local_step * update->dt;
 
   double edges[3];
   edges[0] = boxxhi - boxxlo;
   edges[1] = boxyhi - boxylo;
   edges[2] = boxzhi - boxzlo;
 
   if (every_position>0) {
     // box edges and image flags are written together with the positions
     const int stride = every_position*every_dump;
     if (local_step % stride == 0) {
       h5md_append(particles_data.position, dump_position, local_step, local_time);
       h5md_append(particles_data.box_edges, edges, local_step, local_time);
       if (every_image>0)
         h5md_append(particles_data.image, dump_image, local_step, local_time);
     }
   } else if (do_box) {
     h5md_append(particles_data.box_edges, edges, local_step, local_time);
   }
 
   if (every_velocity>0) {
     const int stride = every_velocity*every_dump;
     if (local_step % stride == 0)
       h5md_append(particles_data.velocity, dump_velocity, local_step, local_time);
   }
 
   if (every_force>0) {
     const int stride = every_force*every_dump;
     if (local_step % stride == 0)
       h5md_append(particles_data.force, dump_force, local_step, local_time);
   }
 
   if (every_species>0) {
     const int stride = every_species*every_dump;
     if (local_step % stride == 0)
       h5md_append(particles_data.species, dump_species, local_step, local_time);
   }
 
   if (every_charge>0) {
     const int stride = every_charge*every_dump;
     if (local_step % stride == 0)
       h5md_append(particles_data.charge, dump_charge, local_step, local_time);
   }
 }
 
 // Write all elements with every_* == 0 as fixed-in-time data sets,
 // using the contents of the global arrays.
 void DumpH5MD::write_fixed_frame()
 {
   double edges[3];
   int dims[2];
   char *boundary[3];
 
   // per-dimension boundary labels handed to h5md_create_box()
   for (int i=0; i<3; i++) {
     boundary[i] = new char[9];
     if (domain->periodicity[i]==1) {
       strcpy(boundary[i], "periodic");
     } else {
       strcpy(boundary[i], "none");
     }
   }
 
   dims[0] = natoms;
   dims[1] = domain->dimension;
 
   edges[0] = boxxhi - boxxlo;
   edges[1] = boxyhi - boxylo;
   edges[2] = boxzhi - boxzlo;
   if (every_position==0) {
     particles_data.position = h5md_create_fixed_data_simple(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, dump_position);
     h5md_create_box(&particles_data, dims[1], boundary, false, edges, NULL);
     if (every_image==0)
       particles_data.image = h5md_create_fixed_data_simple(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, dump_image);
   }
   if (every_velocity==0)
     particles_data.velocity = h5md_create_fixed_data_simple(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, dump_velocity);
   if (every_force==0)
     particles_data.force = h5md_create_fixed_data_simple(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, dump_force);
   if (every_species==0)
     particles_data.species = h5md_create_fixed_data_simple(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, dump_species);
   if (every_charge==0) {
     // use the same element type as the time-dependent charge data set in
     // openfile(), which is filled from the same dump_charge array
     particles_data.charge = h5md_create_fixed_data_simple(particles_data.group, "charge", 1, dims, H5T_NATIVE_DOUBLE, dump_charge);
     h5md_write_string_attribute(particles_data.group, "charge", "type", "effective");
   }
 
   for (int i=0; i<3; i++) {
     delete [] boundary[i];
   }
 }
 
diff --git a/src/USER-MISC/fix_ti_spring.cpp b/src/USER-MISC/fix_ti_spring.cpp
index 6b9e3d63b..fbbc747c3 100644
--- a/src/USER-MISC/fix_ti_spring.cpp
+++ b/src/USER-MISC/fix_ti_spring.cpp
@@ -1,386 +1,386 @@
 /* -------------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* -------------------------------------------------------------------------
-    Contributing authors:
+   Contributing authors:
              Rodrigo Freitas (UC Berkeley) - rodrigof@berkeley.edu
              Mark Asta (UC Berkeley) - mdasta@berkeley.edu
              Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
 ------------------------------------------------------------------------- */
 
 #include <stdlib.h>
 #include <string.h>
 #include "fix_ti_spring.h"
 #include "atom.h"
 #include "update.h"
 #include "domain.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "citeme.h"
 #include "force.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // BibTeX entry for the ti/spring command; the constructor hands this
 // string to lmp->citeme when citation tracking is active
 static const char cite_fix_ti_spring[] =
   "ti/spring command:\n\n"
   "@article{freitas2016,\n"
   "  author={Freitas, Rodrigo and Asta, Mark and de Koning, Maurice},\n"
   "  title={Nonequilibrium free-energy calculation of solids using LAMMPS},\n"
   "  journal={Computational Materials Science},\n"
   "  volume={112},\n"
   "  pages={333--341},\n"
   "  year={2016},\n"
   "  publisher={Elsevier}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    parse the fix ti/spring arguments and record the reference positions
    arg[3] = spring constant k (must be > 0)
    arg[4] = number of switching steps (must be > 0)
    arg[5] = number of equilibration steps (must be >= 0)
    arg[6],arg[7] = optional keyword "function" followed by 1 or 2
 ------------------------------------------------------------------------- */
 
 FixTISpring::FixTISpring(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_ti_spring);
 
   if (narg < 6 || narg > 8)
     error->all(FLERR,"Illegal fix ti/spring command");
 
   // Flags.
   restart_peratom = 1;
   scalar_flag = 1;
   global_freq = 1;
   vector_flag = 1;
   size_vector = 2;
   extscalar = 1;
   extvector = 1;
 
   // disallow resetting the time step, while this fix is defined
   time_depend = 1;
 
   // Spring constant.
   k = force->numeric(FLERR,arg[3]);
   if (k <= 0.0) error->all(FLERR,"Illegal fix ti/spring command");
 
   // Perform initial allocation of atom-based array
   // Register with Atom class
   xoriginal = NULL;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   atom->add_callback(1);
 
   // xoriginal = initial unwrapped positions of atoms
   // atoms outside the fix group get a zeroed entry
 
   double **x = atom->x;
   int *mask = atom->mask;
   imageint *image = atom->image;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) domain->unmap(x[i],image[i],xoriginal[i]);
     else xoriginal[i][0] = xoriginal[i][1] = xoriginal[i][2] = 0.0;
   }
 
   // Time variables.
   t0 = update->ntimestep;  // timestep of original/starting coordinates
   t_switch = force->bnumeric(FLERR,arg[4]); // Number of steps for switching
   t_equil  = force->bnumeric(FLERR,arg[5]); // Number of steps for equilibration
   if ((t_switch <= 0) || (t_equil < 0))
     error->all(FLERR,"Illegal fix ti/spring command");
 
   // Coupling parameter initialization
   sf = 1;
   if (narg > 6) {
     if (strcmp(arg[6], "function") == 0) {
       // the keyword needs a value: with narg == 7 there is no arg[7] to read
       if (narg < 8) error->all(FLERR,"Illegal fix ti/spring command");
       sf = force->inumeric(FLERR,arg[7]);
     } else error->all(FLERR,"Illegal fix ti/spring switching function");
     if ((sf!=1) && (sf!=2))
       error->all(FLERR,"Illegal fix ti/spring switching function");
   }
   lambda  =  switch_func(0);
   dlambda = dswitch_func(0);
 
   espring = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Undo what the constructor set up: drop both Atom callbacks registered
 // for this fix id, then free the per-atom reference positions.
 FixTISpring::~FixTISpring()
 {
   // unregister callbacks to this fix from Atom class
   atom->delete_callback(id,0);
   atom->delete_callback(id,1);
 
   // delete locally stored array
   memory->destroy(xoriginal);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Report the set of FixConst hooks this fix takes part in.
 int FixTISpring::setmask()
 {
   return INITIAL_INTEGRATE | POST_FORCE | POST_FORCE_RESPA |
          MIN_POST_FORCE | THERMO_ENERGY;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTISpring::init()
 {
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTISpring::setup(int vflag)
 {
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
     ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
     post_force_respa(vflag,nlevels_respa-1,0);
     ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Setup hook for minimization: forwards to post_force().
 void FixTISpring::min_setup(int vflag)
 {
   post_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Blend the current force on every group atom with a harmonic spring
 // tied to its reference position, weighted by lambda, and accumulate the
 // local spring energy in espring.
 void FixTISpring::post_force(int vflag)
 {
   // nothing is applied while the equilibration period is still running
   if ((update->ntimestep - t0) < t_equil) return;
 
   double **pos = atom->x;
   double **frc = atom->f;
   int *mask = atom->mask;
   imageint *image = atom->image;
   const int nlocal = atom->nlocal;
 
   double disp[3];
   double unwrap[3];
 
   espring = 0.0;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     // displacement of the unwrapped position from the stored reference
     domain->unmap(pos[i],image[i],unwrap);
     for (int d = 0; d < 3; d++)
       disp[d] = unwrap[d] - xoriginal[i][d];
 
     // f <- (1-lambda)*f + lambda*(-k*displacement)
     for (int d = 0; d < 3; d++)
       frc[i][d] = (1-lambda) * frc[i][d] + lambda * (-k*disp[d]);
 
     espring += k * (disp[0]*disp[0] + disp[1]*disp[1] + disp[2]*disp[2]);
   }
 
   // the loop summed k*|d|^2; the spring energy is half of that
   espring *= 0.5;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA hook: only the outermost level applies the fix.
 void FixTISpring::post_force_respa(int vflag, int ilevel, int iloop)
 {
   if (ilevel != nlevels_respa-1) return;
   post_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Minimizer force hook: forwards to post_force().
 void FixTISpring::min_post_force(int vflag)
 {
   post_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixTISpring::initial_integrate(int vflag)
 {
   // Update the coupling parameter value if needed
   if ((update->ntimestep - t0) < t_equil) return;
 
   const bigint t = update->ntimestep - (t0+t_equil);
   const double r_switch = 1.0/t_switch;
 
   if ( (t >= 0) && (t <= t_switch) ) {
     lambda  =  switch_func(t*r_switch);
     dlambda = dswitch_func(t*r_switch);
   }
 
   if ( (t >= t_equil+t_switch) && (t <= (t_equil+2*t_switch)) ) {
     lambda  =    switch_func(1.0 - (t - t_switch - t_equil)*r_switch);
     dlambda = - dswitch_func(1.0 - (t - t_switch - t_equil)*r_switch);
   }
 }
 
 /* ----------------------------------------------------------------------
    energy of stretched springs
 ------------------------------------------------------------------------- */
 
 // Sum the per-process spring energy over all processes in world.
 double FixTISpring::compute_scalar()
 {
   double total;
   MPI_Allreduce(&espring,&total,1,MPI_DOUBLE,MPI_SUM,world);
   return total;
 }
 
 /* ----------------------------------------------------------------------
    information about coupling parameter
 ------------------------------------------------------------------------- */
 
 // Return entry n of {lambda, dlambda}.  The pair is refreshed into linfo
 // on every call.  n is not range-checked here; the constructor declares
 // size_vector = 2, so callers are presumably limited to n = 0 or 1.
 double FixTISpring::compute_vector(int n)
 {
   linfo[0] = lambda;
   linfo[1] = dlambda;
   return linfo[n];
 }
 
 /* ----------------------------------------------------------------------
      memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 // Bytes needed for three doubles per atom slot (atom->nmax slots).
 double FixTISpring::memory_usage()
 {
   return atom->nmax*3 * sizeof(double);
 }
 
 /* ----------------------------------------------------------------------
      allocate atom-based array
 ------------------------------------------------------------------------- */
 
 // Resize xoriginal to nmax rows of 3 values through the Memory class.
 void FixTISpring::grow_arrays(int nmax)
 {
   memory->grow(xoriginal,nmax,3,"fix_ti/spring:xoriginal");
 }
 
 /* ----------------------------------------------------------------------
      copy values within local atom-based array
 ------------------------------------------------------------------------- */
 
 // Copy the reference position stored for atom i into slot j.
 // delflag is not used by this fix.
 void FixTISpring::copy_arrays(int i, int j, int delflag)
 {
   for (int d = 0; d < 3; d++)
     xoriginal[j][d] = xoriginal[i][d];
 }
 
 /* ----------------------------------------------------------------------
     pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 // Write atom i's reference position into buf; returns the number of
 // values written (3).
 int FixTISpring::pack_exchange(int i, double *buf)
 {
   int n = 0;
   for (int d = 0; d < 3; d++)
     buf[n++] = xoriginal[i][d];
   return n;
 }
 
 /* ----------------------------------------------------------------------
     unpack values in local atom-based array from exchange with another proc
  ------------------------------------------------------------------------- */
 
 // Read a reference position from buf into slot nlocal; returns the
 // number of values consumed (3).
 int FixTISpring::unpack_exchange(int nlocal, double *buf)
 {
   int n = 0;
   for (int d = 0; d < 3; d++)
     xoriginal[nlocal][d] = buf[n++];
   return n;
 }
 
 /* ----------------------------------------------------------------------
     pack values in local atom-based arrays for restart file
 ------------------------------------------------------------------------- */
 
 // Restart record for atom i: the record length (4) followed by the
 // three reference coordinates.  Returns the record length.
 int FixTISpring::pack_restart(int i, double *buf)
 {
   int n = 0;
   buf[n++] = 4;
   for (int d = 0; d < 3; d++)
     buf[n++] = xoriginal[i][d];
   return n;
 }
 
 /* ----------------------------------------------------------------------
     unpack values from atom->extra array to restart the fix
 ------------------------------------------------------------------------- */
 
 // Restore the reference position of atom nlocal from the nth record in
 // atom->extra.
 void FixTISpring::unpack_restart(int nlocal, int nth)
 {
   double *record = atom->extra[nlocal];
 
   // each record starts with its own length: hop over the first nth of them
   int offset = 0;
   for (int i = 0; i < nth; i++)
     offset += static_cast<int> (record[offset]);
 
   // step past the length entry of the record that belongs to this fix
   offset++;
 
   for (int d = 0; d < 3; d++)
     xoriginal[nlocal][d] = record[offset++];
 }
 
 /* ----------------------------------------------------------------------
      maxsize of any atom's restart data
 ------------------------------------------------------------------------- */
 
 // Always 4, the number of values pack_restart() writes per atom.
 int FixTISpring::maxsize_restart()
 {
   return 4;
 }
 
 /* ----------------------------------------------------------------------
      size of atom nlocal's restart data
 ------------------------------------------------------------------------- */
 
 // Always 4, independent of nlocal; matches what pack_restart() writes.
 int FixTISpring::size_restart(int nlocal)
 {
   return 4;
 }
 
 /* ----------------------------------------------------------------------
      Switching function
 ------------------------------------------------------------------------- */
 
 // Switching function value at t.
 //   sf == 1  : the identity, t
 //   otherwise: t^5 * (70 t^4 - 315 t^3 + 540 t^2 - 420 t + 126)
 double FixTISpring::switch_func(double t)
 {
   if (sf == 1) {
     return t;
   }
 
   const double t2 = t*t;
   const double t5 = t2*t2*t;
   const double poly = 70.0*t2*t2 - 315.0*t2*t + 540.0*t2 - 420.0*t + 126.0;
   return (poly*t5);
 }
 
 /* ----------------------------------------------------------------------
      Switching function derivative
 ------------------------------------------------------------------------- */
 
 // Switching function derivative at t, divided by t_switch.
 //   sf == 1  : 1 / t_switch
 //   otherwise: t^4 * (630 t^4 - 2520 t^3 + 3780 t^2 - 2520 t + 630) / t_switch
 double FixTISpring::dswitch_func(double t)
 {
   if (sf == 1) {
     return 1.0/t_switch;
   }
 
   const double t2 = t*t;
   const double t4 = t2*t2;
   const double poly = 630*t2*t2 - 2520*t2*t + 3780*t2 - 2520*t + 630;
   return (poly*t4) / t_switch;
 }
diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp
index 20e60bab2..b3fe2c29e 100644
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@@ -1,367 +1,367 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   OpenMP based threading support for LAMMPS
    Contributing author: Axel Kohlmeyer (Temple U)
+   OpenMP based threading support for LAMMPS
 ------------------------------------------------------------------------- */
 
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "integrate.h"
 #include "min.h"
 #include "timer.h"
 
 #include "fix_omp.h"
 #include "thr_data.h"
 #include "thr_omp.h"
 
 #include "pair_hybrid.h"
 #include "bond_hybrid.h"
 #include "angle_hybrid.h"
 #include "dihedral_hybrid.h"
 #include "improper_hybrid.h"
 #include "kspace.h"
 
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
 
 #include "suffix.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // Index of the calling OpenMP thread, or 0 when built without OpenMP.
 static int get_tid()
 {
 #if defined(_OPENMP)
   return omp_get_thread_num();
 #else
   return 0;
 #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the "package omp" arguments (arg[3] = thread count, then optional
 // keyword/value pairs), apply the thread count, report the settings on
 // rank 0 and create one ThrData instance per thread.
 FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
   :  Fix(lmp, narg, arg),
      thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
      _nthr(-1), _neighbor(true), _mixed(false), _reduced(true)
 {
   if (narg < 4) error->all(FLERR,"Illegal package omp command");
 
   // thread count: "0" asks the OpenMP runtime, anything else is parsed as
   // an integer; without OpenMP the count stays at 1
   int nthreads = 1;
   if (narg > 3) {
 #if defined(_OPENMP)
     if (strcmp(arg[3],"0") == 0)
 #pragma omp parallel default(none) shared(nthreads)
       nthreads = omp_get_num_threads();
     else
       nthreads = force->inumeric(FLERR,arg[3]);
 #endif
   }
 
   if (nthreads < 1)
     error->all(FLERR,"Illegal number of OpenMP threads requested");
 
   // only touch the runtime / comm setting when the count actually changes
   int reset_thr = 0;
   if (nthreads != comm->nthreads) {
 #if defined(_OPENMP)
     reset_thr = 1;
     omp_set_num_threads(nthreads);
 #endif
     comm->nthreads = nthreads;
   }
 
   // optional keywords: every keyword consumes exactly one value
 
   for (int iarg = 4; iarg < narg; iarg += 2) {
     if (strcmp(arg[iarg],"neigh") != 0)
       error->all(FLERR,"Illegal package omp command");
     if (iarg+2 > narg) error->all(FLERR,"Illegal package omp command");
 
     const char *value = arg[iarg+1];
     if (strcmp(value,"yes") == 0) _neighbor = true;
     else if (strcmp(value,"no") == 0) _neighbor = false;
     else error->all(FLERR,"Illegal package omp command");
   }
 
   // print summary of settings
 
   if (comm->me == 0) {
 #if defined(_OPENMP)
     const char * const nmode = _neighbor ? "multi-threaded" : "serial";
 
     // same two lines go to the screen first and then to the log file
     FILE *outputs[2] = {screen, logfile};
     for (int i = 0; i < 2; ++i) {
       FILE *fp = outputs[i];
       if (!fp) continue;
       if (reset_thr)
         fprintf(fp,"set %d OpenMP thread(s) per MPI task\n", nthreads);
       fprintf(fp,"using %s neighbor list subroutines\n", nmode);
     }
 #else
     error->warning(FLERR,"OpenMP support not enabled during compilation; "
                          "using 1 thread only.");
 #endif
   }
 
   // allocate list for per thread accumulator manager class instances
   // and then have each thread create an instance of this class to
   // encourage the OS to use storage that is "close" to each thread's CPU.
 
   thr = new ThrData *[nthreads];
   _nthr = nthreads;
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(lmp)
 #endif
   {
     const int tid = get_tid();
     Timer *t = new Timer(lmp);
     thr[tid] = new ThrData(tid,t);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Destroy the _nthr per-thread ThrData objects, then the pointer array.
 FixOMP::~FixOMP()
 {
   for (int i=0; i < _nthr; ++i)
     delete thr[i];
 
   delete[] thr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Report the set of FixConst hooks this fix takes part in.
 int FixOMP::setmask()
 {
   return PRE_FORCE | PRE_FORCE_RESPA | MIN_PRE_FORCE;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // (Re)initialize the fix before a run:
 //  - rebuild the per-thread ThrData objects if comm->nthreads changed
 //  - reset the per-thread timers
 //  - require respa/omp when a respa integrator is used
 //  - find the last force style carrying the OMP suffix flag
 //  - push the neighbor list threading setting and print diagnostics
 void FixOMP::init()
 {
   // USER-OMP package cannot be used with atom_style template
   if (atom->molecular == 2)
     error->all(FLERR,"USER-OMP package does not (yet) work with "
                "atom_style template");
 
   // adjust number of data objects when the number of OpenMP
   // threads has been changed somehow
   const int nthreads = comm->nthreads;
   if (_nthr != nthreads) {
     if (screen) fprintf(screen,"Re-init USER-OMP for %d OpenMP thread(s)\n", nthreads);
     if (logfile) fprintf(logfile,"Re-init USER-OMP for %d OpenMP thread(s)\n", nthreads);
 
     for (int i=0; i < _nthr; ++i)
       delete thr[i];
 
     // release the old pointer array as well; it is replaced right below
     // and would otherwise be leaked
     delete[] thr;
 
     thr = new ThrData *[nthreads];
     _nthr = nthreads;
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
     {
       const int tid = get_tid();
       Timer *t = new Timer(lmp);
       thr[tid] = new ThrData(tid,t);
     }
   }
 
   // reset per thread timer
   for (int i=0; i < nthreads; ++i) {
     thr[i]->_timer_active=1;
     thr[i]->timer(Timer::RESET);
     thr[i]->_timer_active=-1;
   }
 
   if ((strstr(update->integrate_style,"respa") != NULL)
       && (strstr(update->integrate_style,"respa/omp") == NULL))
     error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles");
 
   int check_hybrid, kspace_split;
   last_pair_hybrid = NULL;
   last_omp_style = NULL;
   const char *last_omp_name = NULL;
   const char *last_hybrid_name = NULL;
   const char *last_force_name = NULL;
 
   // support for verlet/split operation.
   // kspace_split == 0 : regular processing
   // kspace_split < 0  : master partition, does not do kspace
   // kspace_split > 0  : slave partition, only does kspace
 
   if (strstr(update->integrate_style,"verlet/split") != NULL) {
     if (universe->iworld == 0) kspace_split = -1;
     else kspace_split = 1;
   } else {
     kspace_split = 0;
   }
 
 // determine which is the last force style with OpenMP
 // support as this is the one that has to reduce the forces
 
 // records force->name as the last OMP style if its suffix flag is set and
 // sets check_hybrid when the style is "hybrid" or "hybrid/overlay"
 #define CheckStyleForOMP(name)                                          \
   check_hybrid = 0;                                                     \
   if (force->name) {                                                    \
     if ( (strcmp(force->name ## _style,"hybrid") == 0) ||               \
          (strcmp(force->name ## _style,"hybrid/overlay") == 0) )        \
       check_hybrid=1;                                                   \
     if (force->name->suffix_flag & Suffix::OMP) {                       \
       last_force_name = (const char *) #name;                           \
       last_omp_name = force->name ## _style;                            \
       last_omp_style = (void *) force->name;                            \
     }                                                                   \
   }
 
 // when check_hybrid is set, scans the sub-styles of the hybrid style and
 // records the last one whose suffix flag includes OMP
 #define CheckHybridForOMP(name,Class) \
   if (check_hybrid) {                                         \
     Class ## Hybrid *style = (Class ## Hybrid *) force->name; \
     for (int i=0; i < style->nstyles; i++) {                  \
       if (style->styles[i]->suffix_flag & Suffix::OMP) {      \
         last_force_name = (const char *) #name;               \
         last_omp_name = style->keywords[i];                   \
         last_omp_style = style->styles[i];                    \
       }                                                       \
     }                                                         \
   }
 
   if (kspace_split <= 0) {
     CheckStyleForOMP(pair);
     CheckHybridForOMP(pair,Pair);
     if (check_hybrid) {
       last_pair_hybrid = last_omp_style;
       last_hybrid_name = last_omp_name;
     }
 
     CheckStyleForOMP(bond);
     CheckHybridForOMP(bond,Bond);
 
     CheckStyleForOMP(angle);
     CheckHybridForOMP(angle,Angle);
 
     CheckStyleForOMP(dihedral);
     CheckHybridForOMP(dihedral,Dihedral);
 
     CheckStyleForOMP(improper);
     CheckHybridForOMP(improper,Improper);
   }
 
   if (kspace_split >= 0) {
     CheckStyleForOMP(kspace);
   }
 
 #undef CheckStyleForOMP
 #undef CheckHybridForOMP
   set_neighbor_omp();
 
   // diagnostic output
   if (comm->me == 0) {
     if (last_omp_style) {
       if (last_pair_hybrid) {
         if (screen)
           fprintf(screen,"Hybrid pair style last /omp style %s\n", last_hybrid_name);
         if (logfile)
           fprintf(logfile,"Hybrid pair style last /omp style %s\n", last_hybrid_name);
       }
       if (screen)
         fprintf(screen,"Last active /omp style is %s_style %s\n",
                 last_force_name, last_omp_name);
       if (logfile)
         fprintf(logfile,"Last active /omp style is %s_style %s\n",
                 last_force_name, last_omp_name);
     } else {
       if (screen)
         fprintf(screen,"No /omp style for force computation currently active\n");
       if (logfile)
         fprintf(logfile,"No /omp style for force computation currently active\n");
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy the "neigh yes/no" choice from package omp into the omp field of
 // every neighbor list request that currently exists.
 void FixOMP::set_neighbor_omp()
 {
   // NOTE: this fix sits at the top of the list of fixes, so neighbor
   // lists requested by other fixes cannot be adjusted from here; such
   // fixes have to be re-implemented as /omp fix styles.
 
   int neigh_omp = 0;
   if (_neighbor) neigh_omp = 1;
 
   const int nrequest = neighbor->nrequest;
   for (int ireq = 0; ireq < nrequest; ++ireq) {
     neighbor->requests[ireq]->omp = neigh_omp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Setup hook: sets _timer_active to 0 on every per-thread ThrData
 // (init() leaves it at -1 after resetting the timers).
 void FixOMP::setup(int)
 {
   // we are post the force compute in setup. turn on timers
   for (int i=0; i < _nthr; ++i)
     thr[i]->_timer_active=0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // adjust size and clear out per thread accumulator arrays
 // Every thread calls init_force() on its own ThrData with the atom count
 // (local + ghost) and the per-atom force-type arrays; afterwards the fix
 // is marked as "not yet reduced".
 void FixOMP::pre_force(int)
 {
   const int nall = atom->nlocal + atom->nghost;
 
   // local copies of the array pointers so that they can be named in the
   // shared() clause of the parallel region below
   double **f = atom->f;
   double **torque = atom->torque;
   double *erforce = atom->erforce;
   double *de = atom->de;
   double *drho = atom->drho;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
 #endif
   {
     const int tid = get_tid();
     thr[tid]->check_tid(tid);
     thr[tid]->init_force(nall,f,torque,erforce,de,drho);
   } // end of omp parallel region
 
   _reduced = false;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Memory estimate: the ThrData objects and their pointers, plus _nthr
 // times what the first ThrData instance reports for itself.
 double FixOMP::memory_usage()
 {
   const double object_bytes = _nthr * (sizeof(ThrData *) + sizeof(ThrData));
   const double buffer_bytes = _nthr * thr[0]->memory_usage();
 
   return object_bytes + buffer_bytes;
 }
diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp
index 277da9d4b..16d3001dd 100644
--- a/src/USER-OMP/pppm_disp_omp.cpp
+++ b/src/USER-OMP/pppm_disp_omp.cpp
@@ -1,1872 +1,1873 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U), Rolf Isele-Holder (RWTH Aachen University)
+   Contributing authors: Axel Kohlmeyer (Temple U)
+                         Rolf Isele-Holder (RWTH Aachen University)
 ------------------------------------------------------------------------- */
 
 #include "pppm_disp_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "force.h"
 #include "memory.h"
 #include "math_const.h"
 
 #include <string.h>
 #include <math.h>
 
 #include "suffix.h"
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
 #define ONEF  1.0f
 #else
 #define ZEROF 0.0
 #define ONEF  1.0
 #endif
 
 #define OFFSET 16384
 
 
 /* ---------------------------------------------------------------------- */
 
 // Construct both bases (PPPMDisp, and ThrOMP tagged THR_KSPACE), clear
 // triclinic_support and add the OMP bit to the style's suffix flag.
 PPPMDispOMP::PPPMDispOMP(LAMMPS *lmp, int narg, char **arg) :
   PPPMDisp(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
 {
   triclinic_support = 0;
   suffix_flag |= Suffix::OMP;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release storage through this class's deallocate().
 PPPMDispOMP::~PPPMDispOMP()
 {
   deallocate();
 }
 
 /* ----------------------------------------------------------------------
    allocate memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 // Run the base class allocation, then let every thread initialize the
 // PPPM storage of its own ThrData object.
 void PPPMDispOMP::allocate()
 {
   PPPMDisp::allocate();
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     const int tid = omp_get_thread_num();
 #else
     const int tid = 0;
 #endif
 
     // function[0] selects the part that uses 'order'
     if (function[0]) {
       ThrData *thr = fix->get_thr(tid);
       thr->init_pppm(order,memory);
     }
     // function[1] or function[2] select the part that uses 'order_6'
     if (function[1] + function[2]) {
       ThrData * thr = fix->get_thr(tid);
       thr->init_pppm_disp(order_6,memory);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    free memory that depends on # of K-vectors and order
 ------------------------------------------------------------------------- */
 
 // Run the base class deallocation, then call the same per-thread init
 // routines as allocate() but with the order negated.
 // NOTE(review): the negative order presumably tells ThrData to free the
 // storage instead of allocating it; confirm against ThrData.
 void PPPMDispOMP::deallocate()
 {
   PPPMDisp::deallocate();
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     const int tid = omp_get_thread_num();
 #else
     const int tid = 0;
 #endif
     if (function[0]) {
       ThrData * thr = fix->get_thr(tid);
       thr->init_pppm(-order,memory);
     }
     if (function[1] + function[2]) {
       ThrData * thr = fix->get_thr(tid);
       thr->init_pppm_disp(-order_6,memory);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) coulomb green function
 ------------------------------------------------------------------------- */
 
 // Fill greensfn[] for the FFT grid points owned by this process.  Every
 // thread walks the complete (m,l,k) index space but only evaluates and
 // stores the flattened indices nn inside its own [nnfrom,nnto) range.
 void PPPMDispOMP::compute_gf()
 {
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
 
     // box lengths: prd for orthogonal boxes, prd_lamda for triclinic ones
     double *prd;
     if (triclinic == 0) prd = domain->prd;
     else prd = domain->prd_lamda;
 
     double xprd = prd[0];
     double yprd = prd[1];
     double zprd = prd[2];
     double zprd_slab = zprd*slab_volfactor;
 
     // reciprocal-space spacing per direction (z uses the slab length)
     double unitkx = (2.0*MY_PI/xprd);
     double unitky = (2.0*MY_PI/yprd);
     double unitkz = (2.0*MY_PI/zprd_slab);
 
     int tid,nn,nnfrom,nnto,k,l,m;
     int kper,lper,mper;
     double snx,sny,snz,snx2,sny2,snz2;
     double sqk;
     double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
     double numerator,denominator;
 
     // extents of the local FFT brick in x and y, used to flatten (k,l,m)
     const int nnx = nxhi_fft-nxlo_fft+1;
     const int nny = nyhi_fft-nylo_fft+1;
 
     // presumably fills in nnfrom, nnto and tid for the calling thread
     // (they are not assigned anywhere else before use) -- TODO confirm
     loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
 
     for (m = nzlo_fft; m <= nzhi_fft; m++) {
       // for 0 <= m < nz_pppm this subtracts nz_pppm once 2*m >= nz_pppm,
       // i.e. the upper half of the grid maps to negative indices
       mper = m - nz_pppm*(2*m/nz_pppm);
       qz = unitkz*mper;
       snz = sin(0.5*qz*zprd_slab/nz_pppm);
       snz2 = snz*snz;
       sz = exp(-0.25*pow(qz/g_ewald,2.0));
       // wz = (sin(arg)/arg)^(2*order), with the arg == 0 limit set to 1
       wz = 1.0;
       argz = 0.5*qz*zprd_slab/nz_pppm;
       if (argz != 0.0) wz = pow(sin(argz)/argz,order);
       wz *= wz;
 
       for (l = nylo_fft; l <= nyhi_fft; l++) {
         lper = l - ny_pppm*(2*l/ny_pppm);
         qy = unitky*lper;
         sny = sin(0.5*qy*yprd/ny_pppm);
         sny2 = sny*sny;
         sy = exp(-0.25*pow(qy/g_ewald,2.0));
         wy = 1.0;
         argy = 0.5*qy*yprd/ny_pppm;
         if (argy != 0.0) wy = pow(sin(argy)/argy,order);
         wy *= wy;
 
         for (k = nxlo_fft; k <= nxhi_fft; k++) {
 
           /* only compute the part designated to this thread */
           nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
           if ((nn < nnfrom) || (nn >=nnto)) continue;
 
           kper = k - nx_pppm*(2*k/nx_pppm);
           qx = unitkx*kper;
           snx = sin(0.5*qx*xprd/nx_pppm);
           snx2 = snx*snx;
           sx = exp(-0.25*pow(qx/g_ewald,2.0));
           wx = 1.0;
           argx = 0.5*qx*xprd/nx_pppm;
           if (argx != 0.0) wx = pow(sin(argx)/argx,order);
           wx *= wx;
 
           sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
           // the q = 0 entry is set to zero explicitly, which also avoids
           // the division by sqk
           if (sqk != 0.0) {
             numerator = 4.0*MY_PI/sqk;
             denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
             greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator;
           } else greensfn[nn] = 0.0;
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    Compute the modified (hockney-eastwood) dispersion green function
 ------------------------------------------------------------------------- */
 
 // Fill greensfn_6[] for the grid points of the "_6" FFT grid owned by
 // this process.  Every thread walks the complete (m,l,k) index space but
 // only evaluates and stores the flattened indices nn inside its own
 // [nnfrom,nnto) range.
 void PPPMDispOMP::compute_gf_6()
 {
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     double *prd;
     int k,l,m,nn;
 
     // volume-dependent factors
     // adjust z dimension for 2d slab PPPM
     // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
 
     if (triclinic == 0) prd = domain->prd;
     else prd = domain->prd_lamda;
 
     double xprd = prd[0];
     double yprd = prd[1];
     double zprd = prd[2];
     double zprd_slab = zprd*slab_volfactor;
 
     // reciprocal-space spacing per direction (z uses the slab length)
     double unitkx = (2.0*MY_PI/xprd);
     double unitky = (2.0*MY_PI/yprd);
     double unitkz = (2.0*MY_PI/zprd_slab);
 
     int kper,lper,mper;
     double sqk;
     double snx,sny,snz,snx2,sny2,snz2;
     double argx,argy,argz,wx,wy,wz,sx,sy,sz;
     double qx,qy,qz;
     double rtsqk, term;
     double numerator,denominator;
     // inv2ew = 1/(2*g_ewald_6), computed in two steps
     double inv2ew = 2*g_ewald_6;
     inv2ew = 1/inv2ew;
     double rtpi = sqrt(MY_PI);
     int nnfrom, nnto, tid;
 
     // prefactor shared by all grid points: -pi^(3/2) * g_ewald_6^3 / 3
     numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
 
     // extents of the local FFT brick in x and y, used to flatten (k,l,m)
     const int nnx = nxhi_fft_6-nxlo_fft_6+1;
     const int nny = nyhi_fft_6-nylo_fft_6+1;
 
     // presumably fills in nnfrom, nnto and tid for the calling thread
     // (they are not assigned anywhere else before use) -- TODO confirm
     loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads);
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
 
     for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
       // for 0 <= m < nz_pppm_6 this subtracts nz_pppm_6 once
       // 2*m >= nz_pppm_6, i.e. the upper half maps to negative indices
       mper = m - nz_pppm_6*(2*m/nz_pppm_6);
       qz = unitkz*mper;
       snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
       snz2 = snz*snz;
       sz = exp(-qz*qz*inv2ew*inv2ew);
       // wz = (sin(arg)/arg)^(2*order_6), with the arg == 0 limit set to 1
       wz = 1.0;
       argz = 0.5*qz*zprd_slab/nz_pppm_6;
       if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
       wz *= wz;
 
       for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
         lper = l - ny_pppm_6*(2*l/ny_pppm_6);
         qy = unitky*lper;
         sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
         sny2 = sny*sny;
         sy = exp(-qy*qy*inv2ew*inv2ew);
         wy = 1.0;
         argy = 0.5*qy*yprd/ny_pppm_6;
         if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
         wy *= wy;
 
         for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
 
           /* only compute the part designated to this thread */
           nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6));
           if ((nn < nnfrom) || (nn >=nnto)) continue;
 
 	  kper = k - nx_pppm_6*(2*k/nx_pppm_6);
           qx = unitkx*kper;
 	  snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
 	  snx2 = snx*snx;
           sx = exp(-qx*qx*inv2ew*inv2ew);
 	  wx = 1.0;
 	  argx = 0.5*qx*xprd/nx_pppm_6;
 	  if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
           wx *= wx;
 
 	  sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
           // the q = 0 entry is set to zero explicitly
           if (sqk != 0.0) {
 	    denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
 	    rtsqk = sqrt(sqk);
             term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
                     2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
 	    greensfn_6[nn] = numerator*term*wx*wy*wz/denominator;
           } else greensfn_6[nn] = 0.0;
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 /* ----------------------------------------------------------------------
    run the regular toplevel compute method from plain PPPMDisp
    which will have individual methods replaced by our threaded
    versions and then call the obligatory force reduction.
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::compute(int eflag, int vflag)
 {
 
   PPPMDisp::compute(eflag,vflag);
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
 #if defined(_OPENMP)
     const int tid = omp_get_thread_num();
 #else
     const int tid = 0;
 #endif
 
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
 
 
 /* ----------------------------------------------------------------------
    find center grid pt for each of my particles
    check that full stencil for the particle will fit in my 3d brick
    store central grid pt indices in part2grid array
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::particle_map(double dxinv, double dyinv,
                                double dzinv, double sft,
                                int ** part2grid, int nup,
                                int nlw, int nxlo_o,
                                int nylo_o, int nzlo_o,
                                int nxhi_o, int nyhi_o,
                                int nzhi_o)
 {
   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
   int3_t * _noalias const p2g = (int3_t *) part2grid[0];
   const double boxlox = boxlo[0];
   const double boxloy = boxlo[1];
   const double boxloz = boxlo[2];
   const int nlocal = atom->nlocal;
 
   const double delxinv = dxinv;
   const double delyinv = dyinv;
   const double delzinv = dzinv;
   const double shift = sft;
   const int nupper = nup;
   const int nlower = nlw;
   const int nxlo_out = nxlo_o;
   const int nylo_out = nylo_o;
   const int nzlo_out = nzlo_o;
   const int nxhi_out = nxhi_o;
   const int nyhi_out = nyhi_o;
   const int nzhi_out = nzhi_o;
 
   if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
     error->one(FLERR,"Non-numeric box dimensions. Simulation unstable.");
 
   int i, flag = 0;
 #if defined(_OPENMP)
 #pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
 #endif
   for (i = 0; i < nlocal; i++) {
 
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // current particle coord can be outside global and local box
     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
 
     const int nx = static_cast<int> ((x[i].x-boxlox)*delxinv+shift) - OFFSET;
     const int ny = static_cast<int> ((x[i].y-boxloy)*delyinv+shift) - OFFSET;
     const int nz = static_cast<int> ((x[i].z-boxloz)*delzinv+shift) - OFFSET;
 
     p2g[i].a = nx;
     p2g[i].b = ny;
     p2g[i].t = nz;
 
     // check that entire stencil around nx,ny,nz will fit in my 3d brick
 
     if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
         ny+nlower < nylo_out || ny+nupper > nyhi_out ||
         nz+nlower < nzlo_out || nz+nupper > nzhi_out)
       flag++;
   }
 
   int flag_all;
   MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
   if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
 }
 
 /* ----------------------------------------------------------------------
    create discretized "density" on section of global grid due to my particles
    density(x,y,z) = charge "density" at grid points of my 3d brick
    (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
    in global grid
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::make_rho_c()
 {
 
   // clear 3d density array
 
   FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
   memset(d,0,ngrid*sizeof(FFT_SCALAR));
 
   // no local atoms => nothing else to do
 
   const int nlocal = atom->nlocal;
   if (nlocal == 0) return;
 
   const int ix = nxhi_out - nxlo_out + 1;
   const int iy = nyhi_out - nylo_out + 1;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const double * _noalias const q = atom->q;
     const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
     const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
 
     const double boxlox = boxlo[0];
     const double boxloy = boxlo[1];
     const double boxloz = boxlo[2];
 
     // determine range of grid points handled by this thread
     int i,jfrom,jto,tid;
     loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     // loop over my charges, add their contribution to nearby grid points
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // (dx,dy,dz) = distance to "lower left" grid pt
 
     // loop over all local atoms for all threads
     for (i = 0; i < nlocal; i++) {
 
       const int nx = p2g[i].a;
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
       // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
            || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
 
       const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
       const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
       const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
 
       compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);
 
       const FFT_SCALAR z0 = delvolinv * q[i];
 
       for (int n = nlower; n <= nupper; ++n) {
         const int jn = (nz+n-nzlo_out)*ix*iy;
         const FFT_SCALAR y0 = z0*r1d[2][n];
 
         for (int m = nlower; m <= nupper; ++m) {
           const int jm = jn+(ny+m-nylo_out)*ix;
           const FFT_SCALAR x0 = y0*r1d[1][m];
 
           for (int l = nlower; l <= nupper; ++l) {
             const int jl = jm+nx+l-nxlo_out;
             // make sure each thread only updates
             // "his" elements of the density grid
             if (jl >= jto) break;
             if (jl < jfrom) continue;
 
             d[jl] += x0*r1d[0][l];
           }
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 
 /* ----------------------------------------------------------------------
    same as above for dispersion interaction with geometric mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::make_rho_g()
 {
 
   // clear 3d density array
 
   FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   memset(d,0,ngrid_6*sizeof(FFT_SCALAR));
 
   // no local atoms => nothing else to do
 
   const int nlocal = atom->nlocal;
   if (nlocal == 0) return;
 
   const int ix = nxhi_out_6 - nxlo_out_6 + 1;
   const int iy = nyhi_out_6 - nylo_out_6 + 1;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
     const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
 
     const double boxlox = boxlo[0];
     const double boxloy = boxlo[1];
     const double boxloz = boxlo[2];
 
     // determine range of grid points handled by this thread
     int i,jfrom,jto,tid;
     loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     // loop over my charges, add their contribution to nearby grid points
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // (dx,dy,dz) = distance to "lower left" grid pt
 
     // loop over all local atoms for all threads
     for (i = 0; i < nlocal; i++) {
 
       const int nx = p2g[i].a;
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
       // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
            || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
 
       const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
       const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
       const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
 
       compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
 
       const int type = atom->type[i];
       const double lj = B[type];
       const FFT_SCALAR z0 = delvolinv_6 * lj;
 
       for (int n = nlower_6; n <= nupper_6; ++n) {
         const int jn = (nz+n-nzlo_out_6)*ix*iy;
         const FFT_SCALAR y0 = z0*r1d[2][n];
 
         for (int m = nlower_6; m <= nupper_6; ++m) {
           const int jm = jn+(ny+m-nylo_out_6)*ix;
           const FFT_SCALAR x0 = y0*r1d[1][m];
 
           for (int l = nlower_6; l <= nupper_6; ++l) {
             const int jl = jm+nx+l-nxlo_out_6;
             // make sure each thread only updates
             // "his" elements of the density grid
             if (jl >= jto) break;
             if (jl < jfrom) continue;
 
             d[jl] += x0*r1d[0][l];
           }
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 
 /* ----------------------------------------------------------------------
    same as above for dispersion interaction with arithmetic mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::make_rho_a()
 {
 
   // clear 3d density array
 
   FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]);
   FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]);
 
   memset(d0,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d1,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d2,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d3,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d4,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d5,0,ngrid_6*sizeof(FFT_SCALAR));
   memset(d6,0,ngrid_6*sizeof(FFT_SCALAR));
 
   // no local atoms => nothing else to do
 
   const int nlocal = atom->nlocal;
   if (nlocal == 0) return;
 
   const int ix = nxhi_out_6 - nxlo_out_6 + 1;
   const int iy = nyhi_out_6 - nylo_out_6 + 1;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
     const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
 
     const double boxlox = boxlo[0];
     const double boxloy = boxlo[1];
     const double boxloz = boxlo[2];
 
     // determine range of grid points handled by this thread
     int i,jfrom,jto,tid;
     loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     // loop over my charges, add their contribution to nearby grid points
     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
     // (dx,dy,dz) = distance to "lower left" grid pt
 
     // loop over all local atoms for all threads
     for (i = 0; i < nlocal; i++) {
 
       const int nx = p2g[i].a;
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
       // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
            || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
 
       const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
       const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
       const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
 
       compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
 
       const int type = atom->type[i];
       const double lj0 = B[7*type];
       const double lj1 = B[7*type+1];
       const double lj2 = B[7*type+2];
       const double lj3 = B[7*type+3];
       const double lj4 = B[7*type+4];
       const double lj5 = B[7*type+5];
       const double lj6 = B[7*type+6];
 
       const FFT_SCALAR z0 = delvolinv_6;
 
       for (int n = nlower_6; n <= nupper_6; ++n) {
         const int jn = (nz+n-nzlo_out_6)*ix*iy;
         const FFT_SCALAR y0 = z0*r1d[2][n];
 
         for (int m = nlower_6; m <= nupper_6; ++m) {
           const int jm = jn+(ny+m-nylo_out_6)*ix;
           const FFT_SCALAR x0 = y0*r1d[1][m];
 
           for (int l = nlower_6; l <= nupper_6; ++l) {
             const int jl = jm+nx+l-nxlo_out_6;
             // make sure each thread only updates
             // "his" elements of the density grid
             if (jl >= jto) break;
             if (jl < jfrom) continue;
 
             const double w = x0*r1d[0][l];
 
             d0[jl] += w*lj0;
             d1[jl] += w*lj1;
             d2[jl] += w*lj2;
             d3[jl] += w*lj3;
             d4[jl] += w*lj4;
             d5[jl] += w*lj5;
             d6[jl] += w*lj6;
           }
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
    for ik scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_c_ik()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const q = atom->q;
   const double * const * const x = atom->x;
   const double qqrd2e = force->qqrd2e;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR ekx,eky,ekz;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid[i][0];
         ny = part2grid[i][1];
         nz = part2grid[i][2];
         dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
         dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
         dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
         compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
 
         ekx = eky = ekz = ZEROF;
         for (n = nlower; n <= nupper; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower; m <= nupper; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower; l <= nupper; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               ekx -= x0*vdx_brick[mz][my][mx];
               eky -= x0*vdy_brick[mz][my][mx];
               ekz -= x0*vdz_brick[mz][my][mx];
             }
           }
         }
 
         // convert E-field to force
         const double qfactor = qqrd2e*scale*q[i];
         f[i][0] += qfactor*ekx;
         f[i][1] += qfactor*eky;
         f[i][2] += qfactor*ekz;
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get electric field & force on my particles
    for ad scheme
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_c_ad()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const q = atom->q;
   const double * const * const x = atom->x;
   const double qqrd2e = force->qqrd2e;
   //const double * const sf_c = sf_coeff;
 
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   const double hx_inv = nx_pppm/xprd;
   const double hy_inv = ny_pppm/yprd;
   const double hz_inv = nz_pppm/zprd_slab;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz;
     FFT_SCALAR ekx,eky,ekz;
     double sf = 0.0;
     double s1,s2,s3;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid[i][0];
         ny = part2grid[i][1];
         nz = part2grid[i][2];
         dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
         dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
         dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
         compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
         compute_drho1d_thr(dr1d,dx,dy,dz, order, drho_coeff);
 
         ekx = eky = ekz = ZEROF;
         for (n = nlower; n <= nupper; n++) {
           mz = n+nz;
           for (m = nlower; m <= nupper; m++) {
             my = m+ny;
             for (l = nlower; l <= nupper; l++) {
               mx = l+nx;
               ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
               eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
               ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick[mz][my][mx];
             }
           }
         }
         ekx *= hx_inv;
         eky *= hy_inv;
         ekz *= hz_inv;
 
         // convert E-field to force
         const double qfactor = qqrd2e*scale;
 
         s1 = x[i][0]*hx_inv;
         s2 = x[i][1]*hy_inv;
         s3 = x[i][2]*hz_inv;
         sf = sf_coeff[0]*sin(2*MY_PI*s1);
         sf += sf_coeff[1]*sin(4*MY_PI*s1);
         sf *= 2*q[i]*q[i];
         f[i][0] += qfactor*(ekx*q[i] - sf);
 
         sf = sf_coeff[2]*sin(2*MY_PI*s2);
         sf += sf_coeff[3]*sin(4*MY_PI*s2);
         sf *= 2*q[i]*q[i];
         f[i][1] += qfactor*(eky*q[i] - sf);
 
 
         sf = sf_coeff[4]*sin(2*MY_PI*s3);
         sf += sf_coeff[5]*sin(4*MY_PI*s3);
         sf *= 2*q[i]*q[i];
         if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
  interpolate from grid to get per-atom energy/virial
  ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_c_peratom()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   const double * const q = atom->q;
   const double * const * const x = atom->x;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid[i][0];
         ny = part2grid[i][1];
         nz = part2grid[i][2];
         dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
         dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
         dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
 
         compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
 
         u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
         for (n = nlower; n <= nupper; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower; m <= nupper; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower; l <= nupper; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               if (eflag_atom) u += x0*u_brick[mz][my][mx];
               if (vflag_atom) {
                 v0 += x0*v0_brick[mz][my][mx];
                 v1 += x0*v1_brick[mz][my][mx];
                 v2 += x0*v2_brick[mz][my][mx];
                 v3 += x0*v3_brick[mz][my][mx];
                 v4 += x0*v4_brick[mz][my][mx];
                 v5 += x0*v5_brick[mz][my][mx];
               }
             }
           }
         }
 
         const double qfactor = 0.5*force->qqrd2e * scale * q[i];
 
         if (eflag_atom) eatom[i] += u*qfactor;
         if (vflag_atom) {
           vatom[i][0] += v0*qfactor;
           vatom[i][1] += v1*qfactor;
           vatom[i][2] += v2*qfactor;
           vatom[i][3] += v3*qfactor;
           vatom[i][4] += v4*qfactor;
           vatom[i][5] += v5*qfactor;
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for ik scheme and geometric mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_g_ik()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const * const x = atom->x;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
  double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR ekx,eky,ekz;
     int type;
     double lj;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
 
         ekx = eky = ekz = ZEROF;
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               ekx -= x0*vdx_brick_g[mz][my][mx];
               eky -= x0*vdy_brick_g[mz][my][mx];
               ekz -= x0*vdz_brick_g[mz][my][mx];
             }
           }
         }
 
         // convert E-field to force
         type = atom->type[i];
         lj = B[type];
         f[i][0] += lj*ekx;
         f[i][1] += lj*eky;
         f[i][2] += lj*ekz;
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for ad scheme and geometric mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_g_ad()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const * const x = atom->x;
   double *prd;
 
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   const double hx_inv = nx_pppm_6/xprd;
   const double hy_inv = ny_pppm_6/yprd;
   const double hz_inv = nz_pppm_6/zprd_slab;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz;
     FFT_SCALAR ekx,eky,ekz;
     int type;
     double lj;
     double sf = 0.0;
     double s1,s2,s3;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
         compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
 
         ekx = eky = ekz = ZEROF;
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
               eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
               ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx];
             }
           }
         }
         ekx *= hx_inv;
         eky *= hy_inv;
         ekz *= hz_inv;
 
         // convert E-field to force
         type = atom->type[i];
         lj = B[type];
 
         s1 = x[i][0]*hx_inv;
         s2 = x[i][1]*hy_inv;
         s3 = x[i][2]*hz_inv;
 
         sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
         sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
         sf *= 2*lj*lj;
         f[i][0] += ekx*lj - sf;
 
         sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
         sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
         sf *= 2*lj*lj;
         f[i][1] += eky*lj - sf;
 
         sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
         sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
         sf *= 2*lj*lj;
         if (slabflag != 2) f[i][2] += ekz*lj - sf;
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
  interpolate from grid to get per-atom energy/virial for dispersion
  interaction and geometric mixing rule
  ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_g_peratom()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   const double * const * const x = atom->x;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     // chunk length is 1 + nlocal/nthreads; the upper bound is clipped to nlocal
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     // without OpenMP a single pass covers all local atoms
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     // weight buffer handed out by this thread's ThrData object
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
     int type;
     double lj;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         // fill r1d[0..2][*] with the 1d weights for this atom
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
 
         // accumulate the weighted sums of the energy and the six virial
         // bricks over the stencil; x0 is the product of the three 1d weights
         u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               if (eflag_atom) u += x0*u_brick_g[mz][my][mx];
               if (vflag_atom) {
                 v0 += x0*v0_brick_g[mz][my][mx];
                 v1 += x0*v1_brick_g[mz][my][mx];
                 v2 += x0*v2_brick_g[mz][my][mx];
                 v3 += x0*v3_brick_g[mz][my][mx];
                 v4 += x0*v4_brick_g[mz][my][mx];
                 v5 += x0*v5_brick_g[mz][my][mx];
               }
             }
           }
         }
 
         // scale by half of the per-type B coefficient and add the result
         // to the per-atom energy and virial arrays
         type = atom->type[i];
         lj = B[type]*0.5;
 
         if (eflag_atom) eatom[i] += u*lj;
         if (vflag_atom) {
           vatom[i][0] += v0*lj;
           vatom[i][1] += v1*lj;
           vatom[i][2] += v2*lj;
           vatom[i][3] += v3*lj;
           vatom[i][4] += v4*lj;
           vatom[i][5] += v5*lj;
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for ik scheme and arithmetic mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_a_ik()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const * const x = atom->x;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     // chunk length is 1 + nlocal/nthreads; the upper bound is clipped to nlocal
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     // without OpenMP a single pass covers all local atoms
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     // force array and weight buffer handed out by this thread's ThrData object
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
     FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
     FFT_SCALAR ekx6, eky6, ekz6;
     int type;
     double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         // fill r1d[0..2][*] with the 1d weights for this atom
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
 
         ekx0 = eky0 = ekz0 = ZEROF;
         ekx1 = eky1 = ekz1 = ZEROF;
         ekx2 = eky2 = ekz2 = ZEROF;
         ekx3 = eky3 = ekz3 = ZEROF;
         ekx4 = eky4 = ekz4 = ZEROF;
         ekx5 = eky5 = ekz5 = ZEROF;
         ekx6 = eky6 = ekz6 = ZEROF;
 
         // one set of x/y/z sums per brick family a0..a6;
         // x0 is the product of the three 1d weights at the stencil point
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               ekx0 -= x0*vdx_brick_a0[mz][my][mx];
               eky0 -= x0*vdy_brick_a0[mz][my][mx];
               ekz0 -= x0*vdz_brick_a0[mz][my][mx];
               ekx1 -= x0*vdx_brick_a1[mz][my][mx];
               eky1 -= x0*vdy_brick_a1[mz][my][mx];
               ekz1 -= x0*vdz_brick_a1[mz][my][mx];
               ekx2 -= x0*vdx_brick_a2[mz][my][mx];
               eky2 -= x0*vdy_brick_a2[mz][my][mx];
               ekz2 -= x0*vdz_brick_a2[mz][my][mx];
               ekx3 -= x0*vdx_brick_a3[mz][my][mx];
               eky3 -= x0*vdy_brick_a3[mz][my][mx];
               ekz3 -= x0*vdz_brick_a3[mz][my][mx];
               ekx4 -= x0*vdx_brick_a4[mz][my][mx];
               eky4 -= x0*vdy_brick_a4[mz][my][mx];
               ekz4 -= x0*vdz_brick_a4[mz][my][mx];
               ekx5 -= x0*vdx_brick_a5[mz][my][mx];
               eky5 -= x0*vdy_brick_a5[mz][my][mx];
               ekz5 -= x0*vdz_brick_a5[mz][my][mx];
               ekx6 -= x0*vdx_brick_a6[mz][my][mx];
               eky6 -= x0*vdy_brick_a6[mz][my][mx];
               ekz6 -= x0*vdz_brick_a6[mz][my][mx];
             }
           }
         }
 
         // convert D-field to force
         // the seven per-type coefficients are stored as B[7*type .. 7*type+6]
         // and are paired with the bricks in reverse order (lj0 <-> B[7*type+6])
         type = atom->type[i];
         lj0 = B[7*type+6];
         lj1 = B[7*type+5];
         lj2 = B[7*type+4];
         lj3 = B[7*type+3];
         lj4 = B[7*type+2];
         lj5 = B[7*type+1];
         lj6 = B[7*type];
         f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
         f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
         // no z-force when slabflag == 2, matching the ad-scheme kernels of this class
         if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    interpolate from grid to get dispersion field & force on my particles
    for ad scheme and arithmetic mixing rule
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_a_ad()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
   // ek = 3 components of E-field on particle
 
   const double * const * const x = atom->x;
   double *prd;
 
   // box lengths come from prd for orthogonal boxes, prd_lamda for triclinic ones
   if (triclinic == 0) prd = domain->prd;
   else prd = domain->prd_lamda;
 
   double xprd = prd[0];
   double yprd = prd[1];
   double zprd = prd[2];
   double zprd_slab = zprd*slab_volfactor;
 
   // grid points per unit length in each direction (z uses the slab-scaled length)
   const double hx_inv = nx_pppm_6/xprd;
   const double hy_inv = ny_pppm_6/yprd;
   const double hz_inv = nz_pppm_6/zprd_slab;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     // chunk length is 1 + nlocal/nthreads; the upper bound is clipped to nlocal
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     // without OpenMP a single pass covers all local atoms
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     // force array plus weight (r1d) and weight-derivative (dr1d) buffers
     // handed out by this thread's ThrData object
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
     FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
     FFT_SCALAR ekx6, eky6, ekz6;
     int type;
     double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
     double sf = 0.0;
     double s1,s2,s3;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         // fill the 1d weights and their derivatives for this atom
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
         compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
 
         ekx0 = eky0 = ekz0 = ZEROF;
         ekx1 = eky1 = ekz1 = ZEROF;
         ekx2 = eky2 = ekz2 = ZEROF;
         ekx3 = eky3 = ekz3 = ZEROF;
         ekx4 = eky4 = ekz4 = ZEROF;
         ekx5 = eky5 = ekz5 = ZEROF;
         ekx6 = eky6 = ekz6 = ZEROF;
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               // x0/y0/z0: weight product with the derivative weight taken
               // in the x, y or z direction respectively
               x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n];
               y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n];
               z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n];
 
               ekx0 += x0*u_brick_a0[mz][my][mx];
               eky0 += y0*u_brick_a0[mz][my][mx];
               ekz0 += z0*u_brick_a0[mz][my][mx];
 
               ekx1 += x0*u_brick_a1[mz][my][mx];
               eky1 += y0*u_brick_a1[mz][my][mx];
               ekz1 += z0*u_brick_a1[mz][my][mx];
 
               ekx2 += x0*u_brick_a2[mz][my][mx];
               eky2 += y0*u_brick_a2[mz][my][mx];
               ekz2 += z0*u_brick_a2[mz][my][mx];
 
               ekx3 += x0*u_brick_a3[mz][my][mx];
               eky3 += y0*u_brick_a3[mz][my][mx];
               ekz3 += z0*u_brick_a3[mz][my][mx];
 
               ekx4 += x0*u_brick_a4[mz][my][mx];
               eky4 += y0*u_brick_a4[mz][my][mx];
               ekz4 += z0*u_brick_a4[mz][my][mx];
 
               ekx5 += x0*u_brick_a5[mz][my][mx];
               eky5 += y0*u_brick_a5[mz][my][mx];
               ekz5 += z0*u_brick_a5[mz][my][mx];
 
               ekx6 += x0*u_brick_a6[mz][my][mx];
               eky6 += y0*u_brick_a6[mz][my][mx];
               ekz6 += z0*u_brick_a6[mz][my][mx];
             }
           }
         }
 
         // scale every accumulated sum by the inverse grid spacing of its direction
         ekx0 *= hx_inv;
         eky0 *= hy_inv;
         ekz0 *= hz_inv;
 
         ekx1 *= hx_inv;
         eky1 *= hy_inv;
         ekz1 *= hz_inv;
 
         ekx2 *= hx_inv;
         eky2 *= hy_inv;
         ekz2 *= hz_inv;
 
         ekx3 *= hx_inv;
         eky3 *= hy_inv;
         ekz3 *= hz_inv;
 
         ekx4 *= hx_inv;
         eky4 *= hy_inv;
         ekz4 *= hz_inv;
 
         ekx5 *= hx_inv;
         eky5 *= hy_inv;
         ekz5 *= hz_inv;
 
         ekx6 *= hx_inv;
         eky6 *= hy_inv;
         ekz6 *= hz_inv;
 
         // convert D-field to force
         // the seven per-type coefficients are stored as B[7*type .. 7*type+6]
         // and are paired with the bricks in reverse order (lj0 <-> B[7*type+6])
         type = atom->type[i];
         lj0 = B[7*type+6];
         lj1 = B[7*type+5];
         lj2 = B[7*type+4];
         lj3 = B[7*type+3];
         lj4 = B[7*type+2];
         lj5 = B[7*type+1];
         lj6 = B[7*type];
 
         // atom position expressed in units of the grid spacing
         s1 = x[i][0]*hx_inv;
         s2 = x[i][1]*hy_inv;
         s3 = x[i][2]*hz_inv;
 
         // sf: per-direction term built from sf_coeff_6 and sines of the scaled
         // position, subtracted from the interpolated force
         // NOTE(review): presumably the self-force correction of the ad scheme - confirm
         sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
         sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
         sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
         f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
 
         sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
         sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
         sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
         f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
 
         sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
         sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
         sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
         // the z component is skipped entirely when slabflag == 2
         if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
  interpolate from grid to get per-atom energy/virial for dispersion
  interaction and arithmetic mixing rule
  ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::fieldforce_a_peratom()
 {
   const int nlocal = atom->nlocal;
 
   // no local atoms => nothing to do
 
   if (nlocal == 0) return;
 
   // loop over my charges, interpolate from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
   // (dx,dy,dz) = distance to "lower left" grid pt
   // (mx,my,mz) = global coords of moving stencil pt
 
   const double * const * const x = atom->x;
 
 #if defined(_OPENMP)
   const int nthreads = comm->nthreads;
 #pragma omp parallel default(none)
 #endif
   {
 #if defined(_OPENMP)
     // each thread works on a fixed chunk of atoms.
     // chunk length is 1 + nlocal/nthreads; the upper bound is clipped to nlocal
     const int tid = omp_get_thread_num();
     const int inum = nlocal;
     const int idelta = 1 + inum/nthreads;
     const int ifrom = tid*idelta;
     const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
 #else
     // without OpenMP a single pass covers all local atoms
     const int ifrom = 0;
     const int ito = nlocal;
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
     thr->timer(Timer::START);
     // weight buffer handed out by this thread's ThrData object
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     // naming: uK is the energy sum for brick family aK,
     // vJK is the sum of virial component J for brick family aK
     int l,m,n,nx,ny,nz,mx,my,mz;
     FFT_SCALAR dx,dy,dz,x0,y0,z0;
     FFT_SCALAR u0,v00,v10,v20,v30,v40,v50;
     FFT_SCALAR u1,v01,v11,v21,v31,v41,v51;
     FFT_SCALAR u2,v02,v12,v22,v32,v42,v52;
     FFT_SCALAR u3,v03,v13,v23,v33,v43,v53;
     FFT_SCALAR u4,v04,v14,v24,v34,v44,v54;
     FFT_SCALAR u5,v05,v15,v25,v35,v45,v55;
     FFT_SCALAR u6,v06,v16,v26,v36,v46,v56;
     int type;
     double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
 
     // this if protects against having more threads than local atoms
     if (ifrom < nlocal) {
       for (int i = ifrom; i < ito; i++) {
 
         nx = part2grid_6[i][0];
         ny = part2grid_6[i][1];
         nz = part2grid_6[i][2];
         dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
         dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
         dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
 
         // fill r1d[0..2][*] with the 1d weights for this atom
         compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
 
         u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
         u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
         u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
         u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
         u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
         u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
         u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
         // x0 is the product of the three 1d weights at the stencil point
         for (n = nlower_6; n <= nupper_6; n++) {
           mz = n+nz;
           z0 = r1d[2][n];
           for (m = nlower_6; m <= nupper_6; m++) {
             my = m+ny;
             y0 = z0*r1d[1][m];
             for (l = nlower_6; l <= nupper_6; l++) {
               mx = l+nx;
               x0 = y0*r1d[0][l];
               if (eflag_atom) {
                 u0 += x0*u_brick_a0[mz][my][mx];
                 u1 += x0*u_brick_a1[mz][my][mx];
                 u2 += x0*u_brick_a2[mz][my][mx];
                 u3 += x0*u_brick_a3[mz][my][mx];
                 u4 += x0*u_brick_a4[mz][my][mx];
                 u5 += x0*u_brick_a5[mz][my][mx];
                 u6 += x0*u_brick_a6[mz][my][mx];
 	      }
               if (vflag_atom) {
                 v00 += x0*v0_brick_a0[mz][my][mx];
                 v10 += x0*v1_brick_a0[mz][my][mx];
                 v20 += x0*v2_brick_a0[mz][my][mx];
                 v30 += x0*v3_brick_a0[mz][my][mx];
                 v40 += x0*v4_brick_a0[mz][my][mx];
                 v50 += x0*v5_brick_a0[mz][my][mx];
                 v01 += x0*v0_brick_a1[mz][my][mx];
                 v11 += x0*v1_brick_a1[mz][my][mx];
                 v21 += x0*v2_brick_a1[mz][my][mx];
                 v31 += x0*v3_brick_a1[mz][my][mx];
                 v41 += x0*v4_brick_a1[mz][my][mx];
                 v51 += x0*v5_brick_a1[mz][my][mx];
                 v02 += x0*v0_brick_a2[mz][my][mx];
                 v12 += x0*v1_brick_a2[mz][my][mx];
                 v22 += x0*v2_brick_a2[mz][my][mx];
                 v32 += x0*v3_brick_a2[mz][my][mx];
                 v42 += x0*v4_brick_a2[mz][my][mx];
                 v52 += x0*v5_brick_a2[mz][my][mx];
                 v03 += x0*v0_brick_a3[mz][my][mx];
                 v13 += x0*v1_brick_a3[mz][my][mx];
                 v23 += x0*v2_brick_a3[mz][my][mx];
                 v33 += x0*v3_brick_a3[mz][my][mx];
                 v43 += x0*v4_brick_a3[mz][my][mx];
                 v53 += x0*v5_brick_a3[mz][my][mx];
                 v04 += x0*v0_brick_a4[mz][my][mx];
                 v14 += x0*v1_brick_a4[mz][my][mx];
                 v24 += x0*v2_brick_a4[mz][my][mx];
                 v34 += x0*v3_brick_a4[mz][my][mx];
                 v44 += x0*v4_brick_a4[mz][my][mx];
                 v54 += x0*v5_brick_a4[mz][my][mx];
                 v05 += x0*v0_brick_a5[mz][my][mx];
                 v15 += x0*v1_brick_a5[mz][my][mx];
                 v25 += x0*v2_brick_a5[mz][my][mx];
                 v35 += x0*v3_brick_a5[mz][my][mx];
                 v45 += x0*v4_brick_a5[mz][my][mx];
                 v55 += x0*v5_brick_a5[mz][my][mx];
                 v06 += x0*v0_brick_a6[mz][my][mx];
                 v16 += x0*v1_brick_a6[mz][my][mx];
                 v26 += x0*v2_brick_a6[mz][my][mx];
                 v36 += x0*v3_brick_a6[mz][my][mx];
                 v46 += x0*v4_brick_a6[mz][my][mx];
                 v56 += x0*v5_brick_a6[mz][my][mx];
               }
             }
           }
         }
 
         // scale the interpolated sums by half of the per-type B coefficients;
         // B[7*type .. 7*type+6] are paired with the bricks in reverse order
         type = atom->type[i];
         lj0 = B[7*type+6]*0.5;
         lj1 = B[7*type+5]*0.5;
         lj2 = B[7*type+4]*0.5;
         lj3 = B[7*type+3]*0.5;
         lj4 = B[7*type+2]*0.5;
         lj5 = B[7*type+1]*0.5;
         lj6 = B[7*type]*0.5;
 
         // add the combined contributions to the per-atom energy and virial
         if (eflag_atom)
           eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 +
             u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6;
         if (vflag_atom) {
           vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
             v04*lj4 + v05*lj5 + v06*lj6;
           vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
             v14*lj4 + v15*lj5 + v16*lj6;
           vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
             v24*lj4 + v25*lj5 + v26*lj6;
           vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
             v34*lj4 + v35*lj5 + v36*lj6;
           vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
             v44*lj4 + v45*lj5 + v46*lj6;
           vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
             v54*lj4 + v55*lj5 + v56*lj6;
         }
       }
     }
     thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 void PPPMDispOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
                                     const FFT_SCALAR &dy, const FFT_SCALAR &dz,
                                     const int ord, FFT_SCALAR * const * const rho_c)
 {
   // stencil offsets run from (1-ord)/2 up to and including ord/2
   const int kfirst = (1-ord)/2;
   const int klast = ord/2;
 
   for (int k = kfirst; k <= klast; ++k) {
     FFT_SCALAR wx = ZEROF;
     FFT_SCALAR wy = ZEROF;
     FFT_SCALAR wz = ZEROF;
 
     // Horner evaluation of the polynomial with coefficients rho_c[0..ord-1][k],
     // highest coefficient first, at dx, dy and dz
     for (int j = ord-1; j >= 0; --j) {
       const FFT_SCALAR coeff = rho_c[j][k];
       wx = coeff + wx*dx;
       wy = coeff + wy*dy;
       wz = coeff + wz*dz;
     }
 
     r1d[0][k] = wx;
     r1d[1][k] = wy;
     r1d[2][k] = wz;
   }
 }
 
 /* ----------------------------------------------------------------------
    charge assignment into drho1d
    dx,dy,dz = distance of particle from "lower left" grid point
 ------------------------------------------------------------------------- */
 
 void PPPMDispOMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx,
                                      const FFT_SCALAR &dy, const FFT_SCALAR &dz,
                                      const int ord, FFT_SCALAR * const * const drho_c)
 {
   // stencil offsets run from (1-ord)/2 up to and including ord/2
   const int kfirst = (1-ord)/2;
   const int klast = ord/2;
 
   for (int k = kfirst; k <= klast; ++k) {
     FFT_SCALAR wx = ZEROF;
     FFT_SCALAR wy = ZEROF;
     FFT_SCALAR wz = ZEROF;
 
     // Horner evaluation of the polynomial with coefficients drho_c[0..ord-2][k],
     // highest coefficient first, at dx, dy and dz
     for (int j = ord-2; j >= 0; --j) {
       const FFT_SCALAR coeff = drho_c[j][k];
       wx = coeff + wx*dx;
       wy = coeff + wy*dy;
       wz = coeff + wz*dz;
     }
 
     dr1d[0][k] = wx;
     dr1d[1][k] = wy;
     dr1d[2][k] = wz;
   }
 }
diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp
index cc184d9d6..0e9eafb2f 100644
--- a/src/USER-OMP/thr_data.cpp
+++ b/src/USER-OMP/thr_data.cpp
@@ -1,369 +1,369 @@
 /* -------------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   per-thread data management for LAMMPS
    Contributing author: Axel Kohlmeyer (Temple U)
+   per-thread data management for LAMMPS
 ------------------------------------------------------------------------- */
 
 #include "thr_data.h"
 
 #include <string.h>
 #include <stdio.h>
 
 #include "memory.h"
 #include "timer.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 ThrData::ThrData(int tid, Timer *t)
   : _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0),
     _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid), _timer(t)
 {
   // timing starts out switched off
   _timer_active = 0;
 
   // init_pppm_disp() reads these two pointers back when it is asked to free
   // the buffers, so they must not start out with indeterminate values
   _rho1d_6 = NULL;
   _drho1d_6 = NULL;
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::check_tid(int tid)
 {
   // matching thread ids need no diagnostics
   if (tid == _tid) return;
 
   fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::_stamp(enum Timer::ttype flag)
 {
   // a negative activity flag disables stamping altogether
   // (do nothing until it gets set to 0 in ::setup())
   if (_timer_active >= 0) {
 
     // a START stamp switches the timer on
     if (flag == Timer::START) _timer_active = 1;
 
     // forward the stamp only while the timer is switched on
     if (_timer_active != 0) _timer->stamp(flag);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrData::get_time(enum Timer::ttype flag)
 {
   // without a timer object there is nothing to report
   if (!_timer) return 0.0;
 
   return _timer->get_wall(flag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::init_force(int nall, double **f, double **torque,
                          double *erforce, double *de, double *drho)
 {
   eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0;
   memset(virial_pair,0,6*sizeof(double));
   memset(virial_bond,0,6*sizeof(double));
   memset(virial_angle,0,6*sizeof(double));
   memset(virial_dihed,0,6*sizeof(double));
   memset(virial_imprp,0,6*sizeof(double));
   memset(virial_kspce,0,6*sizeof(double));
 
   eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL;
   vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL;
 
   if (nall >= 0 && f) {
     _f = f + _tid*nall;
     memset(&(_f[0][0]),0,nall*3*sizeof(double));
   } else _f = NULL;
 
   if (nall >= 0 && torque) {
     _torque = torque + _tid*nall;
     memset(&(_torque[0][0]),0,nall*3*sizeof(double));
   } else _torque = NULL;
 
   if (nall >= 0 && erforce) {
     _erforce = erforce + _tid*nall;
     memset(&(_erforce[0]),0,nall*sizeof(double));
   } else _erforce = NULL;
 
   if (nall >= 0 && de) {
     _de = de + _tid*nall;
     memset(&(_de[0]),0,nall*sizeof(double));
   } else _de = NULL;
 
   if (nall >= 0 && drho) {
     _drho = drho + _tid*nall;
     memset(&(_drho[0]),0,nall*sizeof(double));
   } else _drho = NULL;
 }
 
 /* ----------------------------------------------------------------------
    set up and clear out locally managed per atom arrays
 ------------------------------------------------------------------------- */
 
 void ThrData::init_eam(int nall, double *rho)
 {
   // nothing to set up without an array or with a negative atom count
   if (!rho || nall < 0) return;
 
   // per-thread window into rho, cleared before use
   _rho = rho + _tid*nall;
   memset(_rho, 0, nall*sizeof(double));
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda)
 {
   // the density array is handled exactly as for plain EAM
   init_eam(nall, rho);
 
   // mu and lambda are only set up when both are present
   if (nall < 0 || !mu || !lambda) return;
 
   _mu = mu + _tid*nall;
   _lambda = lambda + _tid*nall;
 
   // clear 3 values per atom for mu and 6 values per atom for lambda
   memset(&(_mu[0][0]), 0, nall*3*sizeof(double));
   memset(&(_lambda[0][0]), 0, nall*6*sizeof(double));
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values)
 {
   // the density array is handled exactly as for plain EAM
   init_eam(nall, rho);
 
   // rhoB and D_values are only set up when both are present
   if (nall < 0 || !rhoB || !D_values) return;
 
   _rhoB = rhoB + _tid*nall;
   _D_values = D_values + _tid*nall;
 
   memset(_rhoB, 0, nall*sizeof(double));
   memset(_D_values, 0, nall*sizeof(double));
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrData::init_eim(int nall, double *rho, double *fp)
 {
   // the density array is handled exactly as for plain EAM
   init_eam(nall, rho);
 
   // fp is optional
   if (nall < 0 || !fp) return;
 
   // per-thread window into fp, cleared before use
   _fp = fp + _tid*nall;
   memset(_fp,0,nall*sizeof(double));
 }
 
 /* ----------------------------------------------------------------------
    if order > 0 : set up per thread storage for PPPM
    if order < 0 : free per thread storage for PPPM
 ------------------------------------------------------------------------- */
 #if defined(FFT_SINGLE)
 typedef float FFT_SCALAR;
 #else
 typedef double FFT_SCALAR;
 #endif
 
 void ThrData::init_pppm(int order, Memory *memory)
 {
   FFT_SCALAR **rho1d, **drho1d;
   if (order > 0) {
     // allocate the two 3 x [-order/2, order/2] buffers and keep them
     // as untyped pointers in this object
     memory->create2d_offset(rho1d,3,-order/2,order/2,"thr_data:rho1d");
     memory->create2d_offset(drho1d,3,-order/2,order/2,"thr_data:drho1d");
     _rho1d = static_cast<void *>(rho1d);
     _drho1d = static_cast<void *>(drho1d);
   } else {
     // the magnitude of a non-positive order gives the offset used at allocation
     order = -order;
     rho1d = static_cast<FFT_SCALAR **>(_rho1d);
     drho1d = static_cast<FFT_SCALAR **>(_drho1d);
     if (rho1d) memory->destroy2d_offset(rho1d,-order/2);
     if (drho1d) memory->destroy2d_offset(drho1d,-order/2);
 
     // drop the stored pointers so that a repeated free request or any
     // later use of them cannot touch memory that was already released
     _rho1d = NULL;
     _drho1d = NULL;
   }
 }
 
 /* ----------------------------------------------------------------------
    if order_6 > 0  : set up per thread storage for the dispersion part of PPPM
    if order_6 <= 0 : free per thread storage for the dispersion part of PPPM
 ------------------------------------------------------------------------- */
 #if defined(FFT_SINGLE)
 typedef float FFT_SCALAR;
 #else
 typedef double FFT_SCALAR;
 #endif
 
 void ThrData::init_pppm_disp(int order_6, Memory *memory)
 {
   FFT_SCALAR **rho1d_6, **drho1d_6;
   if (order_6 > 0) {
     // allocate the two 3 x [-order_6/2, order_6/2] buffers and keep them
     // as untyped pointers in this object
     memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"thr_data:rho1d_6");
     memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"thr_data:drho1d_6");
     _rho1d_6 = static_cast<void *>(rho1d_6);
     _drho1d_6 = static_cast<void *>(drho1d_6);
   } else {
     // the magnitude of a non-positive order gives the offset used at allocation
     order_6 = -order_6;
     rho1d_6 = static_cast<FFT_SCALAR **>(_rho1d_6);
     drho1d_6 = static_cast<FFT_SCALAR **>(_drho1d_6);
     if (rho1d_6) memory->destroy2d_offset(rho1d_6,-order_6/2);
     if (drho1d_6) memory->destroy2d_offset(drho1d_6,-order_6/2);
 
     // drop the stored pointers so that a repeated free request or any
     // later use of them cannot touch memory that was already released
     _rho1d_6 = NULL;
     _drho1d_6 = NULL;
   }
 }
 
 
 /* ----------------------------------------------------------------------
    compute global pair virial via summing F dot r over own & ghost atoms
    at this point, only pairwise forces have been accumulated in atom->f
 ------------------------------------------------------------------------- */
 
 void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst)
 {
   const int nall = nlocal + nghost;
 
   // nfirst < 0  : one pass over all particles including ghosts
   // nfirst >= 0 : neighbor includegroup flag is set, so sum over the
   //               initial nfirst particles and then over the ghosts
   const bool use_all = (nfirst < 0);
   const int head_end = use_all ? nall : nfirst;
   const int tail_begin = use_all ? nall : nlocal;
 
   for (int pass = 0; pass < 2; ++pass) {
     const int ibegin = (pass == 0) ? 0 : tail_begin;
     const int iend = (pass == 0) ? head_end : nall;
 
     for (int i = ibegin; i < iend; ++i) {
       const double * const fi = _f[i];
       const double * const xi = x[i];
       virial_pair[0] += fi[0]*xi[0];
       virial_pair[1] += fi[1]*xi[1];
       virial_pair[2] += fi[2]*xi[2];
       virial_pair[3] += fi[1]*xi[0];
       virial_pair[4] += fi[2]*xi[0];
       virial_pair[5] += fi[2]*xi[1];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrData::memory_usage()
 {
   // (7 + 6*6) doubles, 2 double pointers and 4 ints
   const size_t nbytes = (7 + 6*6) * sizeof(double)
     + 2 * sizeof(double*)
     + 4 * sizeof(int);
 
   return static_cast<double>(nbytes);
 }
 
 /* additional helper functions */
 
 // reduce per thread data into the first part of the data
 // array that is used for the non-threaded parts and reset
 // the temporary storage to 0.0. this routine depends on
 // multi-dimensional arrays like force stored in this order
 // x1,y1,z1,x2,y2,z2,...
 // we need to post a barrier to wait until all threads are done
 // with writing to the array .
 // dall     = array holding nthreads consecutive copies of ndim*nall values
 // nall     = number of entries per copy (each entry has ndim values)
 // nthreads = number of copies / threads
 // ndim     = number of values per entry
 // tid      = id of the calling thread, selects the slice it reduces
 void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid)
 {
 #if defined(_OPENMP)
   // NOOP in single-threaded execution.
   if (nthreads == 1) return;
 #pragma omp barrier
   {
     // each thread reduces its own slice [ifrom,ito) of the nvals values
     const int nvals = ndim*nall;
     const int idelta = nvals/nthreads + 1;
     const int ifrom = tid*idelta;
     const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
 
 #if defined(USER_OMP_NO_UNROLL)
     if (ifrom < nvals) {
       int m = 0;
 
       // add copies 1..nthreads-1 into copy 0 and zero them afterwards
       for (m = ifrom; m < ito; ++m) {
         for (int n = 1; n < nthreads; ++n) {
           dall[m] += dall[n*nvals + m];
           dall[n*nvals + m] = 0.0;
         }
       }
     }
 #else
     // this if protects against having more threads than atoms
     if (ifrom < nvals) {
       int m = 0;
 
       // for architectures that have L1 D-cache line sizes of 64 bytes
       // (8 doubles) wide, explicitly unroll this loop to compute 8
       // contiguous values in the array at a time
       // -- modify this code based on the size of the cache line
       double t0, t1, t2, t3, t4, t5, t6, t7;
       for (m = ifrom; m < (ito-7); m+=8) {
         t0 = dall[m  ];
         t1 = dall[m+1];
         t2 = dall[m+2];
         t3 = dall[m+3];
         t4 = dall[m+4];
         t5 = dall[m+5];
         t6 = dall[m+6];
         t7 = dall[m+7];
         // add copies 1..nthreads-1 into the temporaries and zero them afterwards
         for (int n = 1; n < nthreads; ++n) {
           t0 += dall[n*nvals + m  ];
           t1 += dall[n*nvals + m+1];
           t2 += dall[n*nvals + m+2];
           t3 += dall[n*nvals + m+3];
           t4 += dall[n*nvals + m+4];
           t5 += dall[n*nvals + m+5];
           t6 += dall[n*nvals + m+6];
           t7 += dall[n*nvals + m+7];
           dall[n*nvals + m  ] = 0.0;
           dall[n*nvals + m+1] = 0.0;
           dall[n*nvals + m+2] = 0.0;
           dall[n*nvals + m+3] = 0.0;
           dall[n*nvals + m+4] = 0.0;
           dall[n*nvals + m+5] = 0.0;
           dall[n*nvals + m+6] = 0.0;
           dall[n*nvals + m+7] = 0.0;
         }
         dall[m  ] = t0;
         dall[m+1] = t1;
         dall[m+2] = t2;
         dall[m+3] = t3;
         dall[m+4] = t4;
         dall[m+5] = t5;
         dall[m+6] = t6;
         dall[m+7] = t7;
       }
       // do the last < 8 values
       for (; m < ito; m++) {
         for (int n = 1; n < nthreads; ++n) {
           dall[m] += dall[n*nvals + m];
           dall[n*nvals + m] = 0.0;
         }
       }
     }
 #endif
   }
 #else
   // NOOP in non-threaded execution.
   return;
 #endif
 }
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index 1aba7290a..1744a7738 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -1,1223 +1,1223 @@
 /* -------------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   OpenMP based threading support for LAMMPS
    Contributing author: Axel Kohlmeyer (Temple U)
+   OpenMP based threading support for LAMMPS
 ------------------------------------------------------------------------- */
 
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "timer.h"
 
 #include "thr_omp.h"
 
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "dihedral.h"
 #include "improper.h"
 #include "kspace.h"
 #include "compute.h"
 
 #include "math_const.h"
 
 #include <string.h>
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::ThrOMP(LAMMPS *ptr, int style)
   : lmp(ptr), fix(NULL), thr_style(style), thr_error(0)
 {
   // look up the fix registered as "package_omp"; a negative index means
   // it was not found, which is reported as an error for /omp styles
   const int omp_fix_index = lmp->modify->find_fix("package_omp");
   if (omp_fix_index < 0) {
     lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles");
   }
 
   // keep a typed handle to that fix for later use by this class
   fix = static_cast<FixOMP *>(lmp->modify->fix[omp_fix_index]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::~ThrOMP()
 {
   // intentionally empty: nothing is released here
 }
 
 /* ----------------------------------------------------------------------
    Hook up per thread per atom arrays into the tally infrastructure
    ---------------------------------------------------------------------- */
 
 void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom,
                           double **vatom, ThrData *thr)
 {
   const int tid = thr->get_tid();
   if (tid == 0) thr_error = 0;
 
   if (thr_style & THR_PAIR) {
     if (eflag & 2) {
       thr->eatom_pair = eatom + tid*nall;
       if (nall > 0)
         memset(&(thr->eatom_pair[0]),0,nall*sizeof(double));
     }
     if (vflag & 4) {
       thr->vatom_pair = vatom + tid*nall;
       if (nall > 0)
         memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double));
     }
   }
 
   if (thr_style & THR_BOND) {
     if (eflag & 2) {
       thr->eatom_bond = eatom + tid*nall;
       if (nall > 0)
         memset(&(thr->eatom_bond[0]),0,nall*sizeof(double));
     }
     if (vflag & 4) {
       thr->vatom_bond = vatom + tid*nall;
       if (nall > 0)
         memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double));
     }
   }
 
   if (thr_style & THR_ANGLE) {
     if (eflag & 2) {
       thr->eatom_angle = eatom + tid*nall;
       if (nall > 0)
         memset(&(thr->eatom_angle[0]),0,nall*sizeof(double));
     }
     if (vflag & 4) {
       thr->vatom_angle = vatom + tid*nall;
       if (nall > 0)
         memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double));
     }
   }
 
   if (thr_style & THR_DIHEDRAL) {
     if (eflag & 2) {
       thr->eatom_dihed = eatom + tid*nall;
       if (nall > 0)
         memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double));
     }
     if (vflag & 4) {
       thr->vatom_dihed = vatom + tid*nall;
       if (nall > 0)
         memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double));
     }
   }
 
   if (thr_style & THR_IMPROPER) {
     if (eflag & 2) {
       thr->eatom_imprp = eatom + tid*nall;
       if (nall > 0)
         memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double));
     }
     if (vflag & 4) {
       thr->vatom_imprp = vatom + tid*nall;
       if (nall > 0)
         memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double));
     }
   }
 
   // nothing to do for THR_KSPACE
 }
 
 /* ----------------------------------------------------------------------
    Reduce per thread data into the regular structures
    Reduction of global properties is serialized with a "critical"
    directive, so that only one thread at a time will access the
    global variables. Since we are not synchronized, this should
    come with little overhead. The reduction of per-atom properties
    in contrast is parallelized over threads in the same way as forces.
    ---------------------------------------------------------------------- */
 
 void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
                         ThrData *const thr)
 {
   // style : pointer identifying the calling style; it is only compared
   //         against fix->last_pair_hybrid and fix->last_omp_style
   // eflag : bit 1 -> accumulate global energies, bit 2 -> reduce eatom
   // vflag : bits 1|2 -> accumulate global virial, bit 4 -> reduce vatom
   // thr   : per-thread accumulators of the calling thread
   const int nlocal = lmp->atom->nlocal;
   const int nghost = lmp->atom->nghost;
   const int nall = nlocal + nghost;
   const int nfirst = lmp->atom->nfirst;
   const int nthreads = lmp->comm->nthreads;
   const int evflag = eflag | vflag;
 
   const int tid = thr->get_tid();
   double **f = lmp->atom->f;
   double **x = lmp->atom->x;
 
   // cleared below when the forces were already reduced for pair hybrid
   int need_force_reduce = 1;
 
   // sync_threads() is called first whenever any energy/virial tally is on
   if (evflag)
     sync_threads();
 
   // thr_style is matched as an exact value here, not tested bit by bit
   switch (thr_style) {
 
   case THR_PAIR: {
     Pair * const pair = lmp->force->pair;
 
     if (pair->vflag_fdotr) {
 
       // this is a non-hybrid pair style. compute per thread fdotr
       if (fix->last_pair_hybrid == NULL) {
         if (lmp->neighbor->includegroup == 0)
           thr->virial_fdotr_compute(x, nlocal, nghost, -1);
         else
           thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
       } else {
         if (style == fix->last_pair_hybrid) {
           // pair_style hybrid will compute fdotr for us
           // but we first need to reduce the forces
           data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
           fix->did_reduce();
           need_force_reduce = 0;
         }
       }
     }
 
     if (evflag) {
       // global accumulators are shared between threads: add this thread's
       // contribution inside a critical section, then reset the thread copy
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           pair->eng_vdwl += thr->eng_vdwl;
           pair->eng_coul += thr->eng_coul;
           thr->eng_vdwl = 0.0;
           thr->eng_coul = 0.0;
         }
         if (vflag & 3)
           for (int i=0; i < 6; ++i) {
             pair->virial[i] += thr->virial_pair[i];
             thr->virial_pair[i] = 0.0;
           }
       }
 
       // per-atom arrays are reduced outside the critical section
       // (1 value per atom for energy, 6 per atom for the virial)
       if (eflag & 2) {
         data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
       }
     }
   }
     break;
 
   case THR_BOND:
 
     // same scheme as THR_PAIR, using the bond energy/virial accumulators
     if (evflag) {
       Bond * const bond = lmp->force->bond;
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           bond->energy += thr->eng_bond;
           thr->eng_bond = 0.0;
         }
 
         if (vflag & 3) {
           for (int i=0; i < 6; ++i) {
             bond->virial[i] += thr->virial_bond[i];
             thr->virial_bond[i] = 0.0;
           }
         }
       }
 
       if (eflag & 2) {
         data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid);
       }
 
     }
     break;
 
   case THR_ANGLE:
 
     // same scheme as THR_PAIR, using the angle energy/virial accumulators
     if (evflag) {
       Angle * const angle = lmp->force->angle;
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           angle->energy += thr->eng_angle;
           thr->eng_angle = 0.0;
         }
 
         if (vflag & 3) {
           for (int i=0; i < 6; ++i) {
             angle->virial[i] += thr->virial_angle[i];
             thr->virial_angle[i] = 0.0;
           }
         }
       }
 
       if (eflag & 2) {
         data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid);
       }
 
     }
     break;
 
   case THR_DIHEDRAL:
 
     // same scheme as THR_PAIR, using the dihedral energy/virial accumulators
     if (evflag) {
       Dihedral * const dihedral = lmp->force->dihedral;
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           dihedral->energy += thr->eng_dihed;
           thr->eng_dihed = 0.0;
         }
 
         if (vflag & 3) {
           for (int i=0; i < 6; ++i) {
             dihedral->virial[i] += thr->virial_dihed[i];
             thr->virial_dihed[i] = 0.0;
           }
         }
       }
 
       if (eflag & 2) {
         data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
       }
 
     }
     break;
 
   case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals
 
     // this case folds both the dihedral and the pair accumulators of the
     // thread into the corresponding global dihedral and pair objects
     if (evflag) {
       Dihedral * const dihedral = lmp->force->dihedral;
       Pair * const pair = lmp->force->pair;
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           dihedral->energy += thr->eng_dihed;
           pair->eng_vdwl += thr->eng_vdwl;
           pair->eng_coul += thr->eng_coul;
           thr->eng_dihed = 0.0;
           thr->eng_vdwl = 0.0;
           thr->eng_coul = 0.0;
         }
 
         if (vflag & 3) {
           for (int i=0; i < 6; ++i) {
             dihedral->virial[i] += thr->virial_dihed[i];
             pair->virial[i] += thr->virial_pair[i];
             thr->virial_dihed[i] = 0.0;
             thr->virial_pair[i] = 0.0;
           }
         }
       }
 
       if (eflag & 2) {
         data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
         data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
         data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
       }
     }
     break;
 
   case THR_IMPROPER:
 
     // same scheme as THR_PAIR, using the improper energy/virial accumulators
     if (evflag) {
       Improper *improper = lmp->force->improper;
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
       {
         if (eflag & 1) {
           improper->energy += thr->eng_imprp;
           thr->eng_imprp = 0.0;
         }
 
         if (vflag & 3) {
           for (int i=0; i < 6; ++i) {
             improper->virial[i] += thr->virial_imprp[i];
             thr->virial_imprp[i] = 0.0;
           }
         }
       }
 
       if (eflag & 2) {
         data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid);
       }
       if (vflag & 4) {
         data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid);
       }
 
     }
     break;
 
   case THR_KSPACE:
     // nothing to do. XXX may need to add support for per-atom info
     break;
 
   case THR_INTGR:
     // nothing to do
     break;
 
   default:
     // unknown style value: only a diagnostic is printed, no error is raised
     printf("tid:%d unhandled thr_style case %d\n", tid, thr_style);
     break;
   }
 
   // forces (and torques, if the atom style has them) are reduced only when
   // the caller is the style recorded in fix->last_omp_style
   if (style == fix->last_omp_style) {
     if (need_force_reduce) {
       data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
       fix->did_reduce();
     }
 
     if (lmp->atom->torque)
       data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
   }
   thr->timer(Timer::COMM);
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and eng_coul into per thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
 
 void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j,
                          const int nlocal, const int newton_pair,
                          const double evdwl, const double ecoul, ThrData * const thr)
 {
   // an atom receives its share when newton_pair is on or its index is
   // below nlocal
   const bool tally_i = newton_pair || (i < nlocal);
   const bool tally_j = newton_pair || (j < nlocal);
 
   // global energy: the full amount with newton_pair on, otherwise one half
   // per atom whose index is below nlocal
   if (pair->eflag_global) {
     if (newton_pair) {
       thr->eng_vdwl += evdwl;
       thr->eng_coul += ecoul;
     } else {
       const double half_vdwl = 0.5*evdwl;
       const double half_coul = 0.5*ecoul;
       if (tally_i) {
         thr->eng_vdwl += half_vdwl;
         thr->eng_coul += half_coul;
       }
       if (tally_j) {
         thr->eng_vdwl += half_vdwl;
         thr->eng_coul += half_coul;
       }
     }
   }
 
   // per-atom energy: half of the combined pair energy per tallied atom
   if (pair->eflag_atom) {
     const double half_total = 0.5 * (evdwl + ecoul);
     if (tally_i) thr->eatom_pair[i] += half_total;
     if (tally_j) thr->eatom_pair[j] += half_total;
   }
 }
 
 /* helper functions */
 // add the six components of vin onto vout, element by element
 static void v_tally(double * const vout, const double * const vin)
 {
   for (int n = 0; n < 6; ++n)
     vout[n] += vin[n];
 }
 
 // add scale times each of the six components of vin onto vout
 static void v_tally(double * const vout, const double scale, const double * const vin)
 {
   for (int n = 0; n < 6; ++n)
     vout[n] += scale*vin[n];
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
 void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j,
                          const int nlocal, const int newton_pair,
                          const double * const v, ThrData * const thr)
 {
   // an atom receives its share when newton_pair is on or its index is
   // below nlocal
   const bool tally_i = newton_pair || (i < nlocal);
   const bool tally_j = newton_pair || (j < nlocal);
 
   // global virial: all of v with newton_pair on, otherwise half of v per
   // atom whose index is below nlocal
   if (pair->vflag_global) {
     double * const vglobal = thr->virial_pair;
     if (newton_pair) {
       v_tally(vglobal,v);
     } else {
       if (tally_i) v_tally(vglobal,0.5,v);
       if (tally_j) v_tally(vglobal,0.5,v);
     }
   }
 
   // per-atom virial: half of v goes to each tallied atom
   if (pair->vflag_atom) {
     if (tally_i) v_tally(thr->vatom_pair[i],0.5,v);
     if (tally_j) v_tally(thr->vatom_pair[j],0.5,v);
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into per thread global and per-atom accumulators
    need i < nlocal test since called by bond_quartic and dihedral_charmm
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal,
                           const int newton_pair, const double evdwl, const double ecoul,
                           const double fpair, const double delx, const double dely,
                           const double delz, ThrData * const thr)
 {
   // the energy part is delegated to e_tally_thr()
   if (pair->eflag_either) {
     e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
   }
 
   // the virial part: build the six components from the separation vector
   // and the scalar force factor, then delegate to v_tally_thr()
   if (pair->vflag_either) {
     const double vpair[6] = {
       delx*delx*fpair, dely*dely*fpair, delz*delz*fpair,
       delx*dely*fpair, delx*delz*fpair, dely*delz*fpair
     };
     v_tally_thr(pair, i, j, nlocal, newton_pair, vpair, thr);
   }
 
   if (pair->num_tally_compute > 0) {
     // the registered tally callbacks are not thread safe, so the whole
     // callback loop runs inside a critical section
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
     for (int n = 0; n < pair->num_tally_compute; ++n) {
       pair->list_tally_compute[n]->pair_tally_callback(i, j, nlocal, newton_pair,
                                                        evdwl, ecoul, fpair,
                                                        delx, dely, delz);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    for virial, have delx,dely,delz and fx,fy,fz
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j,
                               const int nlocal, const int newton_pair,
                               const double evdwl, const double ecoul,
                               const double fx, const double fy, const double fz,
                               const double delx, const double dely, const double delz,
                               ThrData * const thr)
 {
   // the energy part is delegated to e_tally_thr()
   if (pair->eflag_either) {
     e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
   }
 
   // the virial part: components are products of the separation vector
   // and the force vector (xx, yy, zz, xy, xz, yz)
   if (pair->vflag_either) {
     const double vpair[6] = {
       delx*fx, dely*fy, delz*fz,
       delx*fy, delx*fz, dely*fz
     };
     v_tally_thr(pair, i, j, nlocal, newton_pair, vpair, thr);
   }
 }
 
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    for virial, have delx,dely,delz and fx,fy,fz
    called when using full neighbor lists
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_xyz_full_thr(Pair * const pair, const int i,
                                    const double evdwl, const double ecoul,
                                    const double fx, const double fy,
                                    const double fz, const double delx,
                                    const double dely, const double delz,
                                    ThrData * const thr)
 {
   // Only atom i is tallied: the helpers are called with both atom indices
   // set to i, nlocal set to i+1 and newton_pair set to 0, so both of their
   // "index < nlocal" tests succeed for the same atom.
   // NOTE(review): evdwl is halved before being forwarded while ecoul is
   // forwarded unscaled -- confirm that this asymmetry is intended.
   if (pair->eflag_either) {
     e_tally_thr(pair, i, i, i+1, 0, 0.5*evdwl, ecoul, thr);
   }
 
   // virial components carry an explicit factor of one half
   if (pair->vflag_either) {
     const double vhalf[6] = {
       0.5*delx*fx, 0.5*dely*fy, 0.5*delz*fz,
       0.5*delx*fy, 0.5*delx*fz, 0.5*dely*fz
     };
     v_tally_thr(pair, i, i, i+1, 0, vhalf, thr);
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    called by SW and hbond potentials, newton_pair is always on
    virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
  ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k,
                            const double evdwl, const double ecoul,
                            const double * const fj, const double * const fk,
                            const double * const drji, const double * const drki,
                            ThrData * const thr)
 {
   // global energy gets the full amount
   if (pair->eflag_either && pair->eflag_global) {
     thr->eng_vdwl += evdwl;
     thr->eng_coul += ecoul;
   }
 
   // per-atom energy: one third of the combined energy for each of i, j, k
   if (pair->eflag_either && pair->eflag_atom) {
     const double share = THIRD * (evdwl + ecoul);
     thr->eatom_pair[i] += share;
     thr->eatom_pair[j] += share;
     thr->eatom_pair[k] += share;
   }
 
   if (pair->vflag_either) {
     // virial = drji*fj + drki*fk, stored as xx, yy, zz, xy, xz, yz
     const double vtriple[6] = {
       drji[0]*fj[0] + drki[0]*fk[0],
       drji[1]*fj[1] + drki[1]*fk[1],
       drji[2]*fj[2] + drki[2]*fk[2],
       drji[0]*fj[1] + drki[0]*fk[1],
       drji[0]*fj[2] + drki[0]*fk[2],
       drji[1]*fj[2] + drki[1]*fk[2]
     };
 
     if (pair->vflag_global) v_tally(thr->virial_pair,vtriple);
 
     // each of the three atoms receives one third of the virial
     if (pair->vflag_atom) {
       const int atoms[3] = {i, j, k};
       for (int n = 0; n < 3; ++n)
         v_tally(thr->vatom_pair[atoms[n]],THIRD,vtriple);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into global and per-atom accumulators
    called by AIREBO potential, newton_pair is always on
  ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j,
                            const int k, const int m, const double evdwl,
                            const double * const fi, const double * const fj,
                            const double * const fk, const double * const drim,
                            const double * const drjm, const double * const drkm,
                            ThrData * const thr)
 {
   // the four atoms taking part in the interaction, in tally order
   const int atoms[4] = {i, j, k, m};
 
   if (pair->eflag_either) {
     if (pair->eflag_global) thr->eng_vdwl += evdwl;
 
     // per-atom energy: one quarter of evdwl for each of the four atoms
     if (pair->eflag_atom) {
       const double share = 0.25 * evdwl;
       for (int n = 0; n < 4; ++n)
         thr->eatom_pair[atoms[n]] += share;
     }
   }
 
   // only the per-atom virial is tallied in this function
   if (pair->vflag_atom) {
     // one quarter of drim*fi + drjm*fj + drkm*fk (xx, yy, zz, xy, xz, yz)
     const double vquarter[6] = {
       0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]),
       0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]),
       0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]),
       0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]),
       0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]),
       0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2])
     };
 
     for (int n = 0; n < 4; ++n)
       v_tally(thr->vatom_pair[atoms[n]],vquarter);
   }
 }
 
 /* ----------------------------------------------------------------------
    tally ecoul and virial into each of n atoms in list
    called by TIP4P potential, newton_pair is always on
    changes v values by dividing by n
  ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_list_thr(Pair * const pair, const int key,
                                const int * const list, const double * const v,
                                const double ecoul, const double alpha,
                                ThrData * const thr)
 {
   // 'key' selects how many entries of 'list' are used and how the amount
   // is split between them: 0 -> 2 atoms, 1 or 2 -> 4 atoms, any other
   // value -> 6 atoms. 'alpha' sets the weights within each split.
 
   if (pair->eflag_either) {
     if (pair->eflag_global) thr->eng_coul += ecoul;
 
     if (pair->eflag_atom) {
       double * const ea = thr->eatom_pair;
       switch (key) {
       case 0:
         ea[list[0]] += 0.5*ecoul;
         ea[list[1]] += 0.5*ecoul;
         break;
       case 1:
         ea[list[0]] += 0.5*ecoul*(1-alpha);
         ea[list[1]] += 0.25*ecoul*alpha;
         ea[list[2]] += 0.25*ecoul*alpha;
         ea[list[3]] += 0.5*ecoul;
         break;
       case 2:
         ea[list[0]] += 0.5*ecoul;
         ea[list[1]] += 0.5*ecoul*(1-alpha);
         ea[list[2]] += 0.25*ecoul*alpha;
         ea[list[3]] += 0.25*ecoul*alpha;
         break;
       default:
         ea[list[0]] += 0.5*ecoul*(1-alpha);
         ea[list[1]] += 0.25*ecoul*alpha;
         ea[list[2]] += 0.25*ecoul*alpha;
         ea[list[3]] += 0.5*ecoul*(1-alpha);
         ea[list[4]] += 0.25*ecoul*alpha;
         ea[list[5]] += 0.25*ecoul*alpha;
         break;
       }
     }
   }
 
   if (pair->vflag_either) {
     if (pair->vflag_global)
       v_tally(thr->virial_pair,v);
 
     // the per-atom virial uses the same weights as the per-atom energy,
     // applied to each of the six components of v
     if (pair->vflag_atom) {
       double ** const va = thr->vatom_pair;
       switch (key) {
       case 0:
         for (int n = 0; n < 6; n++) {
           va[list[0]][n] += 0.5*v[n];
           va[list[1]][n] += 0.5*v[n];
         }
         break;
       case 1:
         for (int n = 0; n < 6; n++) {
           va[list[0]][n] += 0.5*v[n]*(1-alpha);
           va[list[1]][n] += 0.25*v[n]*alpha;
           va[list[2]][n] += 0.25*v[n]*alpha;
           va[list[3]][n] += 0.5*v[n];
         }
         break;
       case 2:
         for (int n = 0; n < 6; n++) {
           va[list[0]][n] += 0.5*v[n];
           va[list[1]][n] += 0.5*v[n]*(1-alpha);
           va[list[2]][n] += 0.25*v[n]*alpha;
           va[list[3]][n] += 0.25*v[n]*alpha;
         }
         break;
       default:
         for (int n = 0; n < 6; n++) {
           va[list[0]][n] += 0.5*v[n]*(1-alpha);
           va[list[1]][n] += 0.25*v[n]*alpha;
           va[list[2]][n] += 0.25*v[n]*alpha;
           va[list[3]][n] += 0.5*v[n]*(1-alpha);
           va[list[4]][n] += 0.25*v[n]*alpha;
           va[list[5]][n] += 0.25*v[n]*alpha;
         }
         break;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
                           const int newton_bond, const double ebond, const double fbond,
                           const double delx, const double dely, const double delz,
                           ThrData * const thr)
 {
   // an atom receives its share when newton_bond is on or its index is
   // below nlocal
   const bool tally_i = newton_bond || (i < nlocal);
   const bool tally_j = newton_bond || (j < nlocal);
 
   if (bond->eflag_either) {
     const double half_energy = 0.5*ebond;
 
     // global energy: all of ebond with newton_bond on, otherwise one half
     // per atom whose index is below nlocal
     if (bond->eflag_global) {
       if (newton_bond) {
         thr->eng_bond += ebond;
       } else {
         if (tally_i) thr->eng_bond += half_energy;
         if (tally_j) thr->eng_bond += half_energy;
       }
     }
 
     // per-atom energy: one half per tallied atom
     if (bond->eflag_atom) {
       if (tally_i) thr->eatom_bond[i] += half_energy;
       if (tally_j) thr->eatom_bond[j] += half_energy;
     }
   }
 
   if (bond->vflag_either) {
     // virial components xx, yy, zz, xy, xz, yz
     double vbond[6] = {
       delx*delx*fbond, dely*dely*fbond, delz*delz*fbond,
       delx*dely*fbond, delx*delz*fbond, dely*delz*fbond
     };
 
     if (bond->vflag_global) {
       if (newton_bond) {
         v_tally(thr->virial_bond,vbond);
       } else {
         if (tally_i) v_tally(thr->virial_bond,0.5,vbond);
         if (tally_j) v_tally(thr->virial_bond,0.5,vbond);
       }
     }
 
     if (bond->vflag_atom) {
       // halve the components once, then add them to each tallied atom
       for (int n = 0; n < 6; ++n)
         vbond[n] *= 0.5;
 
       if (tally_i) v_tally(thr->vatom_bond[i],vbond);
       if (tally_j) v_tally(thr->vatom_bond[j],vbond);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
                           const int nlocal, const int newton_bond, const double eangle,
                           const double * const f1, const double * const f3,
                           const double delx1, const double dely1, const double delz1,
                           const double delx2, const double dely2, const double delz2,
                           ThrData * const thr)
 {
   // an atom receives its share when newton_bond is on or its index is
   // below nlocal
   const bool tally_i = newton_bond || (i < nlocal);
   const bool tally_j = newton_bond || (j < nlocal);
   const bool tally_k = newton_bond || (k < nlocal);
 
   if (angle->eflag_either) {
     const double third_energy = THIRD*eangle;
 
     // global energy: all of eangle with newton_bond on, otherwise one
     // third per atom whose index is below nlocal
     if (angle->eflag_global) {
       if (newton_bond) {
         thr->eng_angle += eangle;
       } else {
         if (tally_i) thr->eng_angle += third_energy;
         if (tally_j) thr->eng_angle += third_energy;
         if (tally_k) thr->eng_angle += third_energy;
       }
     }
 
     // per-atom energy: one third per tallied atom
     if (angle->eflag_atom) {
       if (tally_i) thr->eatom_angle[i] += third_energy;
       if (tally_j) thr->eatom_angle[j] += third_energy;
       if (tally_k) thr->eatom_angle[k] += third_energy;
     }
   }
 
   if (angle->vflag_either) {
     // virial = del1*f1 + del2*f3, stored as xx, yy, zz, xy, xz, yz
     double vangle[6] = {
       delx1*f1[0] + delx2*f3[0],
       dely1*f1[1] + dely2*f3[1],
       delz1*f1[2] + delz2*f3[2],
       delx1*f1[1] + delx2*f3[1],
       delx1*f1[2] + delx2*f3[2],
       dely1*f1[2] + dely2*f3[2]
     };
 
     if (angle->vflag_global) {
       if (newton_bond) {
         v_tally(thr->virial_angle,vangle);
       } else {
         // scale by the number of atoms with index below nlocal, in thirds
         const int cnt = static_cast<int>(tally_i) + static_cast<int>(tally_j)
           + static_cast<int>(tally_k);
         v_tally(thr->virial_angle,cnt*THIRD,vangle);
       }
     }
 
     if (angle->vflag_atom) {
       // reduce the components to one third, then add to each tallied atom
       for (int n = 0; n < 6; ++n)
         vangle[n] *= THIRD;
 
       if (tally_i) v_tally(thr->vatom_angle[i],vangle);
       if (tally_j) v_tally(thr->vatom_angle[j],vangle);
       if (tally_k) v_tally(thr->vatom_angle[k],vangle);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial from 1-3 repulsion of SDK angle into accumulators
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
                             const int nlocal, const int newton_bond,
                             const double epair, const double fpair,
                             const double delx, const double dely,
                             const double delz, ThrData * const thr)
 {
   // an atom receives its share when newton_bond is on or its index is
   // below nlocal
   const bool tally_1 = newton_bond || (i1 < nlocal);
   const bool tally_3 = newton_bond || (i3 < nlocal);
 
   if (angle->eflag_either) {
     const double half_energy = 0.5 * epair;
 
     // global energy: one half per tallied atom
     if (angle->eflag_global) {
       if (tally_1) thr->eng_angle += half_energy;
       if (tally_3) thr->eng_angle += half_energy;
     }
 
     // per-atom energy: one half per tallied atom
     if (angle->eflag_atom) {
       if (tally_1) thr->eatom_angle[i1] += half_energy;
       if (tally_3) thr->eatom_angle[i3] += half_energy;
     }
   }
 
   if (angle->vflag_either) {
     // virial components xx, yy, zz, xy, xz, yz
     const double v13[6] = {
       delx*delx*fpair, dely*dely*fpair, delz*delz*fpair,
       delx*dely*fpair, delx*delz*fpair, dely*delz*fpair
     };
 
     // global virial: half of the components per tallied atom
     if (angle->vflag_global) {
       if (tally_1) v_tally(thr->virial_angle,0.5,v13);
       if (tally_3) v_tally(thr->virial_angle,0.5,v13);
     }
 
     // per-atom virial: half of the components per tallied atom
     if (angle->vflag_atom) {
       if (tally_1) v_tally(thr->vatom_angle[i1],0.5,v13);
       if (tally_3) v_tally(thr->vatom_angle[i3],0.5,v13);
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2,
                           const int i3, const int i4, const int nlocal,
                           const int newton_bond, const double edihedral,
                           const double * const f1, const double * const f3,
                           const double * const f4, const double vb1x,
                           const double vb1y, const double vb1z, const double vb2x,
                           const double vb2y, const double vb2z, const double vb3x,
                           const double vb3y, const double vb3z, ThrData * const thr)
 {
   // an atom receives its share when newton_bond is on or its index is
   // below nlocal
   const bool tally_1 = newton_bond || (i1 < nlocal);
   const bool tally_2 = newton_bond || (i2 < nlocal);
   const bool tally_3 = newton_bond || (i3 < nlocal);
   const bool tally_4 = newton_bond || (i4 < nlocal);
 
   // number of tallied atoms; only used on the newton_bond == 0 paths
   const int ntally = static_cast<int>(tally_1) + static_cast<int>(tally_2)
     + static_cast<int>(tally_3) + static_cast<int>(tally_4);
 
   if (dihed->eflag_either) {
     const double quarter_energy = 0.25*edihedral;
 
     // global energy: all of edihedral with newton_bond on, otherwise one
     // quarter per atom whose index is below nlocal
     if (dihed->eflag_global) {
       if (newton_bond) {
         thr->eng_dihed += edihedral;
       } else {
         thr->eng_dihed += static_cast<double>(ntally)*quarter_energy;
       }
     }
 
     // per-atom energy: one quarter per tallied atom
     if (dihed->eflag_atom) {
       if (tally_1) thr->eatom_dihed[i1] += quarter_energy;
       if (tally_2) thr->eatom_dihed[i2] += quarter_energy;
       if (tally_3) thr->eatom_dihed[i3] += quarter_energy;
       if (tally_4) thr->eatom_dihed[i4] += quarter_energy;
     }
   }
 
   if (dihed->vflag_either) {
     // virial = vb1*f1 + vb2*f3 + (vb3+vb2)*f4, as xx, yy, zz, xy, xz, yz
     double vdihed[6] = {
       vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0],
       vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1],
       vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2],
       vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1],
       vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2],
       vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]
     };
 
     if (dihed->vflag_global) {
       if (newton_bond) {
         v_tally(thr->virial_dihed,vdihed);
       } else {
         v_tally(thr->virial_dihed,0.25*static_cast<double>(ntally),vdihed);
       }
     }
 
     if (dihed->vflag_atom) {
       // reduce the components to one quarter, then add to each tallied atom
       for (int n = 0; n < 6; ++n)
         vdihed[n] *= 0.25;
 
       if (tally_1) v_tally(thr->vatom_dihed[i1],vdihed);
       if (tally_2) v_tally(thr->vatom_dihed[i2],vdihed);
       if (tally_3) v_tally(thr->vatom_dihed[i3],vdihed);
       if (tally_4) v_tally(thr->vatom_dihed[i4],vdihed);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2,
                           const int i3, const int i4, const int nlocal,
                           const int newton_bond, const double eimproper,
                           const double * const f1, const double * const f3,
                           const double * const f4, const double vb1x,
                           const double vb1y, const double vb1z, const double vb2x,
                           const double vb2y, const double vb2z, const double vb3x,
                           const double vb3y, const double vb3z, ThrData * const thr)
 {
   // count of the four atoms whose index is below nlocal;
   // it weights the global tallies when newton_bond is off
   int nmine = 0;
   if (i1 < nlocal) ++nmine;
   if (i2 < nlocal) ++nmine;
   if (i3 < nlocal) ++nmine;
   if (i4 < nlocal) ++nmine;
 
   // energy tallies
   if (imprp->eflag_either) {
     const double efourth = 0.25*eimproper;
 
     if (imprp->eflag_global) {
       if (newton_bond) thr->eng_imprp += eimproper;
       else thr->eng_imprp += static_cast<double>(nmine)*efourth;
     }
 
     // a quarter of the energy per atom; atoms at or above nlocal are
     // skipped when newton_bond is off
     if (imprp->eflag_atom) {
       if (newton_bond || i1 < nlocal) thr->eatom_imprp[i1] += efourth;
       if (newton_bond || i2 < nlocal) thr->eatom_imprp[i2] += efourth;
       if (newton_bond || i3 < nlocal) thr->eatom_imprp[i3] += efourth;
       if (newton_bond || i4 < nlocal) thr->eatom_imprp[i4] += efourth;
     }
   }
 
   // virial tallies, using vb1*f1 + vb2*f3 + (vb3+vb2)*f4
   if (imprp->vflag_either) {
     const double sx = vb3x+vb2x;
     const double sy = vb3y+vb2y;
     const double sz = vb3z+vb2z;
 
     double v[6];
     v[0] = vb1x*f1[0] + vb2x*f3[0] + sx*f4[0];
     v[1] = vb1y*f1[1] + vb2y*f3[1] + sy*f4[1];
     v[2] = vb1z*f1[2] + vb2z*f3[2] + sz*f4[2];
     v[3] = vb1x*f1[1] + vb2x*f3[1] + sx*f4[1];
     v[4] = vb1x*f1[2] + vb2x*f3[2] + sx*f4[2];
     v[5] = vb1y*f1[2] + vb2y*f3[2] + sy*f4[2];
 
     if (imprp->vflag_global) {
       if (newton_bond) v_tally(thr->virial_imprp,v);
       else v_tally(thr->virial_imprp,0.25*static_cast<double>(nmine),v);
     }
 
     // scale down to the per-atom quarter share
     for (int n = 0; n < 6; ++n) v[n] *= 0.25;
 
     if (imprp->vflag_atom) {
       if (newton_bond || i1 < nlocal) v_tally(thr->vatom_imprp[i1],v);
       if (newton_bond || i2 < nlocal) v_tally(thr->vatom_imprp[i2],v);
       if (newton_bond || i3 < nlocal) v_tally(thr->vatom_imprp[i3],v);
       if (newton_bond || i4 < nlocal) v_tally(thr->vatom_imprp[i4],v);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO potential, newton_pair is always on
    fpair is magnitude of force on atom I
 ------------------------------------------------------------------------- */
 
 void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair,
                           const double * const drij, ThrData * const thr)
 {
   const double dx = drij[0];
   const double dy = drij[1];
   const double dz = drij[2];
 
   // half of the pair virial, added to both atom i and atom j
   double vhalf[6];
   vhalf[0] = 0.5 * dx*dx*fpair;
   vhalf[1] = 0.5 * dy*dy*fpair;
   vhalf[2] = 0.5 * dz*dz*fpair;
   vhalf[3] = 0.5 * dx*dy*fpair;
   vhalf[4] = 0.5 * dx*dz*fpair;
   vhalf[5] = 0.5 * dy*dz*fpair;
 
   double ** const vatom = thr->vatom_pair;
   v_tally(vatom[i],vhalf);
   v_tally(vatom[j],vhalf);
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO and Tersoff potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
 void ThrOMP::v_tally3_thr(const int i, const int j, const int k,
                           const double * const fi, const double * const fj,
                           const double * const drik, const double * const drjk,
                           ThrData * const thr)
 {
   // one third of (drik x fi + drjk x fj), in the order xx,yy,zz,xy,xz,yz
   double vthird[6];
 
   vthird[0] = THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]);
   vthird[1] = THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]);
   vthird[2] = THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]);
   vthird[3] = THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]);
   vthird[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]);
   vthird[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]);
 
   // the same share goes to each of the three atoms
   const int ids[3] = {i, j, k};
   for (int n = 0; n < 3; ++n) v_tally(thr->vatom_pair[ids[n]],vthird);
 }
 
 /* ----------------------------------------------------------------------
    tally virial into per-atom accumulators
    called by AIREBO potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
 void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m,
                           const double * const fi, const double * const fj,
                           const double * const fk, const double * const drim,
                           const double * const drjm, const double * const drkm,
                           ThrData * const thr)
 {
   // one quarter of (drim x fi + drjm x fj + drkm x fk),
   // stored in the order xx,yy,zz,xy,xz,yz
   double vquarter[6];
 
   vquarter[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
   vquarter[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
   vquarter[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
   vquarter[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
   vquarter[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
   vquarter[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
 
   // the same share goes to each of the four atoms
   const int ids[4] = {i, j, k, m};
   for (int n = 0; n < 4; ++n) v_tally(thr->vatom_pair[ids[n]],vquarter);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrOMP::memory_usage_thr()
 {
   // nothing is accounted for here; always reports zero bytes
   return 0.0;
 }
diff --git a/src/USER-PHONON/fix_phonon.cpp b/src/USER-PHONON/fix_phonon.cpp
index 1470bc3ee..e4ff1dd31 100644
--- a/src/USER-PHONON/fix_phonon.cpp
+++ b/src/USER-PHONON/fix_phonon.cpp
@@ -1,922 +1,922 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
+
 /* ----------------------------------------------------------------------
-   Contributing authors:
-     Ling-Ti Kong
+   Contributing author: Ling-Ti Kong
 
    Contact:
      School of Materials Science and Engineering,
      Shanghai Jiao Tong University,
      800 Dongchuan Road, Minhang,
      Shanghai 200240, CHINA
 
      konglt@sjtu.edu.cn; konglt@gmail.com
 ------------------------------------------------------------------------- */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_phonon.h"
 #include "fft3d_wrap.h"
 #include "atom.h"
 #include "compute.h"
 #include "domain.h"
 #include "force.h"
 #include "group.h"
 #include "lattice.h"
 #include "modify.h"
 #include "update.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define INVOKED_SCALAR 1
 #define INVOKED_VECTOR 2
 #define MAXLINE 512
 
 static const char cite_fix_phonon[] =
   "fix phonon command:\n\n"
   "@Article{Kong11,\n"
   " author = {L. T. Kong},\n"
   " title = {Phonon dispersion measured directly from molecular dynamics simulations},\n"
   " journal = {Comp.~Phys.~Comm.},\n"
   " year =    2011,\n"
   " volume =  182,\n"
   " pages =   {2201--2207}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 // Build the fix from the input-script arguments:
 //   arg[3] = nevery, arg[4] = nfreq, arg[5] = waitsteps,
 //   arg[6] = map file name, arg[7] = output prefix,
 //   optional keywords "sysdim" and "nasr" from arg[8] onward.
 // Calls readmap() and getmass(), sets up the FFT decomposition along x,
 // allocates all work arrays, and on rank 0 opens the log file and writes
 // its header.
 FixPhonon::FixPhonon(LAMMPS *lmp,  int narg, char **arg) : Fix(lmp, narg, arg)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_phonon);
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
   
   if (narg < 8) error->all(FLERR,"Illegal fix phonon command: number of arguments < 8");
 
   nevery = force->inumeric(FLERR, arg[3]);   // Calculate this fix every n steps!
   if (nevery < 1) error->all(FLERR,"Illegal fix phonon command");
 
   nfreq  = force->inumeric(FLERR, arg[4]);   // frequency to output result
   if (nfreq < 1) error->all(FLERR,"Illegal fix phonon command");
 
   waitsteps = force->bnumeric(FLERR,arg[5]); // Wait this many timesteps before actually measuring
   if (waitsteps < 0) error->all(FLERR,"Illegal fix phonon command: waitsteps < 0 !");
 
   int n = strlen(arg[6]) + 1; // map file
   mapfile = new char[n];
   strcpy(mapfile, arg[6]);
 
   n = strlen(arg[7]) + 1;   // prefix of output
   prefix = new char[n];
   strcpy(prefix, arg[7]);
   // n already counts the terminating NUL; +4 leaves room for ".log"
   logfile = new char[n+4];
   sprintf(logfile,"%s.log",prefix);
   
   int sdim = sysdim = domain->dimension;
   int iarg = 8;
   nasr = 20;
 
   // other command line options
   while (iarg < narg){
     if (strcmp(arg[iarg],"sysdim") == 0){
       if (++iarg >= narg) error->all(FLERR,"Illegal fix phonon command: incomplete command line options.");
       sdim = force->inumeric(FLERR, arg[iarg]);
       if (sdim < 1) error->all(FLERR,"Illegal fix phonon command: sysdim should not be less than 1.");
 
     } else if (strcmp(arg[iarg],"nasr") == 0){
       if (++iarg >= narg) error->all(FLERR,"Illegal fix phonon command: incomplete command line options.");
       nasr = force->inumeric(FLERR, arg[iarg]);
 
     } else {
       error->all(FLERR,"Illegal fix phonon command: unknown option read!");
     }
 
     ++iarg;
   }
 
   // get the dimension of the simulation; 1D is possible by specifying the option of "sysdim 1"
   // (a user-supplied sysdim can only lower the dimension, never raise it)
   if (sdim < sysdim) sysdim = sdim;
   nasr = MAX(0, nasr);
 
   // get the total number of atoms in group and run min/max checks
   bigint ng = group->count(igroup);
   if (ng > MAXSMALLINT) error->all(FLERR,"Too many atoms for fix phonon");
   if (ng < 1) error->all(FLERR,"No atom found for fix phonon!");
   ngroup = static_cast<int>(ng);
 
 
   // MPI gatherv related variables
   recvcnts = new int[nprocs];
   displs   = new int[nprocs];
 
   // mapping index
   tag2surf.clear(); // clear map info
   surf2tag.clear();
 
   // get the mapping between lattice indices and atom IDs
   // (the map file name is not needed once readmap() has returned)
   readmap(); delete []mapfile;
   if (nucell == 1) nasr = MIN(1,nasr);
 
   // get the mass matrix for dynamic matrix
   getmass();
 
   // create FFT and allocate memory for FFT
   // here the parallelization is done on the x direction only:
   // the nx planes are split as evenly as possible, with the first
   // nx%nprocs ranks getting one extra plane
   nxlo = 0;
   int *nx_loc = new int [nprocs];
   for (int i = 0; i < nprocs; ++i){
     nx_loc[i] = nx / nprocs;
     if (i < nx%nprocs) ++nx_loc[i];
   }
   for (int i = 0; i < me; ++i) nxlo += nx_loc[i];
   nxhi  = nxlo + nx_loc[me] - 1;
   mynpt = nx_loc[me] * ny * nz;
   mynq  = mynpt;
 
   fft_dim   = nucell  * sysdim;
   fft_dim2  = fft_dim * fft_dim;
   fft_nsend = mynpt   * fft_dim;
 
   // per-rank counts and displacements (in doubles) of the lattice slabs
   fft_cnts  = new int[nprocs];
   fft_disp  = new int[nprocs];
   fft_disp[0] = 0;
   for (int i = 0; i < nprocs; ++i) fft_cnts[i] = nx_loc[i] * ny * nz * fft_dim;
   for (int i = 1; i < nprocs; ++i) fft_disp[i] = fft_disp[i-1] + fft_cnts[i-1];
   delete []nx_loc;
 
   fft = new FFT3d(lmp,world,nz,ny,nx,0,nz-1,0,ny-1,nxlo,nxhi,0,nz-1,0,ny-1,nxlo,nxhi,0,0,&mysize,0);
   memory->create(fft_data, MAX(1,mynq)*2, "fix_phonon:fft_data");
 
   // allocate variables; MAX(1,... is used because NULL buffer will result in error for MPI
   memory->create(RIloc,ngroup,(sysdim+1),"fix_phonon:RIloc");
   memory->create(RIall,ngroup,(sysdim+1),"fix_phonon:RIall");
   memory->create(Rsort,ngroup, sysdim, "fix_phonon:Rsort");
                               
   memory->create(Rnow, MAX(1,mynpt),fft_dim,"fix_phonon:Rnow");
   memory->create(Rsum, MAX(1,mynpt),fft_dim,"fix_phonon:Rsum");
                               
   memory->create(basis,nucell, sysdim, "fix_phonon:basis");
 
   // q-space arrays
   // NOTE(review): the original comment said that, because of Hermitian
   // symmetry, only about half of the q points are stored; however
   // mynq == mynpt above, so all local q points appear to be stored - confirm
   memory->create(Rqnow,MAX(1,mynq),fft_dim, "fix_phonon:Rqnow");
   memory->create(Rqsum,MAX(1,mynq),fft_dim2,"fix_phonon:Rqsum");
   memory->create(Phi_q,MAX(1,mynq),fft_dim2,"fix_phonon:Phi_q");
 
   // variable to collect all local Phi to root
   if (me == 0) memory->create(Phi_all,ntotal,fft_dim2,"fix_phonon:Phi_all");
   else memory->create(Phi_all,1,1,"fix_phonon:Phi_all");
 
   // output some information on the system to log file
   if (me == 0){
     flog = fopen(logfile, "w");
     if (flog == NULL) {
       char str[MAXLINE];
       sprintf(str,"Can not open output file %s",logfile);
       error->one(FLERR,str);
     }
     fprintf(flog,"############################################################\n");
     fprintf(flog,"# group name of the atoms under study      : %s\n", group->names[igroup]);
     fprintf(flog,"# total number of atoms in the group       : %d\n", ngroup);
     fprintf(flog,"# dimension of the system                  : %d D\n", sysdim);
     fprintf(flog,"# number of atoms per unit cell            : %d\n", nucell);
     fprintf(flog,"# dimension of the FFT mesh                : %d x %d x %d\n", nx, ny, nz);
     fprintf(flog,"# number of wait steps before measurement  : " BIGINT_FORMAT "\n", waitsteps);
     fprintf(flog,"# frequency of the measurement             : %d\n", nevery);
     fprintf(flog,"# output result after this many measurement: %d\n", nfreq);
     fprintf(flog,"# number of processors used by this run    : %d\n", nprocs);
     fprintf(flog,"############################################################\n");
     fprintf(flog,"# mapping information between lattice indices and atom id\n");
     fprintf(flog,"# nx ny nz nucell\n");
     fprintf(flog,"%d %d %d %d\n", nx, ny, nz, nucell);
     fprintf(flog,"# l1 l2 l3 k atom_id\n");
     int ix, iy, iz, iu;
     // decode idx = ((ix*ny+iy)*nz+iz)*nucell + iu back into its components
     for (idx = 0; idx < ngroup; ++idx){
       itag = surf2tag[idx];
       iu   = idx%nucell;
       iz   = (idx/nucell)%nz;
       iy   = (idx/(nucell*nz))%ny;
       ix   = (idx/(nucell*nz*ny))%nx;
       fprintf(flog,"%d %d %d %d " TAGINT_FORMAT "\n", ix, iy, iz, iu, itag);
     }
     fprintf(flog,"############################################################\n");
     fflush(flog);
   }
   surf2tag.clear();
  
   // default temperature is from thermo
   TempSum = new double[sysdim];
   id_temp = new char[12];
   strcpy(id_temp,"thermo_temp");
   // NOTE(review): the index returned by find_compute() is used without a
   // check for a negative value - presumably "thermo_temp" always exists; confirm
   int icompute = modify->find_compute(id_temp);
   temperature = modify->compute[icompute];
   inv_nTemp = 1./group->count(temperature->igroup);
 
 } // end of constructor
 
 /* ---------------------------------------------------------------------- */
 
 void FixPhonon::post_run()
 {
   // write out measurements that were accumulated but not yet output
   const bool pending = (ifreq > 0) && (ifreq != nfreq);
   if (pending) postprocess();
 
   // rank 0 closes the log file
   // NOTE(review): flog is not reset here; confirm nothing writes to it
   // after this point
   if (me == 0) {
     fclose(flog);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixPhonon::~FixPhonon()
 {
   // arrays obtained from memory->create() are released with memory->destroy()
   memory->destroy(RIloc);
   memory->destroy(RIall);
   memory->destroy(Rsort);
   memory->destroy(Rnow);
   memory->destroy(Rsum);
 
   memory->destroy(basis);
 
   memory->destroy(Rqnow);
   memory->destroy(Rqsum);
   memory->destroy(Phi_q);
   memory->destroy(Phi_all);
 
   // arrays obtained from new[] are released with delete[]
   delete []recvcnts;
   delete []displs;
   delete []prefix;
   delete []logfile;
   delete []fft_cnts;
   delete []fft_disp;
   delete []id_temp;
   delete []TempSum;
   delete []M_inv_sqrt;
   delete []basetype;
 
   // destroy FFT; fft_data was allocated with memory->create(), so it is
   // released with the matching memory->destroy() like the arrays above
   delete fft;
   memory->destroy(fft_data);
   
   // clear map info
   tag2surf.clear();
   surf2tag.clear();
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixPhonon::setmask()
 {
   // the only bit set in the returned mask is END_OF_STEP
   return END_OF_STEP;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPhonon::init()
 {
   // warn if more than one fix-phonon
   int count = 0;
   for (int i = 0; i < modify->nfix; ++i) if (strcmp(modify->fix[i]->style,"phonon") == 0) ++count;
   if (count > 1 && me == 0) error->warning(FLERR,"More than one fix phonon defined"); // just warn, but allowed.
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPhonon::setup(int flag)
 {
   // reset counters and remember the timestep at which this run starts
   neval = 0;
   ifreq = 0;
   prev_nstep = update->ntimestep;
 
   // zero all accumulators
   const std::complex<double> czero(0.,0.);
 
   for (int d = 0; d < sysdim; ++d) TempSum[d] = 0.;
   for (int k = 0; k < 6; ++k) hsum[k] = 0.;
 
   for (int ip = 0; ip < mynpt; ++ip) {
     for (int d = 0; d < fft_dim; ++d) Rsum[ip][d] = 0.;
   }
 
   for (int iq = 0; iq < mynq; ++iq) {
     for (int d = 0; d < fft_dim2; ++d) Rqsum[iq][d] = czero;
   }
 
   for (int iu = 0; iu < nucell; ++iu) {
     for (int d = 0; d < sysdim; ++d) basis[iu][d] = 0.;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPhonon::end_of_step()
 {
   // do nothing until more than waitsteps steps have elapsed since prev_nstep
   if ( (update->ntimestep-prev_nstep) <= waitsteps) return;
 
   double **x = atom->x;
   int *mask = atom->mask;
   tagint *tag = atom->tag;
   imageint *image = atom->image;
   int nlocal = atom->nlocal;
 
   double *h = domain->h;
 
   int i,idim,jdim,ndim;
   double xcur[3];
 
   // to get the current temperature
   if (!(temperature->invoked_flag & INVOKED_VECTOR)) temperature->compute_vector();
   for (idim = 0; idim < sysdim; ++idim) TempSum[idim] += temperature->vector[idim];
 
   // evaluate R(r) on local proc: unwrapped position plus lattice index
   // (stored as a double in the last column) for every local atom in the group
   nfind = 0;
   for (i = 0; i < nlocal; ++i){
     if (mask[i] & groupbit){
       itag = tag[i];
       idx  = tag2surf[itag];
 
       domain->unmap(x[i], image[i], xcur);
         
       for (idim = 0; idim < sysdim; ++idim) RIloc[nfind][idim] = xcur[idim];
       RIloc[nfind++][sysdim] = static_cast<double>(idx);
     }
   }
 
   // gather R(r) on root, then sort by lattice index and redistribute
   // the slabs to all procs for FFT
   nfind *= (sysdim+1);   // from here on nfind counts doubles, not atoms
   displs[0] = 0;
   for (i = 0; i < nprocs; ++i) recvcnts[i] = 0;
   MPI_Gather(&nfind,1,MPI_INT,recvcnts,1,MPI_INT,0,world);
   for (i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1];
 
   MPI_Gatherv(RIloc[0],nfind,MPI_DOUBLE,RIall[0],recvcnts,displs,MPI_DOUBLE,0,world);
   if (me == 0){
     for (i = 0; i < ngroup; ++i){
       idx = static_cast<int>(RIall[i][sysdim]);
       for (idim = 0; idim < sysdim; ++idim) Rsort[idx][idim] = RIall[i][idim];
     }
   }
   MPI_Scatterv(Rsort[0],fft_cnts,fft_disp, MPI_DOUBLE, Rnow[0], fft_nsend, MPI_DOUBLE,0,world);
 
   // get Rsum
   for (idx = 0; idx < mynpt; ++idx)
   for (idim = 0; idim < fft_dim; ++idim) Rsum[idx][idim] += Rnow[idx][idim];
 
   // FFT R(r) to get R(q), one component at a time
   for (idim = 0; idim < fft_dim; ++idim){
     int m = 0;
     for (idx = 0; idx < mynpt; ++idx){
       fft_data[m++] = static_cast<FFT_SCALAR>(Rnow[idx][idim]);
       fft_data[m++] = static_cast<FFT_SCALAR>(0.);
     }
 
     fft->compute(fft_data, fft_data, -1);
 
     m = 0;
     for (idq = 0; idq < mynq; ++idq){
       Rqnow[idq][idim] = std::complex<double>(static_cast<double>(fft_data[m]), static_cast<double>(fft_data[m+1]));
       m += 2;
     }
   }
 
   // to get sum(R(q).R(q)*)
   for (idq = 0; idq < mynq; ++idq){
     ndim = 0;
     for (idim = 0; idim < fft_dim; ++idim)
     for (jdim = 0; jdim < fft_dim; ++jdim) Rqsum[idq][ndim++] += Rqnow[idq][idim] * std::conj(Rqnow[idq][jdim]);
   }
 
   // get basis info: displacement of basis atoms 1..nucell-1 from basis atom 0
   if (fft_dim > sysdim){
     double dist2orig[3];
     for (idx = 0; idx < mynpt; ++idx){
       ndim = sysdim;
       for (i = 1; i < nucell; ++i){
         for (idim = 0; idim < sysdim; ++idim) dist2orig[idim] = Rnow[idx][ndim++] - Rnow[idx][idim];
         // when sysdim < 3 the remaining components are not measured; zero
         // them so that the full 3-vector handed to minimum_image() never
         // contains uninitialized data
         for (idim = sysdim; idim < 3; ++idim) dist2orig[idim] = 0.;
         domain->minimum_image(dist2orig);
         for (idim = 0; idim < sysdim; ++idim) basis[i][idim] += dist2orig[idim];
       }
     }
   }
   // get lattice vector info
   for (i = 0; i < 6; ++i) hsum[i] += h[i];
 
   // increment counter
   ++neval;
 
   // compute and output Phi_q after every nfreq evaluations
   if (++ifreq == nfreq) postprocess();
 
 }   // end of end_of_step()
 
 /* ---------------------------------------------------------------------- */
 
 double FixPhonon::memory_usage()
 {
   double bytes = sizeof(double)*2*mynq
                + sizeof(std::map<int,int>)*2*ngroup
                + sizeof(double)*(ngroup*(3*sysdim+2)+mynpt*fft_dim*2)
                + sizeof(std::complex<double>)*MAX(1,mynq)*fft_dim *(1+2*fft_dim)
                + sizeof(std::complex<double>)*ntotal*fft_dim2
                + sizeof(int) * nprocs * 4;
   return bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixPhonon::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(id_temp);
     if (icompute < 0) error->all(FLERR,"Could not find fix_modify temp ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,"Fix_modify temp ID does not compute temperature");
     inv_nTemp = 1.0/group->count(temperature->igroup);
 
     return 2;
   }
   return 0;
 }
 
 /* ----------------------------------------------------------------------
  * private method, to get the mass matrix for dynamic matrix
  * --------------------------------------------------------------------*/
 void FixPhonon::getmass()
 {
   int nlocal = atom->nlocal;
   int *mask  = atom->mask;
   tagint *tag   = atom->tag;
   int *type  = atom->type;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   double *mass_one, *mass_all;
   double *type_one, *type_all;
 
   mass_one = new double[nucell];
   mass_all = new double[nucell];
   type_one = new double[nucell];
   type_all = new double[nucell];
   for (int i = 0; i < nucell; ++i)  mass_one[i] = type_one[i] = 0.;
 
   if (rmass){
     for (int i = 0; i < nlocal; ++i){
       if (mask[i] & groupbit){
         itag = tag[i];
         idx  = tag2surf[itag];
         int iu = idx%nucell;
         mass_one[iu] += rmass[i];
         type_one[iu] += double(type[i]);
       }
     }
   } else {
     for (int i = 0; i < nlocal; ++i){
       if (mask[i] & groupbit){
         itag = tag[i];
         idx  = tag2surf[itag];
         int iu = idx%nucell;
         mass_one[iu] += mass[type[i]];
         type_one[iu] += double(type[i]);
       }
     }
   }
 
   MPI_Allreduce(mass_one,mass_all,nucell,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(type_one,type_all,nucell,MPI_DOUBLE,MPI_SUM,world);
 
   M_inv_sqrt = new double[nucell];
   basetype   = new int[nucell];
 
   double inv_total = 1./double(ntotal);
   for (int i = 0; i < nucell; ++i){
     mass_all[i] *= inv_total;
     M_inv_sqrt[i] = sqrt(1./mass_all[i]);
 
     basetype[i] = int(type_all[i]*inv_total);
   }
   delete []mass_one;
   delete []mass_all;
   delete []type_one;
   delete []type_all;
 }
 
 
 /* ----------------------------------------------------------------------
  * private method, to read the mapping info from file
  * --------------------------------------------------------------------*/
 
 void FixPhonon::readmap()
 {
   // Fill tag2surf / surf2tag with the mapping between atom IDs and lattice
   // indices, and set nx, ny, nz, nucell and ntotal.  The mapping is either
   // auto-generated (map file name "GAMMA") or read from the map file.
   int info = 0;
 
   // auto-generate mapfile for "cluster" (gamma only system)
   if (strcmp(mapfile, "GAMMA") == 0){
     nx = ny = nz = ntotal = 1;
     nucell = ngroup;
 
     tagint *tag_loc, *tag_all;
     memory->create(tag_loc,ngroup,"fix_phonon:tag_loc");
     memory->create(tag_all,ngroup,"fix_phonon:tag_all");
 
     // get atom IDs on local proc
     int nmine = 0;
     for (int i = 0; i < atom->nlocal; ++i){
       if (atom->mask[i] & groupbit) tag_loc[nmine++] = atom->tag[i];
     }
    
     // gather the IDs of all procs on every proc
     displs[0] = 0;
     for (int i = 0; i < nprocs; ++i) recvcnts[i] = 0;
     MPI_Allgather(&nmine,1,MPI_INT,recvcnts,1,MPI_INT,world);
     for (int i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1];
    
     MPI_Allgatherv(tag_loc,nmine,MPI_LMP_TAGINT,tag_all,recvcnts,displs,MPI_LMP_TAGINT,world);
     // the position in the gathered list becomes the lattice index
     for (int i = 0; i < ngroup; ++i){
       itag = tag_all[i];
       tag2surf[itag] = i;
       surf2tag[i] = itag;
     }
 
     memory->destroy(tag_loc);
     memory->destroy(tag_all);
     return;
   }
 
   // read from map file for others
   char line[MAXLINE];
   FILE *fp = fopen(mapfile, "r");
   if (fp == NULL){
     sprintf(line,"Cannot open input map file %s", mapfile);
     error->all(FLERR,line);
   }
 
   // first line: nx ny nz nucell
   if (fgets(line,MAXLINE,fp) == NULL) 
     error->all(FLERR,"Error while reading header of mapping file!");
   nx     = force->inumeric(FLERR, strtok(line, " \n\t\r\f"));
   ny     = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
   nz     = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
   nucell = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
   ntotal = nx*ny*nz;
   if (ntotal*nucell != ngroup) 
     error->all(FLERR,"FFT mesh and number of atoms in group mismatch!");
   
   // second line of mapfile is comment
   if (fgets(line,MAXLINE,fp) == NULL) 
     error->all(FLERR,"Error while reading comment of mapping file!");
 
   int ix, iy, iz, iu;
   // the remaining lines carry the mapping info: ix iy iz iu atom_id
   // info records why the loop stopped early:
   //   1 = premature end of file, 2 = index out of range, 3 = bad atom ID
   for (int i = 0; i < ngroup; ++i){
     if (fgets(line,MAXLINE,fp) == NULL) {info = 1; break;} 
     ix   = force->inumeric(FLERR, strtok(line, " \n\t\r\f"));
     iy   = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
     iz   = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
     iu   = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
     itag = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f"));
 
     // check if index is in correct range
     if (ix < 0 || ix >= nx || iy < 0 || iy >= ny || 
         iz < 0 || iz >= nz || iu < 0 || iu >= nucell) {info = 2; break;}
     // 1 <= itag <= natoms
     if (itag < 1 || itag > static_cast<tagint>(atom->natoms)) {info = 3; break;}
     idx = ((ix*ny+iy)*nz+iz)*nucell + iu;
     tag2surf[itag] = idx;
     surf2tag[idx]  = itag;
   }
   fclose(fp);
 
   // report a read/validation failure first: an early break always leaves
   // the maps with fewer than ngroup entries, so checking completeness
   // first would hide this error
   if (info) error->all(FLERR,"Error while reading mapping file!");
   if (tag2surf.size() != surf2tag.size() || 
       tag2surf.size() != static_cast<std::size_t>(ngroup) )
     error->all(FLERR,"The mapping is incomplete!");
   
   // check the correctness of mapping: both maps must agree for every
   // local atom in the group
   int *mask  = atom->mask;
   tagint *tag   = atom->tag;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; ++i) {
     if (mask[i] & groupbit){
       itag = tag[i];
       idx  = tag2surf[itag];
       if (itag != surf2tag[idx]) 
         error->one(FLERR,"The mapping info read is incorrect!");
     }
   }
 }
 
 /* ----------------------------------------------------------------------
  * private method, to output the force constant matrix
  * --------------------------------------------------------------------*/
 void FixPhonon::postprocess( )
 {
   if (neval < 1) return;
 
   ifreq = 0;
   int idim, jdim, ndim;
   double inv_neval = 1. /double(neval);
 
   // to get <Rq.Rq*>
   for (idq = 0; idq < mynq; ++idq)
   for (idim = 0; idim < fft_dim2; ++idim) Phi_q[idq][idim] = Rqsum[idq][idim] * inv_neval;
 
   // to get <R>
   for (idx = 0; idx < mynpt; ++idx)
   for (idim = 0; idim < fft_dim; ++idim) Rnow[idx][idim] = Rsum[idx][idim] * inv_neval;
 
   // to get <R>q
   for (idim = 0; idim < fft_dim; ++idim){
     int m = 0;
     for (idx = 0; idx < mynpt; ++idx){
       fft_data[m++] = static_cast<FFT_SCALAR>(Rnow[idx][idim]);
       fft_data[m++] = static_cast<FFT_SCALAR>(0.);
     }
 
     fft->compute(fft_data,fft_data,-1);
 
     m = 0;
     for (idq = 0; idq < mynq; ++idq){
       Rqnow[idq][idim]  = std::complex<double>(static_cast<double>(fft_data[m]), static_cast<double>(fft_data[m+1]));
       m += 2;
     }
   }
 
   // to get G(q) = <Rq.Rq*> - <R>q.<R*>q
   for (idq = 0; idq < mynq; ++idq){
     ndim = 0;
     for (idim = 0; idim < fft_dim; ++idim)
     for (jdim = 0; jdim < fft_dim; ++jdim) Phi_q[idq][ndim++] -= Rqnow[idq][idim] * std::conj(Rqnow[idq][jdim]);
   }
 
   // to get Phi = KT.G^-1; normalization of FFTW data is done here
   double boltz = force->boltz, kbtsqrt[sysdim], TempAve = 0.;
   double TempFac = inv_neval * inv_nTemp;
   double NormFac = TempFac * double(ntotal);
 
   for (idim = 0; idim < sysdim; ++idim){
     kbtsqrt[idim] = sqrt(TempSum[idim] * NormFac);
     TempAve += TempSum[idim] * TempFac;
   }
   TempAve /= sysdim*boltz;
   
   for (idq = 0; idq < mynq; ++idq){
     GaussJordan(fft_dim, Phi_q[idq]);
     ndim =0;
     for (idim = 0; idim < fft_dim; ++idim)
     for (jdim = 0; jdim < fft_dim; ++jdim) Phi_q[idq][ndim++] *= kbtsqrt[idim%sysdim]*kbtsqrt[jdim%sysdim];
   }
 
   // to collect all local Phi_q to root
   displs[0]=0;
   for (int i = 0; i < nprocs; ++i) recvcnts[i] = fft_cnts[i]*fft_dim*2;
   for (int i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1];
   MPI_Gatherv(Phi_q[0],mynq*fft_dim2*2,MPI_DOUBLE,Phi_all[0],recvcnts,displs,MPI_DOUBLE,0,world);
   
   // to collect all basis info and averaged it on root
   double basis_root[fft_dim];
   if (fft_dim > sysdim) MPI_Reduce(&basis[1][0], &basis_root[sysdim], fft_dim-sysdim, MPI_DOUBLE, MPI_SUM, 0, world);
 
   if (me == 0){ // output dynamic matrix by root
 
     // get basis info
     for (idim = 0;      idim < sysdim;  ++idim) basis_root[idim]  = 0.;
     for (idim = sysdim; idim < fft_dim; ++idim) basis_root[idim] /= double(ntotal)*double(neval);
     // get unit cell base vector info; might be incorrect if MD pbc and FixPhonon pbc mismatch.
     double basevec[9];
     basevec[1] = basevec[2] = basevec[5] = 0.;
     basevec[0] = hsum[0] * inv_neval / double(nx);
     basevec[4] = hsum[1] * inv_neval / double(ny);
     basevec[8] = hsum[2] * inv_neval / double(nz);
     basevec[7] = hsum[3] * inv_neval / double(nz);
     basevec[6] = hsum[4] * inv_neval / double(nz);
     basevec[3] = hsum[5] * inv_neval / double(ny);
     
     // write binary file, in fact, it is the force constants matrix that is written
     // Enforcement of ASR and the conversion of dynamical matrix is done in the postprocessing code
     char fname[MAXLINE];
     sprintf(fname,"%s.bin." BIGINT_FORMAT,prefix,update->ntimestep);
     FILE *fp_bin = fopen(fname,"wb");
 
     fwrite(&sysdim, sizeof(int),    1, fp_bin);
     fwrite(&nx,     sizeof(int),    1, fp_bin);
     fwrite(&ny,     sizeof(int),    1, fp_bin);
     fwrite(&nz,     sizeof(int),    1, fp_bin);
     fwrite(&nucell, sizeof(int),    1, fp_bin);
     fwrite(&boltz,  sizeof(double), 1, fp_bin);
 
     fwrite(Phi_all[0],sizeof(double),ntotal*fft_dim2*2,fp_bin);
 
     fwrite(&TempAve,      sizeof(double),1,      fp_bin);
     fwrite(&basevec[0],   sizeof(double),9,      fp_bin);
     fwrite(&basis_root[0],sizeof(double),fft_dim,fp_bin);
     fwrite(basetype,      sizeof(int),   nucell, fp_bin);
     fwrite(M_inv_sqrt,    sizeof(double),nucell, fp_bin);
 
     fclose(fp_bin);
 
     // write log file, here however, it is the dynamical matrix that is written
     fprintf(flog,"############################################################\n");
     fprintf(flog,"# Current time step                      : " BIGINT_FORMAT "\n", update->ntimestep);
     fprintf(flog,"# Total number of measurements           : %d\n", neval);
     fprintf(flog,"# Average temperature of the measurement : %lg\n", TempAve);
     fprintf(flog,"# Boltzmann constant under current units : %lg\n", boltz);
     fprintf(flog,"# basis vector A1 = [%lg %lg %lg]\n", basevec[0], basevec[1], basevec[2]);
     fprintf(flog,"# basis vector A2 = [%lg %lg %lg]\n", basevec[3], basevec[4], basevec[5]);
     fprintf(flog,"# basis vector A3 = [%lg %lg %lg]\n", basevec[6], basevec[7], basevec[8]);
     fprintf(flog,"############################################################\n");
     fprintf(flog,"# qx\t qy \t qz \t\t Phi(q)\n");
 
     EnforceASR();
 
     // to get D = 1/M x Phi
     for (idq = 0; idq < ntotal; ++idq){
       ndim =0;
       for (idim = 0; idim < fft_dim; ++idim)
       for (jdim = 0; jdim < fft_dim; ++jdim) Phi_all[idq][ndim++] *= M_inv_sqrt[idim/sysdim]*M_inv_sqrt[jdim/sysdim];
     }
 
     idq =0;
     for (int ix = 0; ix < nx; ++ix){
       double qx = double(ix)/double(nx);
       for (int iy = 0; iy < ny; ++iy){
         double qy = double(iy)/double(ny);
         for (int iz = 0; iz < nz; ++iz){
           double qz = double(iz)/double(nz);
           fprintf(flog,"%lg %lg %lg", qx, qy, qz);
           for (idim = 0; idim < fft_dim2; ++idim)
             fprintf(flog, " %lg %lg", std::real(Phi_all[idq][idim]),
                                       std::imag(Phi_all[idq][idim]));
           fprintf(flog, "\n");
           ++idq;
         }
       }
     }
     fflush(flog);
   }
 
 }   // end of postprocess
 
 /* ----------------------------------------------------------------------
  * private method, to get the inverse of a complex matrix by means of
  * Gauss-Jordan elimination with full pivoting; square matrix required.
  *
  * Adapted from the Numerical Recipes in Fortran.
  * --------------------------------------------------------------------*/
 void FixPhonon::GaussJordan(int n, std::complex<double> *Mat)
 {
   int i,icol,irow,j,k,l,ll,idr,idc;
   int *indxc,*indxr,*ipiv;
   double big, nmjk;
   std::complex<double> dum, pivinv;
 
   indxc = new int[n];
   indxr = new int[n];
   ipiv  = new int[n];
 
   for (i = 0; i < n; ++i) ipiv[i] = 0;
   for (i = 0; i < n; ++i){
     big = 0.;
     for (j = 0; j < n; ++j){
       if (ipiv[j] != 1){
         for (k = 0; k < n; ++k){
           if (ipiv[k] == 0){
             idr = j*n+k;
             nmjk = norm(Mat[idr]);
             if (nmjk >= big){
               big  = nmjk;
               irow = j;
               icol = k;
             }
           } else if (ipiv[k] > 1) error->one(FLERR,"Singular matrix in complex GaussJordan!");
         }
       }
     }
     ipiv[icol] += 1;
     if (irow != icol){
       for (l = 0; l < n; ++l){
         idr  = irow*n+l;
         idc  = icol*n+l;
         dum  = Mat[idr];
         Mat[idr] = Mat[idc];
         Mat[idc] = dum;
       }
     }
     indxr[i] = irow;
     indxc[i] = icol;
     idr = icol*n+icol;
     if (Mat[idr] == std::complex<double>(0.,0.)) error->one(FLERR,"Singular matrix in complex GaussJordan!");
     
     pivinv = 1./ Mat[idr];
     Mat[idr] = std::complex<double>(1.,0.);
     idr = icol*n;
     for (l = 0; l < n; ++l) Mat[idr+l] *= pivinv;
     for (ll = 0; ll < n; ++ll){
       if (ll != icol){
         idc = ll*n + icol;
         dum = Mat[idc];
         Mat[idc] = 0.;
         idc -= icol;
         for (l = 0; l < n; ++l) Mat[idc+l] -= Mat[idr+l]*dum;
       }
     }
   }
 
   for (l = n-1; l >= 0; --l){
     int rl = indxr[l];
     int cl = indxc[l];
     if (rl != cl){
       for (k = 0; k < n; ++k){
         idr = k*n + rl;
         idc = k*n + cl;
         dum = Mat[idr];
         Mat[idr] = Mat[idc];
         Mat[idc] = dum;
       }
     }
   }
   delete []indxr;
   delete []indxc;
   delete []ipiv;
 }
 
 /* ----------------------------------------------------------------------
  * private method, to apply the acoustic sum rule on force constant matrix
  * at gamma point. Should be executed on root only.
  * --------------------------------------------------------------------*/
 void FixPhonon::EnforceASR()
 {
   // nothing to do when no ASR iterations were requested
   if (nasr < 1) return;
 
   for (int pass = 0; pass < nasr; ++pass){
     // step 1: plain sum rule; subtract the mean of each (k,a ; *,b) row
     // from its real parts (this can break the symmetry of the matrix)
     for (int a = 0; a < sysdim; ++a)
     for (int b = 0; b < sysdim; ++b)
     for (int k = 0; k < nucell; ++k){
       const int row = (k*sysdim+a)*fft_dim + b;
       double mean = 0.;
       for (int kp = 0; kp < nucell; ++kp) mean += std::real(Phi_all[0][row + kp*sysdim]);
       mean /= double(nucell);
       for (int kp = 0; kp < nucell; ++kp) Phi_all[0][row + kp*sysdim] -= mean;
     }
 
     // step 2: restore symmetry by averaging the real parts of each
     // element and its transposed partner; imaginary parts are kept
     for (int k = 0; k < nucell; ++k)
     for (int kp = k; kp < nucell; ++kp)
     for (int a = 0; a < sysdim; ++a)
     for (int b = 0; b < sysdim; ++b){
       const int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b;
       const int jdx = (kp*sysdim+b)*fft_dim + k*sysdim + a;
       const double avg = (std::real(Phi_all[0][idx])+std::real(Phi_all[0][jdx]))*0.5;
       Phi_all[0][idx] = std::complex<double>(avg, std::imag(Phi_all[0][idx]));
       Phi_all[0][jdx] = std::complex<double>(avg, std::imag(Phi_all[0][jdx]));
     }
   }
 
   // final pass: sum rule applied to the upper triangle only, with the
   // corrected real part mirrored onto the transposed element
   for (int a = 0; a < sysdim; ++a)
   for (int b = 0; b < sysdim; ++b)
   for (int k = 0; k < nucell; ++k){
     const int row = (k*sysdim+a)*fft_dim + b;
     double shift = 0.;
     for (int kp = 0; kp < nucell; ++kp) shift += std::real(Phi_all[0][row + kp*sysdim]);
     shift /= double(nucell-k);
     for (int kp = k; kp < nucell; ++kp){
       const int idx = row + kp*sysdim;
       const int jdx = (kp*sysdim+b)*fft_dim + k*sysdim + a;
       Phi_all[0][idx] -= shift;
       Phi_all[0][jdx] = std::complex<double>(std::real(Phi_all[0][idx]),
                                              std::imag(Phi_all[0][jdx]));
     }
   }
 }
 /* --------------------------------------------------------------------*/
diff --git a/src/USER-SMD/fix_smd_wall_surface.cpp b/src/USER-SMD/fix_smd_wall_surface.cpp
index 082723426..94cac6e20 100644
--- a/src/USER-SMD/fix_smd_wall_surface.cpp
+++ b/src/USER-SMD/fix_smd_wall_surface.cpp
@@ -1,509 +1,509 @@
 /* ----------------------------------------------------------------------
  LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
  http://lammps.sandia.gov, Sandia National Laboratories
  Steve Plimpton, sjplimp@sandia.gov
 
  Copyright (2003) Sandia Corporation.  Under the terms of Contract
  DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
  certain rights in this software.  This software is distributed under
  the GNU General Public License.
 
  See the README file in the top-level LAMMPS directory.
  ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
- Contributing authors: Mike Parks (SNL), Ezwanur Rahman, J.T. Foster (UTSA)
- ------------------------------------------------------------------------- */
+   Contributing authors: Mike Parks (SNL), Ezwanur Rahman, J.T. Foster (UTSA)
+------------------------------------------------------------------------- */
 
 #include <math.h>
 #include "fix_smd_wall_surface.h"
 #include "atom.h"
 #include "domain.h"
 #include "force.h"
 #include "comm.h"
 #include "update.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "pair.h"
 #include "lattice.h"
 #include "memory.h"
 #include "error.h"
 #include <Eigen/Eigen>
 #include <stdio.h>
 #include "atom_vec.h"
 #include <string.h>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace Eigen;
 using namespace std;
 #define DELTA 16384
 #define EPSILON 1.0e-6
 enum {
 	LAYOUT_UNIFORM, LAYOUT_NONUNIFORM, LAYOUT_TILED
 };
 // several files
 
 /* ---------------------------------------------------------------------- */
 
 FixSMDWallSurface::FixSMDWallSurface(LAMMPS *lmp, int narg, char **arg) :
 		Fix(lmp, narg, arg) {
 
 	// expected arguments: arg[3] = surface file name, arg[4] = particle type
 	// given to the wall particles, arg[5] = molecule id given to them
 
 	restart_global = 0;
 	restart_peratom = 0;
 	first = 1; // cleared by setup() once the surface has been read
 
 	//atom->add_callback(0);
 	//atom->add_callback(1);
 
 	if (narg != 6)
 		error->all(FLERR, "Illegal number of arguments for fix smd/wall_surface");
 
 	filename = strdup(arg[3]); // released with free() in the destructor
 	wall_particle_type = force->inumeric(FLERR, arg[4]);
 	wall_molecule_id = force->inumeric(FLERR, arg[5]);
 	// every rank parses the same arguments, so this is a collective error
 	if (wall_molecule_id < 65535) {
 		error->all(FLERR, "wall molecule id must be >= 65535\n");
 	}
 
 	if (comm->me == 0) {
 		printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n");
 		printf("fix smd/wall_surface reads triangulated surface from file: %s\n", filename);
 		printf("fix smd/wall_surface has particle type %d \n", wall_particle_type);
 		printf("fix smd/wall_surface has molecule id %d \n", wall_molecule_id);
 		printf(">>========>>========>>========>>========>>========>>========>>========>>========\n");
 	}
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixSMDWallSurface::~FixSMDWallSurface() {
 	// filename was obtained from strdup() in the constructor
 	free(filename);
 	filename = NULL;
 
 	// callback unregistration is currently disabled:
 	//atom->delete_callback(id, 0);
 	//atom->delete_callback(id, 1);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixSMDWallSurface::setmask() {
 	// no bits are set in the returned mask
 	return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixSMDWallSurface::init() {
 	// no work is done here, whether or not this is the first call
 	if (first == 0) {
 		return;
 	}
 }
 
 /* ----------------------------------------------------------------------
  For minimization: setup as with dynamics
  ------------------------------------------------------------------------- */
 
 void FixSMDWallSurface::min_setup(int vflag) {
 	setup(vflag);
 }
 
 /* ----------------------------------------------------------------------
  create initial list of neighbor partners via call to neighbor->build()
  must be done in setup (not init) since fix init comes before neigh init
  ------------------------------------------------------------------------- */
 
 void FixSMDWallSurface::setup(int vflag) {
 
 	// the surface file is processed only on the first call
 	if (!first)
 		return;
 	first = 0;
 
 	// Determine the bounds of this processor's subdomain. On a periodic
 	// axis, a processor at the low/high end of the grid has its bound
 	// widened by a small epsilon so round-off cannot leave a triangle
 	// center unowned.
 
 	const int triclinic = domain->triclinic;
 
 	// absolute tolerance for triclinic boxes, box-length relative otherwise
 	double epsilon[3];
 	for (int d = 0; d < 3; d++)
 		epsilon[d] = triclinic ? EPSILON : domain->prd[d] * EPSILON;
 
 	// triclinic boxes use the lamda-coordinate subdomain bounds
 	const double *lo = triclinic ? domain->sublo_lamda : domain->sublo;
 	const double *hi = triclinic ? domain->subhi_lamda : domain->subhi;
 	for (int d = 0; d < 3; d++) {
 		sublo[d] = lo[d];
 		subhi[d] = hi[d];
 	}
 
 	const int periodic[3] = { domain->xperiodic, domain->yperiodic, domain->zperiodic };
 	const bool tiled = (comm->layout == LAYOUT_TILED);
 
 	for (int d = 0; d < 3; d++) {
 		if (!periodic[d])
 			continue;
 
 		// is this processor at the low / high end along axis d?
 		bool at_lo, at_hi;
 		if (tiled) {
 			at_lo = (comm->mysplit[d][0] == 0.0);
 			at_hi = (comm->mysplit[d][1] == 1.0);
 		} else {
 			at_lo = (comm->myloc[d] == 0);
 			at_hi = (comm->myloc[d] == comm->procgrid[d] - 1);
 		}
 
 		if (at_lo)
 			sublo[d] -= epsilon[d];
 		if (at_hi)
 			subhi[d] += epsilon[d];
 	}
 
 	read_triangles(0);
 }
 
 /* ----------------------------------------------------------------------
  function to determine number of values in a text line
  ------------------------------------------------------------------------- */
 
 int FixSMDWallSurface::count_words(const char *line) {
 	// tokenize a private copy, because strtok() modifies its argument
 	char *scratch;
 	memory->create(scratch, strlen(line) + 1, "atom:copy");
 	strcpy(scratch, line);
 
 	// anything from the first '#' onwards is a comment and is not counted
 	char *hash = strchr(scratch, '#');
 	if (hash)
 		*hash = '\0';
 
 	// count the whitespace-separated tokens that remain
 	int nwords = 0;
 	for (char *tok = strtok(scratch, " \t\n\r\f"); tok != NULL; tok = strtok(NULL, " \t\n\r\f"))
 		nwords++;
 
 	memory->destroy(scratch);
 	return nwords;
 }
 
 /* ----------------------------------------------------------------------
  read triangles from an STL text file and create one particle for each
  triangle whose center lies inside this processor's subdomain
  ------------------------------------------------------------------------- */
 
 void FixSMDWallSurface::read_triangles(int pass) {
 
 	double coord[3];
 
 	int nlocal_previous = atom->nlocal;
 	int ilocal = nlocal_previous;
 	int m;
 	int me;
 
 	bigint natoms_previous = atom->natoms;
 	Vector3d *vert;
 	vert = new Vector3d[3];
 	Vector3d normal, center;
 
 	FILE *fp = fopen(filename, "r");
 	if (fp == NULL) {
 		char str[128];
 		sprintf(str, "Cannot open file %s", filename);
 		error->one(FLERR, str);
 	}
 
 	MPI_Comm_rank(world, &me);
 	if (me == 0) {
 		if (screen) {
 			if (pass == 0) {
 				printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n");
 				fprintf(screen, "  scanning triangle pairs ...\n");
 			} else {
 				fprintf(screen, "  reading triangle pairs ...\n");
 			}
 		}
 		if (logfile) {
 			if (pass == 0) {
 				fprintf(logfile, "  scanning triangle pairs ...\n");
 			} else {
 				fprintf(logfile, "  reading triangle pairs ...\n");
 			}
 		}
 	}
 
 	char str[128];
 	char line[256];
 	char *retpointer;
 	char **values;
 	int nwords;
 
 	// read STL solid name
 	retpointer = fgets(line, sizeof(line), fp);
 	if (retpointer == NULL) {
 		sprintf(str, "error reading number of triangle pairs");
 		error->one(FLERR, str);
 	}
 
 	nwords = count_words(line);
 	if (nwords < 1) {
 		sprintf(str, "first line of file is incorrect");
 		error->one(FLERR, str);
 	}
 
 //	values = new char*[nwords];
 //	values[0] = strtok(line, " \t\n\r\f");
 //	if (values[0] == NULL)
 //		error->all(FLERR, "Incorrect atom format in data file");
 //	for (m = 1; m < nwords; m++) {
 //		values[m] = strtok(NULL, " \t\n\r\f");
 //		if (values[m] == NULL)
 //			error->all(FLERR, "Incorrect atom format in data file");
 //	}
 //	delete[] values;
 //
 //	if (comm->me == 0) {
 //		cout << "STL file contains solid body with name: " << values[1] << endl;
 //	}
 
 	// iterate over STL facets util end of body is reached
 
 	while (fgets(line, sizeof(line), fp)) { // read a line, should be the facet line
 
 		// evaluate facet line
 		nwords = count_words(line);
 		if (nwords != 5) {
 			//sprintf(str, "found end solid line");
 			//error->message(FLERR, str);
 			break;
 		} else {
 			// should be facet line
 		}
 
 		values = new char*[nwords];
 		values[0] = strtok(line, " \t\n\r\f");
 		if (values[0] == NULL)
 			error->all(FLERR, "Incorrect atom format in data file");
 		for (m = 1; m < nwords; m++) {
 			values[m] = strtok(NULL, " \t\n\r\f");
 			if (values[m] == NULL)
 				error->all(FLERR, "Incorrect atom format in data file");
 		}
 
 		normal << force->numeric(FLERR, values[2]), force->numeric(FLERR, values[3]), force->numeric(FLERR, values[4]);
 		//cout << "normal is " << normal << endl;
 
 		delete[] values;
 
 		// read outer loop line
 		retpointer = fgets(line, sizeof(line), fp);
 		if (retpointer == NULL) {
 			sprintf(str, "error reading outer loop");
 			error->one(FLERR, str);
 		}
 
 		nwords = count_words(line);
 		if (nwords != 2) {
 			sprintf(str, "error reading outer loop");
 			error->one(FLERR, str);
 		}
 
 		// read vertex lines
 
 		for (int k = 0; k < 3; k++) {
 			retpointer = fgets(line, sizeof(line), fp);
 			if (retpointer == NULL) {
 				sprintf(str, "error reading vertex line");
 				error->one(FLERR, str);
 			}
 
 			nwords = count_words(line);
 			if (nwords != 4) {
 				sprintf(str, "error reading vertex line");
 				error->one(FLERR, str);
 			}
 
 			values = new char*[nwords];
 			values[0] = strtok(line, " \t\n\r\f");
 			if (values[0] == NULL)
 				error->all(FLERR, "Incorrect vertex line");
 			for (m = 1; m < nwords; m++) {
 				values[m] = strtok(NULL, " \t\n\r\f");
 				if (values[m] == NULL)
 					error->all(FLERR, "Incorrect vertex line");
 			}
 
 			vert[k] << force->numeric(FLERR, values[1]), force->numeric(FLERR, values[2]), force->numeric(FLERR, values[3]);
 			//cout << "vertex is " << vert[k] << endl;
 			//printf("%s %s %s\n", values[1], values[2], values[3]);
 			delete[] values;
 			//exit(1);
 
 		}
 
 		// read end loop line
 		retpointer = fgets(line, sizeof(line), fp);
 		if (retpointer == NULL) {
 			sprintf(str, "error reading endloop");
 			error->one(FLERR, str);
 		}
 
 		nwords = count_words(line);
 		if (nwords != 1) {
 			sprintf(str, "error reading endloop");
 			error->one(FLERR, str);
 		}
 
 		// read end facet line
 		retpointer = fgets(line, sizeof(line), fp);
 		if (retpointer == NULL) {
 			sprintf(str, "error reading endfacet");
 			error->one(FLERR, str);
 		}
 
 		nwords = count_words(line);
 		if (nwords != 1) {
 			sprintf(str, "error reading endfacet");
 			error->one(FLERR, str);
 		}
 
 		// now we have a normal and three vertices ... proceed with adding triangle
 
 		center = (vert[0] + vert[1] + vert[2]) / 3.0;
 
 		//	cout << "center is " << center << endl;
 
 		double r1 = (center - vert[0]).norm();
 		double r2 = (center - vert[1]).norm();
 		double r3 = (center - vert[2]).norm();
 		double r = MAX(r1, r2);
 		r = MAX(r, r3);
 
 		/*
 		 * if atom/molecule is in my subbox, create it
 		 * ... use x0 to hold triangle normal.
 		 * ... use smd_data_9 to hold the three vertices
 		 * ... use x to hold triangle center
 		 * ... radius is the mmaximal distance from triangle center to all vertices
 		 */
 
 		//	printf("coord: %f %f %f\n", coord[0], coord[1], coord[2]);
 		//	printf("sublo: %f %f %f\n", sublo[0], sublo[1], sublo[2]);
 		//	printf("subhi: %f %f %f\n", subhi[0], subhi[1], subhi[2]);
 		//printf("ilocal = %d\n", ilocal);
 		if (center(0) >= sublo[0] && center(0) < subhi[0] && center(1) >= sublo[1] && center(1) < subhi[1] && center(2) >= sublo[2]
 				&& center(2) < subhi[2]) {
 			//printf("******* KERATIN nlocal=%d ***\n", nlocal);
 			coord[0] = center(0);
 			coord[1] = center(1);
 			coord[2] = center(2);
 			atom->avec->create_atom(wall_particle_type, coord);
 
 			/*
 			 * need to initialize pointers to atom vec arrays here, because they could have changed
 			 * due to calling grow() in create_atoms() above;
 			 */
 
 			tagint *mol = atom->molecule;
 			int *type = atom->type;
 			double *radius = atom->radius;
 			double *contact_radius = atom->contact_radius;
 			double **smd_data_9 = atom->smd_data_9;
 			double **x0 = atom->x0;
 
 			radius[ilocal] = r; //ilocal;
 			contact_radius[ilocal] = r; //ilocal;
 			mol[ilocal] = wall_molecule_id;
 			type[ilocal] = wall_particle_type;
 			x0[ilocal][0] = normal(0);
 			x0[ilocal][1] = normal(1);
 			x0[ilocal][2] = normal(2);
 			smd_data_9[ilocal][0] = vert[0](0);
 			smd_data_9[ilocal][1] = vert[0](1);
 			smd_data_9[ilocal][2] = vert[0](2);
 			smd_data_9[ilocal][3] = vert[1](0);
 			smd_data_9[ilocal][4] = vert[1](1);
 			smd_data_9[ilocal][5] = vert[1](2);
 			smd_data_9[ilocal][6] = vert[2](0);
 			smd_data_9[ilocal][7] = vert[2](1);
 			smd_data_9[ilocal][8] = vert[2](2);
 
 			ilocal++;
 		}
 
 	}
 
 // set new total # of atoms and error check
 
 	bigint nblocal = atom->nlocal;
 	MPI_Allreduce(&nblocal, &atom->natoms, 1, MPI_LMP_BIGINT, MPI_SUM, world);
 	if (atom->natoms < 0 || atom->natoms >= MAXBIGINT)
 		error->all(FLERR, "Too many total atoms");
 
 // add IDs for newly created atoms
 // check that atom IDs are valid
 
 	if (atom->tag_enable)
 		atom->tag_extend();
 	atom->tag_check();
 
 // create global mapping of atoms
 // zero nghost in case are adding new atoms to existing atoms
 
 	if (atom->map_style) {
 		atom->nghost = 0;
 		atom->map_init();
 		atom->map_set();
 	}
 
 // print status
 	if (comm->me == 0) {
 		if (screen) {
 			printf("... fix smd/wall_surface finished reading triangulated surface\n");
 			fprintf(screen, "fix smd/wall_surface created " BIGINT_FORMAT " atoms\n", atom->natoms - natoms_previous);
 			printf(">>========>>========>>========>>========>>========>>========>>========>>========\n");
 		}
 		if (logfile) {
 			fprintf(logfile, "... fix smd/wall_surface finished reading triangulated surface\n");
 			fprintf(logfile, "fix smd/wall_surface created " BIGINT_FORMAT " atoms\n", atom->natoms - natoms_previous);
 			fprintf(logfile, ">>========>>========>>========>>========>>========>>========>>========>>========\n");
 		}
 	}
 
 	delete[] vert;
 	fclose(fp);
 }
 
diff --git a/src/USER-SMD/pair_smd_hertz.cpp b/src/USER-SMD/pair_smd_hertz.cpp
index 3c7d7cffb..76143d639 100644
--- a/src/USER-SMD/pair_smd_hertz.cpp
+++ b/src/USER-SMD/pair_smd_hertz.cpp
@@ -1,385 +1,385 @@
 /* ----------------------------------------------------------------------
  *
  *                    *** Smooth Mach Dynamics ***
  *
  * This file is part of the USER-SMD package for LAMMPS.
  * Copyright (2014) Georg C. Ganzenmueller, georg.ganzenmueller@emi.fhg.de
  * Fraunhofer Ernst-Mach Institute for High-Speed Dynamics, EMI,
  * Eckerstrasse 4, D-79104 Freiburg i.Br, Germany.
  *
  * ----------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
  LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
  http://lammps.sandia.gov, Sandia National Laboratories
  Steve Plimpton, sjplimp@sandia.gov
 
  Copyright (2003) Sandia Corporation.  Under the terms of Contract
  DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
  certain rights in this software.  This software is distributed under
  the GNU General Public License.
 
  See the README file in the top-level LAMMPS directory.
  ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
- Contributing author: Mike Parks (SNL)
- ------------------------------------------------------------------------- */
+   Contributing author: Mike Parks (SNL)
+------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <float.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pair_smd_hertz.h"
 #include "atom.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define SQRT2 1.414213562e0
 
 /* ---------------------------------------------------------------------- */
 
 PairHertz::PairHertz(LAMMPS *lmp) :
 		Pair(lmp) {
 
 	// all arrays start out unallocated; allocate() creates them later
 	onerad_dynamic = NULL;
 	onerad_frozen = NULL;
 	maxrad_dynamic = NULL;
 	maxrad_frozen = NULL;
 	bulkmodulus = NULL;
 	kn = NULL;
 
 	// default contact radius scale factor, overwritten by settings()
 	scale = 1.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairHertz::~PairHertz() {
 
 	// the arrays below exist only once allocate() has set the flag
 	if (!allocated)
 		return;
 
 	memory->destroy(setflag);
 	memory->destroy(cutsq);
 	memory->destroy(bulkmodulus);
 	memory->destroy(kn);
 
 	delete[] onerad_dynamic;
 	delete[] onerad_frozen;
 	delete[] maxrad_dynamic;
 	delete[] maxrad_frozen;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairHertz::compute(int eflag, int vflag) {
 	// Hertzian contact forces between particles whose scaled contact radii
 	// overlap. Also records in stable_time_increment the smallest critical
 	// time step estimated over all contacts handled in this call.
 	int i, j, ii, jj, inum, jnum, itype, jtype;
 	double xtmp, ytmp, ztmp, delx, dely, delz;
 	double rsq, r, evdwl, fpair;
 	int *ilist, *jlist, *numneigh, **firstneigh;
 	double rcut, r_geom, delta, ri, rj, dt_crit;
 	double *rmass = atom->rmass;
 
 	evdwl = 0.0;
 	if (eflag || vflag)
 		ev_setup(eflag, vflag);
 	else
 		evflag = vflag_fdotr = 0;
 
 	double **f = atom->f;
 	double **x = atom->x;
 	double **x0 = atom->x0;
 	int *type = atom->type;
 	int nlocal = atom->nlocal;
 	double *radius = atom->contact_radius;
 	double *sph_radius = atom->radius;
 	double rcutSq;
 	double delx0, dely0, delz0, rSq0, sphCut;
 
 	int newton_pair = force->newton_pair;
 	int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic);
 
 	inum = list->inum;
 	ilist = list->ilist;
 	numneigh = list->numneigh;
 	firstneigh = list->firstneigh;
 
 	stable_time_increment = 1.0e22;
 
 	// loop over neighbors of my atoms
 	for (ii = 0; ii < inum; ii++) {
 		i = ilist[ii];
 		xtmp = x[i][0];
 		ytmp = x[i][1];
 		ztmp = x[i][2];
 		itype = type[i];
 		ri = scale * radius[i];
 		jlist = firstneigh[i];
 		jnum = numneigh[i];
 
 		for (jj = 0; jj < jnum; jj++) {
 			j = jlist[jj];
 			j &= NEIGHMASK;
 
 			jtype = type[j];
 
 			delx = xtmp - x[j][0];
 			dely = ytmp - x[j][1];
 			delz = ztmp - x[j][2];
 
 			rsq = delx * delx + dely * dely + delz * delz;
 
 			rj = scale * radius[j];
 			rcut = ri + rj;
 			rcutSq = rcut * rcut;
 
 			if (rsq < rcutSq) {
 
 				/*
 				 * self contact option:
 				 * if pair of particles was initially close enough to interact via a bulk continuum mechanism (e.g. SPH), exclude pair from contact forces.
 				 * this approach should work well if no updates of the reference configuration are performed.
 				 */
 
 				if (itype == jtype) {
 					delx0 = x0[j][0] - x0[i][0];
 					dely0 = x0[j][1] - x0[i][1];
 					delz0 = x0[j][2] - x0[i][2];
 					if (periodic) {
 						domain->minimum_image(delx0, dely0, delz0);
 					}
 					rSq0 = delx0 * delx0 + dely0 * dely0 + delz0 * delz0; // initial distance
 					sphCut = sph_radius[i] + sph_radius[j];
 					if (rSq0 < sphCut * sphCut) {
 						// initially interacting pair: contact only within half the cutoff
 						rcut = 0.5 * rcut;
 						rcutSq = rcut * rcut;
 						if (rsq > rcutSq) {
 							continue;
 						}
 					}
 				}
 
 				r = sqrt(rsq);
 				//printf("hertz interaction, r=%f, cut=%f, h=%f\n", r, rcut, sqrt(rSq0));
 
 				// Hertzian short-range forces
 				delta = rcut - r; // overlap distance
 				r_geom = ri * rj / rcut;
 				//assuming poisson ratio = 1/4 for 3d
 				fpair = 1.066666667e0 * bulkmodulus[itype][jtype] * delta * sqrt(delta * r_geom); //  units: N
 				evdwl = fpair * 0.4e0 * delta; // GCG 25 April: this expression conserves total energy
 
 				// fpair / delta is the effective contact stiffness. With zero
 				// overlap (rsq == rcutSq is admitted by the self-contact test
 				// above) it is 0/0, and the resulting NaN would replace
 				// stable_time_increment through MIN, so skip the estimate.
 				if (delta > 0.0) {
 					dt_crit = 3.14 * sqrt(0.5 * (rmass[i] + rmass[j]) / (fpair / delta));
 					stable_time_increment = MIN(stable_time_increment, dt_crit);
 				}
 
 				if (r > 2.0e-16) {
 					fpair /= r; // divide by r and multiply with non-normalized distance vector
 				} else {
 					fpair = 0.0;
 				}
 
 				/*
 				 * contact viscosity -- needs to be done, see GRANULAR package for normal & shear damping
 				 * for now: no damping and thus no viscous energy deltaE
 				 */
 
 				if (evflag) {
 					ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, delx, dely, delz);
 				}
 
 				f[i][0] += delx * fpair;
 				f[i][1] += dely * fpair;
 				f[i][2] += delz * fpair;
 
 				if (newton_pair || j < nlocal) {
 					f[j][0] -= delx * fpair;
 					f[j][1] -= dely * fpair;
 					f[j][2] -= delz * fpair;
 				}
 
 			}
 		}
 	}
 
 //	double stable_time_increment_all = 0.0;
 //	MPI_Allreduce(&stable_time_increment, &stable_time_increment_all, 1, MPI_DOUBLE, MPI_MIN, world);
 //	if (comm->me == 0) {
 //		printf("stable time step for pair smd/hertz is %f\n", stable_time_increment_all);
 //	}
 }
 
 /* ----------------------------------------------------------------------
  allocate all arrays
  ------------------------------------------------------------------------- */
 
 void PairHertz::allocate() {
 	allocated = 1;
 	int n = atom->ntypes;
 
 	memory->create(setflag, n + 1, n + 1, "pair:setflag");
 	for (int i = 1; i <= n; i++)
 		for (int j = i; j <= n; j++)
 			setflag[i][j] = 0;
 
 	memory->create(bulkmodulus, n + 1, n + 1, "pair:kspring");
 	memory->create(kn, n + 1, n + 1, "pair:kn");
 
 	memory->create(cutsq, n + 1, n + 1, "pair:cutsq"); // always needs to be allocated, even with granular neighborlist
 
 	onerad_dynamic = new double[n + 1];
 	onerad_frozen = new double[n + 1];
 	maxrad_dynamic = new double[n + 1];
 	maxrad_frozen = new double[n + 1];
 }
 
 /* ----------------------------------------------------------------------
  global settings
  ------------------------------------------------------------------------- */
 
 void PairHertz::settings(int narg, char **arg) {
 	// the only argument is the scale factor applied to the contact radius
 	if (narg != 1) {
 		error->all(FLERR, "Illegal number of args for pair_style hertz");
 	}
 
 	scale = force->numeric(FLERR, arg[0]);
 
 	// report the setting once, from rank 0
 	const bool is_root = (comm->me == 0);
 	if (is_root) {
 		printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n");
 		printf("SMD/HERTZ CONTACT SETTINGS:\n");
 		printf("... effective contact radius is scaled by %f\n", scale);
 		printf(">>========>>========>>========>>========>>========>>========>>========>>========\n");
 	}
 }
 
 /* ----------------------------------------------------------------------
  set coeffs for one or more type pairs
  ------------------------------------------------------------------------- */
 
 void PairHertz::coeff(int narg, char **arg) {
 	// arg[0], arg[1]: type ranges I and J; arg[2]: bulk modulus for these pairs
 	if (narg != 3)
 		error->all(FLERR, "Incorrect args for pair coefficients");
 	if (!allocated)
 		allocate();
 
 	int ilo, ihi, jlo, jhi;
 	force->bounds(FLERR,arg[0], atom->ntypes, ilo, ihi);
 	force->bounds(FLERR,arg[1], atom->ntypes, jlo, jhi);
 
 	// parse with force->numeric() as settings() does, instead of atof(),
 	// which silently turns malformed input into 0.0
 	double bulkmodulus_one = force->numeric(FLERR, arg[2]);
 
 	// set short-range force constant
 	double kn_one = 0.0;
 	if (domain->dimension == 3) {
 		kn_one = (16. / 15.) * bulkmodulus_one; //assuming poisson ratio = 1/4 for 3d
 	} else {
 		kn_one = 0.251856195 * (2. / 3.) * bulkmodulus_one; //assuming poisson ratio = 1/3 for 2d
 	}
 
 	// only pairs with i <= j are stored here; init_one() mirrors them to (j,i)
 	int count = 0;
 	for (int i = ilo; i <= ihi; i++) {
 		for (int j = MAX(jlo, i); j <= jhi; j++) {
 			bulkmodulus[i][j] = bulkmodulus_one;
 			kn[i][j] = kn_one;
 			setflag[i][j] = 1;
 			count++;
 		}
 	}
 
 	// the given ranges selected no pair with i <= j
 	if (count == 0)
 		error->all(FLERR, "Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
  init for one type pair i,j and corresponding j,i
  ------------------------------------------------------------------------- */
 
 double PairHertz::init_one(int i, int j) {
 
 	if (!allocated) {
 		allocate();
 	}
 
 	if (setflag[i][j] == 0) {
 		error->all(FLERR, "All pair coeffs are not set");
 	}
 
 	// copy the (i,j) coefficients to the (j,i) entries
 	kn[j][i] = kn[i][j];
 	bulkmodulus[j][i] = bulkmodulus[i][j];
 
 	// cutoff = largest sum of per-type maximum radii over the
 	// dynamic/dynamic and dynamic/frozen combinations (not frozen/frozen)
 	const double dyn_dyn = maxrad_dynamic[i] + maxrad_dynamic[j];
 	const double frz_dyn = maxrad_frozen[i] + maxrad_dynamic[j];
 	const double dyn_frz = maxrad_dynamic[i] + maxrad_frozen[j];
 
 	double cutoff = dyn_dyn;
 	cutoff = MAX(cutoff, frz_dyn);
 	cutoff = MAX(cutoff, dyn_frz);
 
 	if (comm->me == 0) {
 		printf("cutoff for pair smd/hertz = %f\n", cutoff);
 	}
 	return cutoff;
 }
 
 /* ----------------------------------------------------------------------
  init specific to this pair style
  ------------------------------------------------------------------------- */
 
 void PairHertz::init_style() {
 
 	// this pair style needs the per-atom contact radius
 	if (!atom->contact_radius_flag) {
 		error->all(FLERR, "Pair style smd/hertz requires atom style with contact_radius");
 	}
 
 	// request a neighbor list with the half flag cleared and gran set
 	const int irequest = neighbor->request(this);
 	neighbor->requests[irequest]->half = 0;
 	neighbor->requests[irequest]->gran = 1;
 
 	// reset the per-type local maxima
 	for (int t = 1; t <= atom->ntypes; t++) {
 		onerad_dynamic[t] = 0.0;
 		onerad_frozen[t] = 0.0;
 	}
 
 	// largest atom->radius of each type among the atoms owned here
 	const double *radius = atom->radius;
 	const int *type = atom->type;
 	const int nlocal = atom->nlocal;
 
 	for (int a = 0; a < nlocal; a++) {
 		double &slot = onerad_dynamic[type[a]];
 		slot = MAX(slot, radius[a]);
 	}
 
 	// global per-type maxima over all processors (entries 1..ntypes)
 	MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
 	MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world);
 }
 
 /* ----------------------------------------------------------------------
  neighbor callback to inform pair style of neighbor list to use
  optional granular history list
  ------------------------------------------------------------------------- */
 
 void PairHertz::init_list(int id, NeighList *ptr) {
 	// only the list with id 0 is stored; other ids are ignored
 	if (id != 0)
 		return;
 	list = ptr;
 }
 
 /* ----------------------------------------------------------------------
  memory usage of local atom-based arrays
  ------------------------------------------------------------------------- */
 
 double PairHertz::memory_usage() {
 	// no memory is reported by this pair style
 	const double bytes = 0.0;
 	return bytes;
 }
 
 void *PairHertz::extract(const char *str, int &i) {
 	// Only one quantity can be requested: a pointer to the stable time
 	// increment computed in compute(). Any other name yields NULL.
 	// The argument i is not used.
 	if (strcmp(str, "smd/hertz/stable_time_increment_ptr") != 0) {
 		return NULL;
 	}
 
 	return (void *) &stable_time_increment;
 }
diff --git a/src/USER-SMD/pair_smd_triangulated_surface.cpp b/src/USER-SMD/pair_smd_triangulated_surface.cpp
index 8410f2ec0..b4e63dd11 100644
--- a/src/USER-SMD/pair_smd_triangulated_surface.cpp
+++ b/src/USER-SMD/pair_smd_triangulated_surface.cpp
@@ -1,846 +1,846 @@
 /* ----------------------------------------------------------------------
  *
  *                    *** Smooth Mach Dynamics ***
  *
  * This file is part of the USER-SMD package for LAMMPS.
  * Copyright (2014) Georg C. Ganzenmueller, georg.ganzenmueller@emi.fhg.de
  * Fraunhofer Ernst-Mach Institute for High-Speed Dynamics, EMI,
  * Eckerstrasse 4, D-79104 Freiburg i.Br, Germany.
  *
  * ----------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
  LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
  http://lammps.sandia.gov, Sandia National Laboratories
  Steve Plimpton, sjplimp@sandia.gov
 
  Copyright (2003) Sandia Corporation.  Under the terms of Contract
  DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
  certain rights in this software.  This software is distributed under
  the GNU General Public License.
 
  See the README file in the top-level LAMMPS directory.
  ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
- Contributing author: Mike Parks (SNL)
- ------------------------------------------------------------------------- */
+   Contributing author: Mike Parks (SNL)
+------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <float.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pair_smd_triangulated_surface.h"
 #include "atom.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 #include <Eigen/Eigen>
 #include <stdio.h>
 #include <iostream>
 
 using namespace std;
 using namespace LAMMPS_NS;
 using namespace Eigen;
 
 #define SQRT2 1.414213562e0
 
 /* ---------------------------------------------------------------------- */
 
 PairTriSurf::PairTriSurf(LAMMPS *lmp) :
 		Pair(lmp) {
 	// per-type radius work arrays; created in allocate()
 	onerad_dynamic = NULL;
 	onerad_frozen = NULL;
 	maxrad_dynamic = NULL;
 	maxrad_frozen = NULL;

 	// per-type-pair coefficient tables; created in allocate()
 	bulkmodulus = NULL;
 	kn = NULL;

 	// default: contact radii are used unscaled until settings() overrides this
 	scale = 1.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairTriSurf::~PairTriSurf() {
 	// nothing to release unless allocate() has run
 	if (!allocated) {
 		return;
 	}

 	// per-type radius arrays obtained with new[]
 	delete[] onerad_dynamic;
 	delete[] onerad_frozen;
 	delete[] maxrad_dynamic;
 	delete[] maxrad_frozen;

 	// 2d tables obtained through memory->create()
 	memory->destroy(setflag);
 	memory->destroy(cutsq);
 	memory->destroy(bulkmodulus);
 	memory->destroy(kn);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /*
  * Contact interaction between particles and triangles.
  *
  * For every neighbor pair one atom must be a triangle (molecule id >= 65535)
  * and the other a particle (molecule id < 65535); any other combination
  * raises an error. When the particle is closer to the triangle than its
  * contact radius, this routine
  *   - applies a penalty force to both atoms (owned atoms only),
  *   - tallies energy/virial if requested via eflag / vflag,
  *   - lowers stable_time_increment to the critical time step of the contact,
  *   - reflects the particle velocity if it points toward the triangle, and
  *   - overwrites the particle position so that it sits one contact radius
  *     away from the closest point on the triangle.
  *
  * eflag, vflag: energy / virial tally flags, forwarded to ev_setup().
  */
 void PairTriSurf::compute(int eflag, int vflag) {
 	int i, j, ii, jj, inum, jnum, itype, jtype;
 	double rsq, r, evdwl, fpair;
 	int *ilist, *jlist, *numneigh, **firstneigh;
 	double rcut, r_geom, delta, r_tri, r_particle, touch_distance, dt_crit;
 	int tri, particle;
 	// NOTE(review): x13, x23, x43, w, xi, C, w2d, rhs and offset are declared but never used below
 	Vector3d normal, x1, x2, x3, x4, x13, x23, x43, w, cp, x4cp, vnew, v_old;
 	;
 	Vector3d xi, x_center, dx;
 	Matrix2d C;
 	Vector2d w2d, rhs;

 	evdwl = 0.0;
 	if (eflag || vflag)
 		ev_setup(eflag, vflag);
 	else
 		evflag = vflag_fdotr = 0;

 	tagint *mol = atom->molecule;
 	double **f = atom->f;
 	double **smd_data_9 = atom->smd_data_9;
 	double **x = atom->x;
 	double **x0 = atom->x0;
 	double **v = atom->v;
 	double *rmass = atom->rmass;
 	int *type = atom->type;
 	int nlocal = atom->nlocal;
 	// note: "radius" here is the contact radius, not atom->radius
 	double *radius = atom->contact_radius;
 	double rcutSq;
 	Vector3d offset;

 	int newton_pair = force->newton_pair;
 	int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic);

 	inum = list->inum;
 	ilist = list->ilist;
 	numneigh = list->numneigh;
 	firstneigh = list->firstneigh;

 	// max_neighs only feeds the commented-out diagnostics at the end of this function
 	int max_neighs = 0;
 	// start from a very large value; contacts found below can only lower it
 	stable_time_increment = 1.0e22;

 	// loop over neighbors of my atoms using a half neighbor list
 	// NOTE(review): init_style() requests the list with half = 0 and gran = 1 -- confirm the list type
 	for (ii = 0; ii < inum; ii++) {
 		i = ilist[ii];
 		itype = type[i];
 		jlist = firstneigh[i];
 		jnum = numneigh[i];
 		max_neighs = MAX(max_neighs, jnum);

 		for (jj = 0; jj < jnum; jj++) {
 			j = jlist[jj];

 			// strip the extra bits stored in the neighbor index
 			j &= NEIGHMASK;

 			jtype = type[j];

 			/*
 			 * decide which one of i, j is triangle and which is particle
 			 * (triangles carry a molecule id >= 65535, particles a smaller one)
 			 */
 			if ((mol[i] < 65535) && (mol[j] >= 65535)) {
 				particle = i;
 				tri = j;
 			} else if ((mol[j] < 65535) && (mol[i] >= 65535)) {
 				particle = j;
 				tri = i;
 			} else {
 				// particle/particle or triangle/triangle pairs are not handled;
 				// presumably error->one() does not return, otherwise tri and
 				// particle would be used uninitialized below -- TODO confirm
 				error->one(FLERR, "unknown case");
 			}

 			//x_center << x[tri][0], x[tri][1], x[tri][2]; // center of triangle
 			x_center(0) = x[tri][0];
 			x_center(1) = x[tri][1];
 			x_center(2) = x[tri][2];
 			//x4 << x[particle][0], x[particle][1], x[particle][2];
 			x4(0) = x[particle][0];
 			x4(1) = x[particle][1];
 			x4(2) = x[particle][2];
 			dx = x_center - x4; // vector from particle to the triangle atom position
 			if (periodic) {
 				domain->minimum_image(dx(0), dx(1), dx(2));
 			}
 			rsq = dx.squaredNorm();

 			// coarse check: sum of the two scaled contact radii
 			r_tri = scale * radius[tri];
 			r_particle = scale * radius[particle];
 			rcut = r_tri + r_particle;
 			rcutSq = rcut * rcut;

 			//printf("type i=%d, type j=%d, r=%f, ri=%f, rj=%f\n", itype, jtype, sqrt(rsq), ri, rj);

 			if (rsq < rcutSq) {

 				/*
 				 * gather triangle information
 				 * (the x0 entry of the triangle atom is read as the triangle normal)
 				 */
 				normal(0) = x0[tri][0];
 				normal(1) = x0[tri][1];
 				normal(2) = x0[tri][2];

 				/*
 				 * distance check: is particle closer than its radius to the triangle plane?
 				 * (uses the unscaled contact radius)
 				 */
 				if (fabs(dx.dot(normal)) < radius[particle]) {
 					/*
 					 * read the three triangle vertices from smd_data_9
 					 */
 					x1(0) = smd_data_9[tri][0];
 					x1(1) = smd_data_9[tri][1];
 					x1(2) = smd_data_9[tri][2];
 					x2(0) = smd_data_9[tri][3];
 					x2(1) = smd_data_9[tri][4];
 					x2(2) = smd_data_9[tri][5];
 					x3(0) = smd_data_9[tri][6];
 					x3(1) = smd_data_9[tri][7];
 					x3(2) = smd_data_9[tri][8];

 					// cp: closest point on the triangle to the particle, r: its distance
 					PointTriangleDistance(x4, x1, x2, x3, cp, r);

 					/*
 					 * distance to closest point
 					 */
 					x4cp = x4 - cp;

 					/*
 					 * flip normal to point in direction of x4cp
 					 */

 					if (x4cp.dot(normal) < 0.0) {
 						normal *= -1.0;
 					}

 					/*
 					 * penalty force pushes particle away from triangle
 					 */
 					if (r < 1.0 * radius[particle]) {

 						delta = radius[particle] - r; // overlap distance
 						r_geom = radius[particle];
 						// force magnitude: (16/15) * bulk modulus * delta * sqrt(delta * r_geom)
 						fpair = 1.066666667e0 * bulkmodulus[itype][jtype] * delta * sqrt(delta * r_geom);
 						// critical time step from particle mass and the effective stiffness fpair / delta
 						dt_crit = 3.14 * sqrt(rmass[particle] / (fpair / delta));
 						stable_time_increment = MIN(stable_time_increment, dt_crit);

 						evdwl = r * fpair * 0.4e0 * delta; // GCG 25 April: this expression conserves total energy

 						fpair /= (r + 1.0e-2 * radius[particle]); // divide by r + softening and multiply with non-normalized distance vector

 						// forces are only accumulated on atoms owned by this processor
 						if (particle < nlocal) {
 							f[particle][0] += x4cp(0) * fpair;
 							f[particle][1] += x4cp(1) * fpair;
 							f[particle][2] += x4cp(2) * fpair;
 						}

 						if (tri < nlocal) {
 							f[tri][0] -= x4cp(0) * fpair;
 							f[tri][1] -= x4cp(1) * fpair;
 							f[tri][2] -= x4cp(2) * fpair;
 						}

 						if (evflag) {
 							ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, x4cp(0), x4cp(1), x4cp(2));
 						}

 					}

 					/*
 					 * if particle comes too close to triangle, reflect its velocity and explicitly move it away
 					 * (touch_distance equals the contact radius, i.e. the same threshold as the penalty force above)
 					 */

 					touch_distance = 1.0 * radius[particle];
 					if (r < touch_distance) {

 						/*
 						 * reflect velocity if it points toward triangle
 						 */

 						// from here on "normal" is the unit vector from the closest point to the particle
 						// NOTE(review): r == 0 would make this division produce non-finite values
 						normal = x4cp / r;

 						//v_old << v[particle][0], v[particle][1], v[particle][2];
 						v_old(0) = v[particle][0];
 						v_old(1) = v[particle][1];
 						v_old(2) = v[particle][2];
 						if (v_old.dot(normal) < 0.0) {
 							//printf("flipping velocity\n");
 							// mirror the velocity about the plane perpendicular to normal
 							vnew = 1.0 * (-2.0 * v_old.dot(normal) * normal + v_old);
 							v[particle][0] = vnew(0);
 							v[particle][1] = vnew(1);
 							v[particle][2] = vnew(2);
 						}

 						//printf("moving particle on top of triangle\n");
 						// place the particle exactly touch_distance away from the closest point
 						x[particle][0] = cp(0) + touch_distance * normal(0);
 						x[particle][1] = cp(1) + touch_distance * normal(1);
 						x[particle][2] = cp(2) + touch_distance * normal(2);
 					}

 				}
 			}
 		}
 	}

 //	int max_neighs_all = 0;
 //	MPI_Allreduce(&max_neighs, &max_neighs_all, 1, MPI_INT, MPI_MAX, world);
 //	if (comm->me == 0) {
 //		printf("max. neighs in tri pair is %d\n", max_neighs_all);
 //	}
 //
 //		double stable_time_increment_all = 0.0;
 //		MPI_Allreduce(&stable_time_increment, &stable_time_increment_all, 1, MPI_DOUBLE, MPI_MIN, world);
 //		if (comm->me == 0) {
 //			printf("stable time step tri pair is %f\n", stable_time_increment_all);
 //		}
 }
 
 /* ----------------------------------------------------------------------
  allocate all arrays
  ------------------------------------------------------------------------- */
 
 void PairTriSurf::allocate() {
 	allocated = 1;
 	int n = atom->ntypes;
 
 	memory->create(setflag, n + 1, n + 1, "pair:setflag");
 	for (int i = 1; i <= n; i++)
 		for (int j = i; j <= n; j++)
 			setflag[i][j] = 0;
 
 	memory->create(bulkmodulus, n + 1, n + 1, "pair:kspring");
 	memory->create(kn, n + 1, n + 1, "pair:kn");
 
 	memory->create(cutsq, n + 1, n + 1, "pair:cutsq"); // always needs to be allocated, even with granular neighborlist
 
 	onerad_dynamic = new double[n + 1];
 	onerad_frozen = new double[n + 1];
 	maxrad_dynamic = new double[n + 1];
 	maxrad_frozen = new double[n + 1];
 }
 
 /* ----------------------------------------------------------------------
  global settings
  ------------------------------------------------------------------------- */
 
 void PairTriSurf::settings(int narg, char **arg) {
 	// exactly one argument is accepted: the contact radius scale factor
 	if (narg != 1) {
 		error->all(FLERR, "Illegal number of args for pair_style smd/tri_surface");
 	}

 	scale = force->numeric(FLERR, arg[0]);

 	// the settings banner is printed by rank 0 only
 	if (comm->me != 0) {
 		return;
 	}

 	printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n");
 	printf("SMD/TRI_SURFACE CONTACT SETTINGS:\n");
 	printf("... effective contact radius is scaled by %f\n", scale);
 	printf(">>========>>========>>========>>========>>========>>========>>========>>========\n");
 }
 
 /* ----------------------------------------------------------------------
  set coeffs for one or more type pairs
  ------------------------------------------------------------------------- */
 
 void PairTriSurf::coeff(int narg, char **arg) {
 	// expected arguments:
 	//   arg[0], arg[1]  type ranges for the two atom types of the pair
 	//   arg[2]          bulk modulus used for the contact force
 	if (narg != 3)
 		error->all(FLERR, "Incorrect args for pair coefficients");
 	if (!allocated)
 		allocate();

 	int ilo, ihi, jlo, jhi;
 	force->bounds(FLERR, arg[0], atom->ntypes, ilo, ihi);
 	force->bounds(FLERR, arg[1], atom->ntypes, jlo, jhi);

 	// parse with the same checked conversion that settings() uses; atof()
 	// would silently turn a malformed argument into a bulk modulus of 0.0
 	double bulkmodulus_one = force->numeric(FLERR, arg[2]);

 	// set short-range force constant
 	double kn_one = 0.0;
 	if (domain->dimension == 3) {
 		kn_one = (16. / 15.) * bulkmodulus_one; //assuming poisson ratio = 1/4 for 3d
 	} else {
 		kn_one = 0.251856195 * (2. / 3.) * bulkmodulus_one; //assuming poisson ratio = 1/3 for 2d
 	}

 	// only the upper triangle (j >= i) is filled here; init_one() mirrors it
 	int count = 0;
 	for (int i = ilo; i <= ihi; i++) {
 		for (int j = MAX(jlo, i); j <= jhi; j++) {
 			bulkmodulus[i][j] = bulkmodulus_one;
 			kn[i][j] = kn_one;
 			setflag[i][j] = 1;
 			count++;
 		}
 	}

 	// the given type ranges did not select any i <= j pair
 	if (count == 0)
 		error->all(FLERR, "Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
  init for one type pair i,j and corresponding j,i
  ------------------------------------------------------------------------- */
 
 double PairTriSurf::init_one(int i, int j) {
 	if (!allocated) {
 		allocate();
 	}

 	if (setflag[i][j] == 0) {
 		error->all(FLERR, "All pair coeffs are not set");
 	}

 	// copy the (i,j) coefficients into the mirrored (j,i) slots
 	kn[j][i] = kn[i][j];
 	bulkmodulus[j][i] = bulkmodulus[i][j];

 	// candidate cutoffs: dynamic/dynamic and the two dynamic/frozen
 	// combinations; frozen/frozen is deliberately not considered
 	const double dynamic_dynamic = maxrad_dynamic[i] + maxrad_dynamic[j];
 	const double frozen_dynamic = maxrad_frozen[i] + maxrad_dynamic[j];
 	const double dynamic_frozen = maxrad_dynamic[i] + maxrad_frozen[j];

 	double cutoff = dynamic_dynamic;
 	cutoff = MAX(cutoff, frozen_dynamic);
 	cutoff = MAX(cutoff, dynamic_frozen);

 	// report the resulting cutoff on rank 0
 	if (comm->me == 0) {
 		printf("cutoff for pair smd/smd/tri_surface = %f\n", cutoff);
 	}

 	return cutoff;
 }
 
 /* ----------------------------------------------------------------------
  init specific to this pair style
  ------------------------------------------------------------------------- */
 
 void PairTriSurf::init_style() {
 	// this pair style needs the per-atom contact radius provided by the atom style
 	if (!atom->contact_radius_flag) {
 		error->all(FLERR, "Pair style smd/smd/tri_surface requires atom style with contact_radius");
 	}

 	// register a neighbor list request with the half flag cleared and the gran flag set
 	// (an alternative request with full = 1 existed here only as disabled code)
 	const int request_index = neighbor->request(this);
 	neighbor->requests[request_index]->half = 0;
 	neighbor->requests[request_index]->gran = 1;

 	// reset the per-type radius maxima owned by this processor
 	const int ntypes = atom->ntypes;
 	for (int t = 1; t <= ntypes; ++t) {
 		onerad_frozen[t] = 0.0;
 		onerad_dynamic[t] = 0.0;
 	}

 	// every local atom is counted as "dynamic"; the frozen maxima stay at zero
 	const double *atom_radius = atom->radius;
 	const int *atom_type = atom->type;
 	const int n_owned = atom->nlocal;
 	for (int k = 0; k < n_owned; ++k) {
 		double &largest = onerad_dynamic[atom_type[k]];
 		// same selection as MAX(largest, atom_radius[k])
 		if (!(largest > atom_radius[k])) {
 			largest = atom_radius[k];
 		}
 	}

 	// combine the per-processor maxima into global per-type maxima
 	MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], ntypes, MPI_DOUBLE, MPI_MAX, world);
 	MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], ntypes, MPI_DOUBLE, MPI_MAX, world);
 }
 
 /* ----------------------------------------------------------------------
  neighbor callback to inform pair style of neighbor list to use
  optional granular history list
  ------------------------------------------------------------------------- */
 
 void PairTriSurf::init_list(int id, NeighList *ptr) {
 	// only the list registered under id 0 is stored; any other id is ignored
 	if (id != 0) {
 		return;
 	}
 	list = ptr;
 }
 
 /* ----------------------------------------------------------------------
  memory usage of local atom-based arrays
  ------------------------------------------------------------------------- */
 
 double PairTriSurf::memory_usage() {
 	// no per-atom storage is accounted for by this style
 	const double reported_bytes = 0.0;
 	return reported_bytes;
 }
 
 /*
  * distance between triangle and point
  */
 /*
  function [dist,PP0] = pointTriangleDistance(TRI,P)
  % calculate distance between a point and a triangle in 3D
  % SYNTAX
  %   dist = pointTriangleDistance(TRI,P)
  %   [dist,PP0] = pointTriangleDistance(TRI,P)
  %
  % DESCRIPTION
  %   Calculate the distance of a given point P from a triangle TRI.
  %   Point P is a row vector of the form 1x3. The triangle is a matrix
  %   formed by three rows of points TRI = [P1;P2;P3] each of size 1x3.
  %   dist = pointTriangleDistance(TRI,P) returns the distance of the point P
  %   to the triangle TRI.
  %   [dist,PP0] = pointTriangleDistance(TRI,P) additionally returns the
  %   closest point PP0 to P on the triangle TRI.
  %
  % Author: Gwendolyn Fischer
  % Release: 1.0
  % Release date: 09/02/02
  % Release: 1.1 Fixed Bug because of normalization
  % Release: 1.2 Fixed Bug because of typo in region 5 20101013
  % Release: 1.3 Fixed Bug because of typo in region 2 20101014
 
  % Possible extension could be a version tailored not to return the distance
  % and additionally the closest point, but instead return only the closest
  % point. Could lead to a small speed gain.
 
  % Example:
  % %% The Problem
  % P0 = [0.5 -0.3 0.5];
  %
  % P1 = [0 -1 0];
  % P2 = [1  0 0];
  % P3 = [0  0 0];
  %
  % vertices = [P1; P2; P3];
  % faces = [1 2 3];
  %
  % %% The Engine
  % [dist,PP0] = pointTriangleDistance([P1;P2;P3],P0);
  %
  % %% Visualization
  % [x,y,z] = sphere(20);
  % x = dist*x+P0(1);
  % y = dist*y+P0(2);
  % z = dist*z+P0(3);
  %
  % figure
  % hold all
  % patch('Vertices',vertices,'Faces',faces,'FaceColor','r','FaceAlpha',0.8);
  % plot3(P0(1),P0(2),P0(3),'b*');
  % plot3(PP0(1),PP0(2),PP0(3),'*g')
  % surf(x,y,z,'FaceColor','b','FaceAlpha',0.3)
  % view(3)
 
  % The algorithm is based on
  % "David Eberly, 'Distance Between Point and Triangle in 3D',
  % Geometric Tools, LLC, (1999)"
  % http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
  %
  %        ^t
  %  \     |
  %   \reg2|
  %    \   |
  %     \  |
  %      \ |
  %       \|
  %        *P2
  %        |\
 %        | \
 %  reg3  |  \ reg1
  %        |   \
 %        |reg0\
 %        |     \
 %        |      \ P1
  % -------*-------*------->s
  %        |P0      \
 %  reg4  | reg5    \ reg6
  */
 
 //void PairTriSurf::PointTriangleDistance(const Vector3d P, const Vector3d TRI1, const Vector3d TRI2, const Vector3d TRI3,
 //		Vector3d &CP, double &dist) {
 //
 //	Vector3d B, E0, E1, D;
 //	double a, b, c, d, e, f;
 //	double det, s, t, sqrDistance, tmp0, tmp1, numer, denom, invDet;
 //
 //	// rewrite triangle in normal form
 //	B = TRI1;
 //	E0 = TRI2 - B;
 //	E1 = TRI3 - B;
 //
 //	D = B - P;
 //	a = E0.dot(E0);
 //	b = E0.dot(E1);
 //	c = E1.dot(E1);
 //	d = E0.dot(D);
 //	e = E1.dot(D);
 //	f = D.dot(D);
 //
 //	det = a * c - b * b;
 //	//% do we have to use abs here?
 //	s = b * e - c * d;
 //	t = b * d - a * e;
 //
 //	//% Terrible tree of conditionals to determine in which region of the diagram
 //	//% shown above the projection of the point into the triangle-plane lies.
 //	if ((s + t) <= det) {
 //		if (s < 0) {
 //			if (t < 0) {
 //				// %region4
 //				if (d < 0) {
 //					t = 0;
 //					if (-d >= a) {
 //						s = 1;
 //						sqrDistance = a + 2 * d + f;
 //					} else {
 //						s = -d / a;
 //						sqrDistance = d * s + f;
 //					}
 //				} else {
 //					s = 0;
 //					if (e >= 0) {
 //						t = 0;
 //						sqrDistance = f;
 //					} else {
 //						if (-e >= c) {
 //							t = 1;
 //							sqrDistance = c + 2 * e + f;
 //						} else {
 //							t = -e / c;
 //							sqrDistance = e * t + f;
 //						}
 //					}
 //				}
 //				// end % of region 4
 //			} else {
 //				// % region 3
 //				s = 0;
 //				if (e >= 0) {
 //					t = 0;
 //					sqrDistance = f;
 //				} else {
 //					if (-e >= c) {
 //						t = 1;
 //						sqrDistance = c + 2 * e + f;
 //					} else {
 //						t = -e / c;
 //						sqrDistance = e * t + f;
 //					}
 //				}
 //			}
 //			// end of region 3
 //		} else {
 //			if (t < 0) {
 //				//% region 5
 //				t = 0;
 //				if (d >= 0) {
 //					s = 0;
 //					sqrDistance = f;
 //				} else {
 //					if (-d >= a) {
 //						s = 1;
 //						sqrDistance = a + 2 * d + f;
 //					} else {
 //						s = -d / a;
 //						sqrDistance = d * s + f;
 //					}
 //				}
 //			} else {
 //				// region 0
 //				invDet = 1 / det;
 //				s = s * invDet;
 //				t = t * invDet;
 //				sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f;
 //			}
 //		}
 //	} else {
 //		if (s < 0) {
 //			// % region 2
 //			tmp0 = b + d;
 //			tmp1 = c + e;
 //			if (tmp1 > tmp0) { //% minimum on edge s+t=1
 //				numer = tmp1 - tmp0;
 //				denom = a - 2 * b + c;
 //				if (numer >= denom) {
 //					s = 1;
 //					t = 0;
 //					sqrDistance = a + 2 * d + f;
 //				} else {
 //					s = numer / denom;
 //					t = 1 - s;
 //					sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f;
 //				}
 //			} else
 //				// % minimum on edge s=0
 //				s = 0;
 //			if (tmp1 <= 0) {
 //				t = 1;
 //				sqrDistance = c + 2 * e + f;
 //			} else {
 //				if (e >= 0) {
 //					t = 0;
 //					sqrDistance = f;
 //				} else {
 //					t = -e / c;
 //					sqrDistance = e * t + f;
 //				}
 //			}
 //		} //end % of region	2
 //		else {
 //			if (t < 0) {
 //				// %region6
 //				tmp0 = b + e;
 //				tmp1 = a + d;
 //				if (tmp1 > tmp0) {
 //					numer = tmp1 - tmp0;
 //					denom = a - 2 * b + c;
 //					if (numer >= denom) {
 //						t = 1;
 //						s = 0;
 //						sqrDistance = c + 2 * e + f;
 //					} else {
 //						t = numer / denom;
 //						s = 1 - t;
 //						sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f;
 //					}
 //				} else {
 //					t = 0;
 //					if (tmp1 <= 0) {
 //						s = 1;
 //						sqrDistance = a + 2 * d + f;
 //					} else {
 //						if (d >= 0) {
 //							s = 0;
 //							sqrDistance = f;
 //						} else {
 //							s = -d / a;
 //							sqrDistance = d * s + f;
 //						}
 //					}
 //				} // % end region 6
 //			} else {
 //				//% region 1
 //				numer = c + e - b - d;
 //				if (numer <= 0) {
 //					s = 0;
 //					t = 1;
 //					sqrDistance = c + 2 * e + f;
 //				} else {
 //					denom = a - 2 * b + c;
 //					if (numer >= denom) {
 //						s = 1;
 //						t = 0;
 //						sqrDistance = a + 2 * d + f;
 //					} else {
 //						s = numer / denom;
 //						t = 1 - s;
 //						sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f;
 //					}
 //				} //% end of region 1
 //			}
 //		}
 //	}
 //
 //	// % account for numerical round-off error
 //	if (sqrDistance < 0) {
 //		sqrDistance = 0;
 //	}
 //
 //	dist = sqrt(sqrDistance);
 //
 //	// closest point
 //	CP = B + s * E0 + t * E1;
 //
 //}
 /*
  * % The algorithm is based on
  % "David Eberly, 'Distance Between Point and Triangle in 3D',
  % Geometric Tools, LLC, (1999)"
  % http://www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf
  */
 
 /*
  * Closest point on a triangle to a given point, and the distance to it.
  *
  *   sourcePosition    query point
  *   TRI0, TRI1, TRI2  triangle vertices
  *   CP                (output) point of the triangle closest to sourcePosition
  *   dist              (output) Euclidean distance |CP - sourcePosition|
  *
  * The triangle is written as T(s,t) = TRI0 + s * edge0 + t * edge1 with
  * s >= 0, t >= 0 and s + t <= 1. The squared distance to the query point is
  * a quadratic in (s,t); its unconstrained minimiser is classified into one
  * of the seven regions 0..6 of the (s,t) plane and then projected onto the
  * triangle edge or vertex belonging to that region.
  *
  * No special handling is done for degenerate triangles (zero-length edges or
  * collinear vertices); the divisions below are then by zero.
  */
 void PairTriSurf::PointTriangleDistance(const Vector3d sourcePosition, const Vector3d TRI0, const Vector3d TRI1,
 		const Vector3d TRI2, Vector3d &CP, double &dist) {

 	const Vector3d edge0 = TRI1 - TRI0;
 	const Vector3d edge1 = TRI2 - TRI0;
 	const Vector3d v0 = TRI0 - sourcePosition;

 	// coefficients of the quadratic Q(s,t) = a s^2 + 2 b s t + c t^2 + 2 d s + 2 e t + const
 	const double a = edge0.dot(edge0);
 	const double b = edge0.dot(edge1);
 	const double c = edge1.dot(edge1);
 	const double d = edge0.dot(v0);
 	const double e = edge1.dot(v0);

 	// s and t are kept unnormalised (scaled by det) until region 0 is identified
 	const double det = a * c - b * b;
 	double s = b * e - c * d;
 	double t = b * d - a * e;

 	// all intermediates are double: single precision temporaries would
 	// throw away accuracy of the returned closest point
 	if (s + t < det) {
 		if (s < 0.0) {
 			if (t < 0.0) {
 				// region 4: closest feature is edge t = 0, edge s = 0 or vertex TRI0
 				if (d < 0.0) {
 					s = clamp(-d / a, 0.0, 1.0);
 					t = 0.0;
 				} else {
 					s = 0.0;
 					t = clamp(-e / c, 0.0, 1.0);
 				}
 			} else {
 				// region 3: closest feature is on edge s = 0
 				s = 0.0;
 				t = clamp(-e / c, 0.0, 1.0);
 			}
 		} else if (t < 0.0) {
 			// region 5: closest feature is on edge t = 0
 			s = clamp(-d / a, 0.0, 1.0);
 			t = 0.0;
 		} else {
 			// region 0: the projection lies inside the triangle
 			const double invDet = 1.0 / det;
 			s *= invDet;
 			t *= invDet;
 		}
 	} else {
 		if (s < 0.0) {
 			// region 2: edge s + t = 1 or edge s = 0
 			const double tmp0 = b + d;
 			const double tmp1 = c + e;
 			if (tmp1 > tmp0) {
 				const double numer = tmp1 - tmp0;
 				const double denom = a - 2.0 * b + c;
 				s = clamp(numer / denom, 0.0, 1.0);
 				t = 1.0 - s;
 			} else {
 				t = clamp(-e / c, 0.0, 1.0);
 				s = 0.0;
 			}
 		} else if (t < 0.0) {
 			// region 6: edge s + t = 1 or edge t = 0
 			if (a + d > b + e) {
 				const double numer = c + e - b - d;
 				const double denom = a - 2.0 * b + c;
 				s = clamp(numer / denom, 0.0, 1.0);
 				t = 1.0 - s;
 			} else {
 				// minimum on edge t = 0: Q(s,0) = a s^2 + 2 d s + const is
 				// minimised at s = -d / a (mirror image of the s = 0 case
 				// in region 2, which uses -e / c)
 				s = clamp(-d / a, 0.0, 1.0);
 				t = 0.0;
 			}
 		} else {
 			// region 1: closest feature is on edge s + t = 1
 			const double numer = c + e - b - d;
 			const double denom = a - 2.0 * b + c;
 			s = clamp(numer / denom, 0.0, 1.0);
 			t = 1.0 - s;
 		}
 	}

 	CP = TRI0 + s * edge0 + t * edge1;
 	dist = (CP - sourcePosition).norm();

 }
 
 double PairTriSurf::clamp(const double a, const double min, const double max) {
 	// restrict a to [min, max]: the lower bound is tested first, and a is
 	// returned unchanged when neither comparison holds
 	return (a < min) ? min : ((a > max) ? max : a);
 }
 
 void *PairTriSurf::extract(const char *str, int &i) {
 	// the only quantity exported by name is the stable time increment;
 	// the second argument is not touched
 	if (strcmp(str, "smd/tri_surface/stable_time_increment_ptr") != 0) {
 		return NULL;
 	}
 	return (void *) &stable_time_increment;
 }